*Dendro macro - Version 3 - January 21, 1996 ;
* Enhancements include:
Landscape format;
* Array names changed to allow up to 9999 cases;
options nosource2;
%macro checkn;
/* checks if node is a terminal node */
node=c(i,j);
if node>0
then nodestat=1;
else nodestat=stat(-node);
%mend;
%macro findnode;
/* traverses the tree until a terminal */
/* node is located */
do while (nodestat=0);
prev(-c(i,j))=j;
j=-c(i,j);
i=1;
%checkn
end;
%mend;
%macro fuse;
/* increments the count of the number of */
/* fusions, records the order and status */
/* of the node and resets starting point */
/* for the next search */
do;
fusions=fusions+1;
ordr(j)=fusions;
stat(j)=1;
j=prev(j);
end;
%mend;
%macro inverty;
/* reverses y axis values for plotting */
%if "&order"="reverse"
%then %do;
y=ymax-y;
%end;
%mend;
%macro genlines;
/* generates annotate data set instructions */
/* to draw one fusion in the dendrogram */
if i=1 then
do;
y=ystart; %inverty;
function='move'; output dendro ;
y=ht; %inverty;
function='draw'; output dendro;
end;
else do;
y=ht; %inverty;
function='draw'; output dendro;
y=ystart; %inverty;
function='draw'; output dendro;
end;
xcoord(i)=x;
%mend;
proc format;
/* used to determine suitable intervals on y-axis */
value rft
1-<2='1'
2-<5='2'
5-<10='5'
10='10';
run;
%macro dendro(format,xlabel,ylabel,labels=down,color=black,
axis=black,back=white,text=black);
%global nc np proc order ymax ymin inc diff;
/* */
/* Purpose: Plots a dendrogram using the OUTTREE data */
/* set created by PROC CLUSTER */
/* */
/* Author: Paul Nicholson, University of Leeds */
/* */
/* Copyright: Paul Nicholson, University of Leeds */
/* */
/* Conditions of use: */
/* */
/* This macro may be used for educational and research */
/* purposes only and may not be used for commercial */
/* gain. */
/* */
/* Usage: */
/* */
/* (i) Run PROC CLUSTER or PROC VARCLUS specifying */
/* OUTTREE=TREE to save the cluster schedule */
/* in a data set called TREE. */
/* (ii) %INCLUDE dendro; */
/* (iii) %dendro; */
/* */
/* */
/* Parameter definitions */
/* */
/* Parameter Values Action */
/* format portrait vertical display */
/* landscape horizontal display */
/* xlabel 'text' label for x-axis (values axis) */
/* ylabel 'text' label for y-axis (distance axis) */
/* labels down plots x-axis labels vertically */
/* across plots x-axis labels horizontally */
/* */
/* color any determines colour of lines */
/* */
/* axis any determines colour of axes */
/* */
/* back any determines colour of background */
/* */
/* text any determines colour of text */
/* */
/* Take a copy of the tree data set. This allows macro */
/* to be re-executed without re-running cluster step */
data tree2;
set tree;
run;
data heights(keep=nc _height_)
formatds(keep=fmtname start label);
/* pick up the distances at which each fusion is formed */
length label start fmtname $ 8;
retain n nc varclus 0 last 0;
retain ymin 99999 ymax -99999 minid maxid;
retain fmtname '$clnamft';
set tree2 end=eof;
if _n_ = 1
then do;
/* detect the procedure used to create the tree dataset */
if substr(_name_,1,5) = 'CLUS1'
then do;
varclus=1;
call symput('proc','varclus');
end;
else do;
n=_ncl_;
nc=input(substr(_parent_,3,4),4.);
if _height_ = 0.0 then ymin=0.0;
call symput('nc',left(put(nc,4.)));
call symput('proc','cluster');
end;
end;
if varclus = 0
then do;
if _ncl_ < n;
nc=input(substr(_name_,3,4),4.);
actualnc+1;
output heights;
end;
else do;
ch=substr(_name_,5,1);
digit=indexc('0123456789',ch);
if substr(_name_,1,4) = 'CLUS' and digit > 0
then do;
nc=_ncl_;
actualnc+1;
_height_=_propor_;
output heights;
start=_name_;
label=left(put(_ncl_,4.));
output formatds;
end;
else do;
last=1;
call symput('nc',left(put(nc,4.)));
end;
end;
if last = 0
then do;
if _height_ > ymax
then do;
ymax=_height_;
maxid=_n_;
end;
if _height_ < ymin
then do;
ymin=_height_;
minid=_n_;
end;
end;
if eof = 1 or last = 1
then do;
call symput('actualnc',left(put(actualnc,4.)));
call symput('ymax',left(put(ymax,best12.)));
call symput('ymin',left(put(ymin,best12.)));
if varclus = 0 and maxid > minid
then call symput('order','normal');
else call symput('order','reverse');
end;
run;
proc sort data=heights;
by descending nc;
run;
/* create macro variable NP (no. of observations) */
%let np=%eval(&nc+1);
/* adjust nc (if disjoint clusters exist */
%let diff=%eval(&nc-&actualnc);
%if %eval(&diff) > 0
%then %let nc=%eval(&actualnc);
%if &proc=varclus
%then %do;
proc format cntlin=formatds;
run;
proc sort data=tree2;
by descending _parent_;
run;
%end;
data clusters (keep=nc clno1 clno2)
formatds (keep=fmtname start label);
/* pick up the node numbers for each fusion */
length c1 c2 label $ 16 fmtname $ 8 clusid $ 4;
length nc clno1 clno2 start 8;
array cl(2) c1 c2;
array clusno(2) clno1 clno2;
retain ic 0 fmtname 'nodeft' pos 3 clusid 'CL ';
/* reset cluster flags if tree created by VARCLUS */
if _n_=1 and "&proc"="varclus"
then do;
pos=5;
clusid='CLUS';
end;
/* process two records pertaining to fusion */
do i=1 to 2;
set tree2;
if _parent_ = '' then stop;
/* store the cluster name - use left to */
/* eliminate spaces that exist in numeric */
/* IDs. */
cl(i)=left(_name_);
/* pick up the number of the parent node */
%if "&proc"="cluster"
%then %do;
nc=input(substr(_parent_,3,4),4.);
%end;
%else %do;
nc=input(left(put(_parent_,$clnamft.)),4.);
%end;
/* establish node number as numeric to facilitate */
/* implementation of the tree-traversal algorithm */
/* flag clusters by negating the node number */
/* build format for re-attachment of node IDs later */
ch=substr(cl(i),pos,1);
digit=indexc('0123456789',ch);
if substr(cl(i),1,pos-1) eq substr(clusid,1,pos-1)
and digit > 0
then do;
%if "&proc"="cluster"
%then %do;
clusno(i)=-input(substr(cl(i),3,4),4.);
%end;
%else %do;
clusno(i)=-input(left(put(cl(i),$clnamft.)),4.);
%end;
end;
else do;
ic=ic+1;
clusno(i)=ic;
start=ic;
label=cl(i);
output formatds;
end;
end;
output clusters;
run;
/* Use PROC FORMAT to create format variable nodeft */
/* from the control data set , formatds. */
proc format cntlin=formatds;
run;
%if &proc=varclus
%then %do;
proc sort data=clusters;
by descending nc;
run;
%end;
/* combine the distances with the node numbers */
data treedata(keep=nc _height_ clno1 clno2);
merge clusters heights;
by descending nc;
/* adjust cluster numbers to range from 1 if */
/* disjoint clusters have been detected */
%if %eval(&diff)>0
%then %do;
nc=nc-%eval(&diff);
if clno1<0 then clno1=clno1+%eval(&diff);
if clno2<0 then clno2=clno2+%eval(&diff);
%end;
run;
proc sort;
by nc;
run;
/* transpose the data to facilitate building of arrays */
proc transpose data=treedata out=treetran;
run;
/* build arrays used by tree traversal algorithm */
data arrays;
array ncl(j) nc1-nc&nc;
array c1(j) acl1-acl&nc ;
array c2(j) bcl1-bcl&nc;
array height(j) h1-h&nc;
array ordr(j) ordr1-ordr&nc;
array prev(j) prev1-prev&nc;
array stat(j) stat1-stat&nc;
array col(j) col1-col&nc;
array super(k) ncl c1 c2 height;
do k=1 to 4;
set treetran;
do j=1 to &nc;
super=col;
end;
end;
do j=1 to &nc;
stat=0;
ordr=0;
prev=0;
end;
output;
drop col1-col&nc j k _name_;
stop;
run;
/* Tree traversal step */
/* The search begins at the top of the tree and progresses */
/* downwards until a terminal node is found. The tree is */
/* then traversed backwards to the parent node from which */
/* a search for a second terminal node is made. Once the */
/* second node has been found, the algorithm returns to the */
/* pair of nodes of which the parent node is one and */
/* attempts to fuse those. If fusion is not possible, the */
/* search continues as before in a downward direction. */
data ordernos(keep=acl1-acl&nc bcl1-bcl&nc nc1-nc&nc
h1-h&nc ordr1-ordr&nc);
array c(2,&nc) acl1-acl&nc bcl1-bcl&nc;
array ncl(&nc) nc1-nc&nc;
array height(&nc) h1-h&nc;
array ordr(&nc) ordr1-ordr&nc;
array prev(&nc) prev1-prev&nc;
array stat(&nc) stat1-stat&nc;
set arrays;
fusions=0; i=1; j=1; prev(1)=0;
%checkn
do while (fusions<&nc);
/* if disjoint clusters exist, find start of new sub-tree */
if j = 0
then do;
do j=1 to &nc until(nodestat=0);
nodestat=stat(j);
end;
prev(j)=0;
i=1;
end;
select (nodestat);
/* if its a fusion node with inferior */
/* terminal nodes as yet not reached */
when (0)
do;
%findnode
i=2;
%checkn
if nodestat=1 then %fuse
end;
/* if its a terminal node */
when (1)
do;
i=2;
/* find the status of its partner */
%checkn
/* if its a terminal node, then fuse */
if nodestat=1 then %fuse
else do;
/* else, continue search until one found */
%findnode
i=2;
/* and then perform similar attempt at fusion */
/* Note that failure to fuse here leaves the */
/* starting position for a search set correctly */
/* for the return to the top of the DO WHILE loop */
%checkn
if nodestat=1 then %fuse
end;
end;
end;
end;
output;
stop;
run;
/* Transpose the contents of the arrays to form */
/* a data set containing the original data on nodes */
/* together with the variable ord indicating the */
/* order in which the fusion is to be processed */
data ordtree(keep=nc clno1 clno2 ht ord);
length h1-h&nc nc1-nc&nc ordr1-ordr&nc 8;
array c1(&nc) acl1-acl&nc;
array c2(&nc) bcl1-bcl&nc;
array height(&nc) h1-h&nc;
array ncl(&nc) nc1-nc&nc;
array ordr(&nc) ordr1-ordr&nc;
set ordernos;
do j=1 to &nc;
nc=ncl(j);
clno1=c1(j);
clno2=c2(j);
ht=height(j);
ord=ordr(j);
output;
end;
stop;
run;
/* sort in ascending oder of ord */
proc sort;
by ord;
run;
/* Generate the annotate data set to draw the */
/* dendrogram */
data dendro(keep=function x y xsys ysys color)
formatds(keep=fmtname start label)
formaty(keep=fmtname start ylabel
rename=(ylabel=label));
length function color fmtname start $ 8 xsys ysys $ 1;
length label ylabel $ 16;
array clx(&nc) cx1-cx&nc;
array cly(&nc) cy1-cy&nc;
array cl(2) clno1 clno2;
array height(2) ht1 ht2;
array xcoord(2) x1 x2;
retain cx1-cx&nc cy1-cy&nc ipt shiftl 0 ymax ymin inc;
retain xsys ysys '2' color "&color";
if _n_ = 1
then do;
/* determine a suitable increment for y axis */
ymax=input(symget('ymax'),best12.);
ymin=input(symget('ymin'),best12.);
r=ymax-ymin;
p=floor(log10(r));
k=10**(p-1);
inc=input(put(ceil(r/k)*0.1,rft.),2.)*k;
call symput('inc',left(put(inc,best12.)));
/* adjust limits if necessary and re-set macro variables */
if r-int(r/inc)*inc > 0
then do;
ymin=floor(ymin/inc)*inc;
call symput('ymin',left(put(ymin,best12.)));
newr=ymax-ymin;
if newr-int(newr/inc)*inc > 0
then do;
ymax=ceil(ymax/inc)*inc;
call symput('ymax',left(put(ymax,best12.)));
end;
end;
/* if y values are inverted, generate format for y axis */
%if "&order"="reverse"
%then %do;
ylim=ymax-ymin;
call symput('ymin',put(0,1.));
call symput('ymax',left(put(ylim,best12.)));
fmtname='yft';
do y=ymax to ymin by -inc;
start=left(put(ymax-y,best12.));
ylabel=left(put(y,best12.));
output formaty;
end;
%end;
end;
set ordtree;
clusters=0;
do i=1 to 2;
/* if its a terminal node (but not a fusion point) */
/* assign it as the next point on the x-axis and */
/* use format variable nodeft., created earlier, */
/* to re-assign label to the assigned value of x. */
/* Then generate plot instructions. */
if cl(i) > 0
then do;
ipt+1;
start=left(put(ipt,5.));
label=put(cl(i),nodeft.);
fmtname='labelft';
output formatds;
x=ipt;
%if "&order"="normal"
%then %do;
ystart=ymin;
%end;
%else %do;
ystart=ymax;
%end;
height(i)=ht;
%genlines
end;
/* if its a fusion point, pick up its co-ordinates */
/* from the arrays clx and cly and generate plot */
/* instructions. */
else do;
clusters=clusters+1;
clpt=-cl(i);
x=clx(clpt);
ystart=cly(clpt);
height(i)=ystart;
%genlines
end;
end;
/* after processing this pair, store co-ordinates */
/* of the new fusion node. If two nodes fuse at */
/* the same height and one is a cluster, apply a */
/* correction factor to centre verticals. */
/* (applies to density linkage) */
if clusters=1 and ht1=ht2 then shiftl=shiftl+0.25;
else shiftl=0;
clx(nc)=(x1+x2)/2-shiftl; cly(nc)=ht;
run;
/* interchange x and y if landscape format is required */
%if %upcase(&format)=LANDSCAPE
%then %do;
data dendro(rename=(x=y y=x));
set dendro;
run;
%end;
/* Use PROC FORMAT to create format variable labelft */
/* from the control data set , formatds. */
proc format cntlin=formatds;
run;
%if "&order"="reverse"
%then %do;
proc format cntlin=formaty;
run;
%end;
goptions cback=&back ctext=&text vpos=60 hpos=120;
/* plot dendrogram using Annotate data set */
proc gplot data=dendro;
axis1 order=1 to &np by 1
minor=none
%if %upcase(&labels)=DOWN & %upcase(&format) NE LANDSCAPE
%then %do;
value=(angle=-90 rotate=90 height=1 font=swiss)
%end;
%else %do;
value=(height=1 font=swiss)
%end;
%if &xlabel=
%then %do;
label=none;
%end;
%else %do;
%if %upcase(&format) ne LANDSCAPE
%then %do;
label=(height=2 font=swiss "&xlabel");
%end;
%else %do;
label=(angle=90 height=2 font=swiss "&xlabel");
%end;
%end;
axis2 order = &ymin to &ymax by &inc
minor = none
value = (height=1 font=swiss)
%if &ylabel=
%then %do;
%if %upcase(&format) ne LANDSCAPE
%then %do;
label=(angle=90 height=2 font=swiss "Distance");
%end;
%else %do;
label=(height=2 font=swiss "Distance");
%end;
%end;
%else %do;
%if %upcase(&format) ne LANDSCAPE
%then label=(angle=90 height=2 font=swiss "&ylabel");
%else label=(height=2 font=swiss "&ylabel");
%end;
%if %upcase(&format) ne LANDSCAPE
%then %do;
plot y*x/haxis=axis1
vaxis=axis2
%end;
%else %do;
plot x*y/haxis=axis2
vaxis=axis1
%end;
caxis=&axis
annotate=dendro;
symbol1 v=none; /* suppresses plot symbols */
footnote1 angle=-90 ' ';
%if %upcase(&format) ne LANDSCAPE
%then %do;
format x labelft.;
%end;
%else %do;
format y labelft.;
%end;
%if "&order"="reverse"
%then %do;
%if %upcase(&format) ne LANDSCAPE
%then %do;
format y yft.;
%end;
%else %do;
format x yft.;
%end;
%end;
run;
%mend dendro;
options source2;