*Dendro macro - Version 3 - January 21, 1996 ;
* Enhancements include:
  Landscape format;
* Array names changed to allow up to 9999 cases;
options nosource2;
%macro checkn;
/* checks if node is a terminal node */
node=c(i,j);
if node>0
then nodestat=1;
else nodestat=stat(-node);
%mend;
 
%macro findnode;
/* traverses the tree until a terminal */
/* node is located                     */
do while (nodestat=0);
  prev(-c(i,j))=j;
  j=-c(i,j);
  i=1;
  %checkn
end;
%mend;
 
%macro fuse;
/* increments the count of the number of */
/* fusions, records the order and status */
/* of the node and resets starting point */
/* for the next search                   */
do;
  fusions=fusions+1;
  ordr(j)=fusions;
  stat(j)=1;
  j=prev(j);
end;
%mend;
 
%macro inverty;
/* reverses y axis values for plotting   */
 %if "&order"="reverse"
 %then %do;
         y=ymax-y;
       %end;
%mend;
 
%macro genlines;
/* generates annotate data set instructions */
/* to draw one fusion in the dendrogram     */
     if i=1 then
            do;
              y=ystart; %inverty;
              function='move'; output dendro ;
              y=ht; %inverty;
              function='draw'; output dendro;
            end;
     else   do;
              y=ht; %inverty;
              function='draw'; output dendro;
              y=ystart; %inverty;
              function='draw'; output dendro;
            end;
     xcoord(i)=x;
%mend;
 
proc format;
/* used to determine suitable intervals on y-axis */
value rft
1-<2='1'
2-<5='2'
5-<10='5'
10='10';
run;
 
%macro dendro(format,xlabel,ylabel,labels=down,color=black,
              axis=black,back=white,text=black);
   %global nc np proc order ymax ymin inc diff;
/*                                                          */
/*  Purpose: Plots a dendrogram using the OUTTREE data      */
/*           set created by PROC CLUSTER                    */
/*                                                          */
/*  Author: Paul Nicholson, University of Leeds             */
/*                                                          */
/*  Copyright: Paul Nicholson, University of Leeds          */
/*                                                          */
/*  Conditions of use:                                      */
/*                                                          */
/*  This macro may be used for educational and research     */
/*  purposes only and may not be used for commercial        */
/*  gain.                                                   */
/*                                                          */
/*  Usage:                                                  */
/*                                                          */
/*  (i)   Run PROC CLUSTER or PROC VARCLUS specifying       */
/*        OUTTREE=TREE to save the cluster schedule         */
/*        in a data set called TREE.                        */
/*  (ii)  %INCLUDE dendro;                                  */
/*  (iii) %dendro;                                          */
/*                                                          */
/*                                                          */
/*  Parameter definitions                                   */
/*                                                          */
/*  Parameter  Values     Action                            */
/*  format     portrait   vertical display                  */
/*             landscape  horizontal display                */
/*  xlabel     'text'     label for x-axis (values axis)    */
/*  ylabel     'text'     label for y-axis (distance axis)  */
/*  labels     down       plots x-axis labels vertically    */
/*             across     plots x-axis labels horizontally  */
/*                                                          */
/*  color      any        determines colour of lines        */
/*                                                          */
/*  axis       any        determines colour of axes         */
/*                                                          */
/*  back       any        determines colour of background   */
/*                                                          */
/*  text       any        determines colour of text         */
/*                                                          */ 
/* Take a copy of the tree data set. This allows macro      */
/* to be re-executed without re-running cluster step        */
data tree2;
  set tree;
run;

data heights(keep=nc _height_)
     formatds(keep=fmtname start label);
/* pick up the distances at which each fusion is formed */
  length label start fmtname $ 8;
  retain n nc varclus 0 last 0;
  retain ymin 99999 ymax -99999 minid maxid;
  retain fmtname '$clnamft';
  set tree2 end=eof;
  if _n_ = 1
  then do;
/* detect the procedure used to create the tree dataset */
         if substr(_name_,1,5) = 'CLUS1'
         then do;
                varclus=1;
                call symput('proc','varclus');
              end;
         else do;
                n=_ncl_;
                nc=input(substr(_parent_,3,4),4.);
                if _height_ = 0.0 then ymin=0.0;
                call symput('nc',left(put(nc,4.)));
                call symput('proc','cluster');
              end;
       end;
  if varclus = 0
  then do;
         if _ncl_ < n;
         nc=input(substr(_name_,3,4),4.);
         actualnc+1;
         output heights;
       end;
  else do;
         ch=substr(_name_,5,1);
         digit=indexc('0123456789',ch);
         if substr(_name_,1,4) = 'CLUS' and digit > 0
         then do;
                nc=_ncl_;
                actualnc+1;
                _height_=_propor_;
                output heights;
                start=_name_;
                label=left(put(_ncl_,4.));
                output formatds;
              end;
         else do;
                last=1;
                call symput('nc',left(put(nc,4.)));
              end;
      end;
  if last = 0
  then do;
         if _height_ > ymax
         then do;
                ymax=_height_;
                maxid=_n_;
              end;
         if _height_ < ymin
         then do;
                ymin=_height_;
                minid=_n_;
              end;
       end;
  if eof = 1 or last = 1
  then do;
         call symput('actualnc',left(put(actualnc,4.)));
         call symput('ymax',left(put(ymax,best12.)));
         call symput('ymin',left(put(ymin,best12.)));
         if varclus = 0 and maxid > minid
         then call symput('order','normal');
         else call symput('order','reverse');
      end;
run;
 
proc sort data=heights;
  by descending nc;
run;
 
/* create macro variable NP (no. of observations) */
%let np=%eval(&nc+1);

/* adjust nc (if disjoint clusters exist */
%let diff=%eval(&nc-&actualnc);
%if %eval(&diff) > 0
%then %let nc=%eval(&actualnc);

%if &proc=varclus
%then %do;
         proc format cntlin=formatds;
         run;
         proc sort data=tree2;
           by descending _parent_;
         run;
      %end;

data clusters (keep=nc clno1 clno2)
     formatds (keep=fmtname start label);

/* pick up the node numbers for each fusion */
  length c1 c2 label $ 16 fmtname $ 8 clusid $ 4;
  length nc clno1 clno2 start 8;
  array cl(2) c1 c2;
  array clusno(2) clno1 clno2;
  retain ic 0 fmtname 'nodeft' pos 3 clusid 'CL  ';
 
/* reset cluster flags if tree created by VARCLUS */
  if _n_=1 and "&proc"="varclus"
  then do;
         pos=5;
         clusid='CLUS';
       end;
  /* process two records pertaining to fusion */
  do i=1 to 2;
    set tree2;
    if _parent_ = '' then stop;

  /* store the cluster name - use left to   */
  /* eliminate spaces that exist in numeric */
  /*  IDs.                                  */

    cl(i)=left(_name_);
 
  /* pick up the number of the parent node  */
    %if "&proc"="cluster"
    %then %do;
            nc=input(substr(_parent_,3,4),4.);
          %end;
    %else %do;
            nc=input(left(put(_parent_,$clnamft.)),4.);
          %end;

  /* establish node number as numeric to facilitate   */
  /* implementation of the tree-traversal algorithm   */
  /* flag clusters by negating the node number        */
  /* build format for re-attachment of node IDs later */

    ch=substr(cl(i),pos,1);
    digit=indexc('0123456789',ch);
    if substr(cl(i),1,pos-1) eq substr(clusid,1,pos-1)
       and digit > 0
    then do;
           %if "&proc"="cluster"
           %then %do;
                   clusno(i)=-input(substr(cl(i),3,4),4.);
                 %end;
           %else %do;
                   clusno(i)=-input(left(put(cl(i),$clnamft.)),4.);
                 %end;
         end;
    else do;
           ic=ic+1;
           clusno(i)=ic;
           start=ic;
           label=cl(i);
           output formatds;
         end;
  end;
  output clusters;
run;
 
/* Use PROC FORMAT to create format variable nodeft   */
/* from the control data set , formatds.              */
proc format cntlin=formatds;
run;
 
%if &proc=varclus
%then %do;
         proc sort data=clusters;
           by descending nc;
         run;
      %end;

/* combine the distances with the node numbers */
data treedata(keep=nc _height_ clno1 clno2);
  merge clusters heights;
  by descending nc;
/* adjust cluster numbers to range from 1 if   */
/* disjoint clusters have been detected        */
  %if %eval(&diff)>0
  %then %do;
           nc=nc-%eval(&diff);
           if clno1<0 then clno1=clno1+%eval(&diff);
           if clno2<0 then clno2=clno2+%eval(&diff);
        %end;
run;
proc sort;
  by nc;
run;
 

/* transpose the data to facilitate building of arrays */
proc transpose data=treedata out=treetran;
run;
 
/* build arrays used by tree traversal algorithm */
data arrays;
array ncl(j) nc1-nc&nc;
array c1(j) acl1-acl&nc ;
array c2(j) bcl1-bcl&nc;
array height(j) h1-h&nc;
array ordr(j) ordr1-ordr&nc;
array prev(j) prev1-prev&nc;
array stat(j) stat1-stat&nc;
array col(j) col1-col&nc;
array super(k) ncl c1 c2 height;
do k=1 to 4;
set treetran;
do j=1 to &nc;
  super=col;
end;
end;
do j=1 to &nc;
  stat=0;
  ordr=0;
  prev=0;
end;
output;
drop col1-col&nc j k _name_;
stop;
run;
 
/* Tree traversal step                                      */
/* The search begins at the top of the tree and progresses  */
/* downwards until a terminal node is found.  The tree is   */
/* then traversed backwards to the parent node from which   */
/* a search for a second terminal node is made.  Once the   */
/* second node has been found, the algorithm returns to the */
/* pair of nodes of which the parent node is one and        */
/* attempts to fuse those.  If fusion is not possible, the  */
/* search continues as before in a downward direction.      */

data ordernos(keep=acl1-acl&nc bcl1-bcl&nc nc1-nc&nc
                   h1-h&nc ordr1-ordr&nc);
array c(2,&nc) acl1-acl&nc bcl1-bcl&nc;
array ncl(&nc) nc1-nc&nc;
array height(&nc) h1-h&nc;
array ordr(&nc) ordr1-ordr&nc;
array prev(&nc) prev1-prev&nc;
array stat(&nc) stat1-stat&nc;
set arrays;
fusions=0; i=1; j=1; prev(1)=0;
%checkn
do while (fusions<&nc);

/* if disjoint clusters exist, find start of new sub-tree */
   if j = 0
   then do;
          do j=1 to &nc until(nodestat=0);
             nodestat=stat(j);
          end;
          prev(j)=0;
          i=1;
        end;

   select (nodestat);
/* if its a fusion node with inferior */
/* terminal nodes as yet not reached  */
   when (0)
        do;
          %findnode
          i=2;
          %checkn
          if nodestat=1 then %fuse
        end;
/* if its a terminal node  */
   when (1)
        do;
          i=2;
/* find the status of its partner */
          %checkn
/* if its a terminal node, then fuse */
          if nodestat=1 then %fuse
          else do;
/* else, continue search until one found */
                 %findnode
                 i=2;
/* and then perform similar attempt at fusion     */
/* Note that failure to fuse here leaves the      */
/* starting position for a search set correctly   */
/* for the return to the top of the DO WHILE loop */
                 %checkn
                 if nodestat=1 then %fuse
               end;
        end;
end;
end;
output;
stop;
run;
 
/* Transpose the contents of the arrays to form     */
/* a data set containing the original data on nodes */
/* together with the variable ord indicating the    */
/* order in which the fusion is to be processed     */
data ordtree(keep=nc clno1 clno2 ht ord);
length h1-h&nc nc1-nc&nc ordr1-ordr&nc  8;
array c1(&nc) acl1-acl&nc;
array c2(&nc) bcl1-bcl&nc;
array height(&nc) h1-h&nc;
array ncl(&nc) nc1-nc&nc;
array ordr(&nc) ordr1-ordr&nc;
set ordernos;
do j=1 to &nc;
  nc=ncl(j);
  clno1=c1(j);
  clno2=c2(j);
  ht=height(j);
  ord=ordr(j);
output;
end;
stop;
run;

/* sort in ascending oder of ord */
proc sort;
  by ord;
run;
 
/* Generate the annotate data set to draw the  */
/* dendrogram                                  */
 
data dendro(keep=function x y xsys ysys color)
     formatds(keep=fmtname start label)
     formaty(keep=fmtname start ylabel
             rename=(ylabel=label));
length function color fmtname start $ 8 xsys ysys $ 1;
length label ylabel $ 16;
array clx(&nc) cx1-cx&nc;
array cly(&nc) cy1-cy&nc;
array cl(2) clno1 clno2;
array height(2) ht1 ht2;
array xcoord(2) x1 x2;
retain cx1-cx&nc cy1-cy&nc ipt shiftl 0 ymax ymin inc;
retain xsys ysys '2' color "&color";

if _n_ = 1
then do;

/* determine a suitable increment for y axis */
       ymax=input(symget('ymax'),best12.);
       ymin=input(symget('ymin'),best12.);
       r=ymax-ymin;
       p=floor(log10(r));
       k=10**(p-1);
       inc=input(put(ceil(r/k)*0.1,rft.),2.)*k;
       call symput('inc',left(put(inc,best12.)));

/* adjust limits if necessary and re-set macro variables */
       if r-int(r/inc)*inc > 0
       then do;
             ymin=floor(ymin/inc)*inc;
             call symput('ymin',left(put(ymin,best12.)));
             newr=ymax-ymin;
             if newr-int(newr/inc)*inc > 0
             then do;
                   ymax=ceil(ymax/inc)*inc;
                   call symput('ymax',left(put(ymax,best12.)));
                  end;
            end;
 
/* if y values are inverted, generate format for y axis */
       %if "&order"="reverse"
       %then %do;
               ylim=ymax-ymin;
               call symput('ymin',put(0,1.));
               call symput('ymax',left(put(ylim,best12.)));
               fmtname='yft';
               do y=ymax to ymin by -inc;
                  start=left(put(ymax-y,best12.));
                  ylabel=left(put(y,best12.));
                  output formaty;
               end;
             %end;
     end;
set ordtree;
clusters=0;
do i=1 to 2;
/* if its a terminal node (but not a fusion point) */
/* assign it as the next point on the x-axis and   */
/* use format variable nodeft., created earlier,   */
/* to re-assign label to the assigned value of x.  */
/* Then generate plot instructions.                */
  if cl(i) > 0
  then do;
         ipt+1;
         start=left(put(ipt,5.));
         label=put(cl(i),nodeft.);
         fmtname='labelft';
         output formatds;
         x=ipt;
         %if "&order"="normal"
         %then %do;
                  ystart=ymin;
               %end;
         %else %do;
                  ystart=ymax;
               %end;
         height(i)=ht;
         %genlines
       end;
/* if its a fusion point, pick up its co-ordinates */
/* from the arrays clx and cly and generate plot   */
/* instructions.                                   */
  else do;
         clusters=clusters+1;
         clpt=-cl(i);
         x=clx(clpt);
         ystart=cly(clpt);
         height(i)=ystart;
         %genlines
       end;
end;

/* after processing this pair, store co-ordinates  */
/* of the new fusion node.  If two nodes fuse at   */
/* the same height and one is a cluster, apply a   */
/* correction factor to centre verticals.          */
/* (applies to density linkage)                    */
if clusters=1 and ht1=ht2 then shiftl=shiftl+0.25;
else shiftl=0;
clx(nc)=(x1+x2)/2-shiftl; cly(nc)=ht;
run;
 
/* interchange x and y if landscape format is required */

%if %upcase(&format)=LANDSCAPE
%then %do;
data dendro(rename=(x=y y=x));
   set dendro;
run;
%end;


/* Use PROC FORMAT to create format variable labelft  */
/* from the control data set , formatds.              */
proc format cntlin=formatds;
run;
 
%if "&order"="reverse"
%then %do;
        proc format cntlin=formaty;
        run;
      %end;

goptions cback=&back ctext=&text vpos=60 hpos=120;
/* plot dendrogram using Annotate data set */
proc gplot data=dendro;
         axis1 order=1 to &np by 1
         minor=none
%if %upcase(&labels)=DOWN & %upcase(&format) NE LANDSCAPE
%then %do;
         value=(angle=-90 rotate=90 height=1 font=swiss)
      %end;
%else %do;
         value=(height=1 font=swiss)
      %end;
%if &xlabel=
%then %do;
         label=none;
      %end;
%else %do;
         %if %upcase(&format) ne LANDSCAPE
         %then %do;
                  label=(height=2 font=swiss "&xlabel");
               %end;
         %else %do;
                  label=(angle=90 height=2 font=swiss "&xlabel");
               %end;
      %end;
         axis2 order = &ymin to &ymax by &inc
               minor = none
               value = (height=1 font=swiss)
%if &ylabel=
%then %do;
         %if %upcase(&format) ne LANDSCAPE
         %then %do;
                  label=(angle=90 height=2 font=swiss "Distance");
               %end;
         %else %do;
                  label=(height=2 font=swiss "Distance");
               %end;
      %end;
%else %do;
         %if %upcase(&format) ne LANDSCAPE
         %then label=(angle=90 height=2 font=swiss "&ylabel");
         %else label=(height=2 font=swiss "&ylabel");
      %end;
%if %upcase(&format) ne LANDSCAPE
%then %do;
plot y*x/haxis=axis1
         vaxis=axis2
%end;
%else %do;
plot x*y/haxis=axis2
         vaxis=axis1
%end;
         caxis=&axis
         annotate=dendro;
symbol1 v=none;    /* suppresses plot symbols */
footnote1 angle=-90 ' ';
%if %upcase(&format) ne LANDSCAPE
%then %do;
         format x labelft.;
      %end;
%else %do;
         format y labelft.;
      %end;
%if "&order"="reverse"
%then %do;
         %if %upcase(&format) ne LANDSCAPE
         %then %do;
                  format y yft.;
               %end;
         %else %do;
                  format x yft.;
               %end;
      %end;
run;
%mend dendro;
options source2;