
/****************************************************************/
/* SAS SAMPLE LIBRARY */
/* */
/* NAME: CLUSTEX4 */
/* TITLE: DOCUMENTATION EXAMPLE 4 FROM PROC CLUSTER */
/* PRODUCT: STAT */
/* SYSTEM: ALL */
/* KEYS: CLUSTER */
/* PROCS: CLUSTER TREE STANDARD */
/* DATA: */
/* */
/* REF: */
/* MISC: */
/* */
/****************************************************************/
/*--------------------------------------------------------------------*/
/* Build data set TEETH: one observation per mammal.                   */
/*                                                                    */
/*   mammal   species name, read from columns 1-16                    */
/*   v1-v8    eight single-digit tooth counts, read from columns      */
/*            21-28 with the 1. informat                              */
/*                                                                    */
/* The INPUT statement is column-sensitive: every data line must keep */
/* the name inside columns 1-16 and start the digits in column 21.    */
/* If the digits drift left, they are absorbed into MAMMAL and the    */
/* @21 pointer runs past the end of the record.                       */
/*--------------------------------------------------------------------*/
data teeth;
   title 'HIERARCHICAL CLUSTER ANALYSIS OF MAMMALS'' TEETH DATA';
   title2 'Evaluating the Effects of Ties';
   input mammal $ 1-16
         @21 (v1-v8) (1.);
   label v1='Top incisors'
         v2='Bottom incisors'
         v3='Top canines'
         v4='Bottom canines'
         v5='Top premolars'
         v6='Bottom premolars'
         v7='Top molars'
         v8='Bottom molars';
/* column ruler for the data lines that follow:
----+----1----+----2----+----3
*/
   cards;
BROWN BAT           23113333
MOLE                32103333
SILVER HAIR BAT     23112333
PIGMY BAT           23112233
HOUSE BAT           23111233
RED BAT             13112233
PIKA                21002233
RABBIT              21003233
BEAVER              11002133
GROUNDHOG           11002133
GRAY SQUIRREL       11001133
HOUSE MOUSE         11000033
PORCUPINE           11001133
WOLF                33114423
BEAR                33114423
RACCOON             33114432
MARTEN              33114412
WEASEL              33113312
WOLVERINE           33114412
BADGER              33113312
RIVER OTTER         33114312
SEA OTTER           32113312
JAGUAR              33113211
COUGAR              33113211
FUR SEAL            32114411
SEA LION            32114411
GREY SEAL           32113322
ELEPHANT SEAL       21114411
REINDEER            04103333
ELK                 04103333
DEER                04003333
MOOSE               04003333
;
/* Average-linkage clustering of the tooth counts exactly as recorded. */
/* OUTTREE=_NULL_ means no tree data set is kept from this run.        */
proc cluster data=teeth
             method=average
             nonorm
             outtree=_null_;
   title3 'Raw Data';
   id mammal;
   var v1-v8;
run;

/* The same analysis repeated with the STD option added. */
proc cluster data=teeth
             std
             method=average
             nonorm
             outtree=_null_;
   title3 'Standardized Data';
   id mammal;
   var v1-v8;
run;

/* Show the SAS statements generated by the macros below in the log. */
options mprint;
*---------------------------------------------------------------------+
| |
| the macro CLUSPERM randomly permutes observations and does a |
| cluster analysis for each permutation. the arguments are as |
| follows: |
| |
| data data set name |
| var list of variables to cluster |
| id id variable for proc cluster |
| method clustering method (and possibly other options) |
| nperm number of random permutations |
| |
+---------------------------------------------------------------------;
%macro CLUSPERM(data,var,id,method,nperm);
   /*-----------------------------------------------------------------*/
   /* Cluster NPERM random re-orderings of the observations in DATA.   */
   /*                                                                 */
   /*   data     input data set name                                  */
   /*   var      list of variables to cluster                         */
   /*   id       id variable for proc cluster                         */
   /*   method   clustering method (and possibly other options)       */
   /*   nperm    number of random permutations                        */
   /*                                                                 */
   /* Creates work data sets _TEMP_ and _PERM_, plus one tree data    */
   /* set _TREE_1 ... _TREE_<nperm> per permutation.                  */
   /*-----------------------------------------------------------------*/

   /* Keep the loop index private so a caller's (or global) macro     */
   /* variable N is never overwritten.                                */
   %local n;

   /*------create temporary data set with random numbers------*/
   /* One uniform random number per observation per permutation.     */
   /* An explicit array replaces the obsolete implicit-array DO OVER; */
   /* the values are generated in the same order as before.          */
   data _temp_;
      set &data;
      array _random_{*} _ran_1-_ran_&nperm;
      do _i_ = 1 to dim(_random_);
         _random_{_i_} = ranuni(835297461);
      end;
      drop _i_;
   run;

   /*------permute and cluster the data------*/
   %do n=1 %to &nperm;
      /* Sorting by the n-th random column puts the rows in a random  */
      /* order for this permutation.                                  */
      proc sort data=_temp_(keep=_ran_&n &var &id) out=_perm_;
         by _ran_&n;
      run;

      /* Name the input explicitly rather than relying on _LAST_. */
      proc cluster data=_perm_ method=&method noprint outtree=_tree_&n;
         var &var;
         id &id;
      run;
   %end;
%mend CLUSPERM;
*---------------------------------------------------------------------+
| |
| the macro PLOTPERM plots various cluster statistics against the |
| number of clusters for each permutation. the arguments are as |
| follows: |
| |
| stat names of variables from tree data set |
| nclus maximum number of clusters to be plotted |
| nperm number of random permutations |
| |
+---------------------------------------------------------------------;
%macro PLOTPERM(stat,nclus,nperm);
   /*-----------------------------------------------------------------*/
   /* Plot cluster statistics against the number of clusters, one     */
   /* plotting symbol per permutation.                                */
   /*                                                                 */
   /*   stat     names of variables from the tree data sets           */
   /*   nclus    maximum number of clusters to be plotted             */
   /*   nperm    number of random permutations                        */
   /*                                                                 */
   /* Reads _TREE_1 ... _TREE_<nperm> and creates data set _PLOT_.    */
   /*-----------------------------------------------------------------*/

   /* Keep the loop index private so a caller's (or global) macro     */
   /* variable N is never overwritten.                                */
   %local n;

   /*------concatenate tree data sets, keeping NCLUS or fewer clusters------*/
   data _plot_;
      /* The second semicolon ends the SET statement; the first one   */
      /* belongs to the macro %END.                                   */
      set %do n=1 %to &nperm; _tree_&n(in=_in_&n) %end; ;
      if _ncl_ <= &nclus;
      /* Record which tree data set each row came from. */
      %do n=1 %to &nperm;
         if _in_&n then _perm_=&n;
      %end;
      label _perm_='permutation number';
      keep _ncl_ &stat _perm_;
   run;

   /*------plot the requested statistics by number of clusters------*/
   /* Name the input explicitly rather than relying on _LAST_, and    */
   /* end the procedure with QUIT so it is not left running.          */
   proc plot data=_plot_;
      plot (&stat)*_ncl_=_perm_;
   run;
   quit;
%mend PLOTPERM;
*---------------------------------------------------------------------+
| |
| the macro TREEPERM generates cluster-membership variables for a |
| specified number of clusters for each permutation. proc print lists |
| the objects in each cluster-combination and proc tabulate gives |
| the frequencies and means. the arguments are as follows: |
| |
| var list of variables to cluster (no "-" or ":" allowed) |
| id id variable for proc cluster |
| meanfmt format for printing means in proc tabulate |
| nclus number of clusters desired |
| nperm number of random permutations |
| |
+---------------------------------------------------------------------;
%macro TREEPERM(var,id,meanfmt,nclus,nperm);
   /*-----------------------------------------------------------------*/
   /* For each permutation, cut the tree at NCLUS clusters, then list */
   /* and tabulate the combinations of cluster memberships.           */
   /*                                                                 */
   /*   var      list of variables to cluster (no "-" or ":" allowed) */
   /*   id       id variable for proc cluster                         */
   /*   meanfmt  format for printing means in proc tabulate           */
   /*   nclus    number of clusters desired                           */
   /*   nperm    number of random permutations                        */
   /*                                                                 */
   /* Reads _TREE_1 ... _TREE_<nperm>; creates _OUT_1 ... _OUT_<nperm>*/
   /* and _MERGE_.                                                    */
   /*-----------------------------------------------------------------*/

   /* Keep the loop index private so a caller's (or global) macro     */
   /* variable N is never overwritten.                                */
   %local n;

   /*------create data sets giving cluster membership------*/
   %do n=1 %to &nperm;
      /* CLUSTER is renamed per permutation so the merge below can    */
      /* hold all memberships side by side.                           */
      proc tree data=_tree_&n noprint n=&nclus
                out=_out_&n(drop=clusname rename=(cluster=_clus_&n));
         copy &var;
         id &id;
      run;

      /* Sorted by the merge key. Input is named explicitly rather    */
      /* than relying on _LAST_.                                      */
      proc sort data=_out_&n;
         by &id &var;
      run;
   %end;

   /*------merge the cluster variables------*/
   data _merge_;
      /* The second semicolon ends the MERGE statement; the first one */
      /* belongs to the macro %END.                                   */
      merge %do n=1 %to &nperm; _out_&n %end; ;
      by &id &var;
      /* ALL_CLUS holds the cluster numbers in consecutive            */
      /* 3-character fields, one field per permutation.               */
      length all_clus $ %eval(3 * &nperm);
      %do n=1 %to &nperm;
         substr( all_clus, %eval(1+(&n-1)*3), 3) = put( _clus_&n, 3.);
      %end;
   run;

   /*------print and tabulate cluster combinations------*/
   proc sort data=_merge_;
      by _clus_:;
   run;

   /* NOTSORTED: rows are ordered by the numeric _CLUS_ variables,    */
   /* not by the character value of ALL_CLUS.                         */
   proc print data=_merge_;
      var &var;
      id &id;
      by all_clus notsorted;
   run;

   /* FORMCHAR= takes eleven formatting characters. All eleven are    */
   /* set to blanks here so the table is printed without outlines; a  */
   /* shorter string would blank only the leading positions.          */
   proc tabulate data=_merge_ order=data formchar='           ';
      class all_clus;
      var &var;
      table all_clus, n='FREQ'*f=5. mean*f=&meanfmt*(&var) /
            rts=%eval(&nperm*3+1);
   run;
%mend TREEPERM;
/* The variables are listed one by one because TREEPERM's VAR argument */
/* may not contain "-" or ":" list abbreviations.                      */
%let vlist=v1 v2 v3 v4 v5 v6 v7 v8;

/*====== Pass 1: raw data, average linkage, 10 permutations ======*/
title3 'raw data';

/* cluster each of the 10 random orderings */
%CLUSPERM(teeth, &vlist, mammal, average, 10);

/* plot the statistics for the last 20 levels */
%PLOTPERM(_psf_ _pst2_ _ccc_, 20, 10);

/* examine the 4-cluster level */
%TREEPERM(&vlist, mammal, 9.1, 4, 10);

/*====== Pass 2: standardized data, average linkage, 10 permutations ======*/
title3 'Standardized Data';

/* STD is passed along with the method name */
%CLUSPERM(teeth, &vlist, mammal, average std, 10);

/* plot the statistics for the last 20 levels */
%PLOTPERM(_psf_ _pst2_ _ccc_, 20, 10);

/* examine the 4-cluster level */
%TREEPERM(&vlist, mammal, 9.1, 4, 10);
|