/* Session setup. S=80 sets the S= system option (length of source    */
/* statements and of data lines that follow CARDS/DATALINES), so keep */
/* program and data lines within 80 columns. SYMBOLGEN, MACROGEN and  */
/* MPRINT are macro debugging options that write to the SAS log. The  */
/* two DM statements issue the display manager commands shown.        */
options s=80 symbolgen macrogen mprint;
dm 'clear log';
dm 'clear listing';
/**********************************************************************/
/* Name of Program: annoboxn.sas                                      */
/* NOTE: This code has been improved since it was published in        */
/*       OBSERVATIONS.                                                */
/* Purpose: This program generates a notched box plot with fences     */
/*          and whiskers, bypassing boxplot width limitations by      */
/*          using annotate. It also places labels at the right-hand   */
/*          side of the median and does the notched output.           */
/* Authors: SAS Institute Staff - Peter Ruzsa and Dr. Mike Kalt       */
/* General Structure: 1. Program reads data                           */
/*                    2. Extracts min and maximum values              */
/*                    3. Runs PROC UNIVARIATE to get box plot         */
/*                       values                                       */
/*                    4. Automatically computes axis values           */
/*                    5. Links annotate dataset to the data and       */
/*                       plots it                                     */
/*                                                                    */
/* Assumptions: o That y and x variables are numeric                  */
/*              o That there are at least 2 different values for      */
/*                the X variable (If there is only one value          */
/*                for the X variable, contact SAS Institute           */
/*                Technical Support.)                                 */
/*              o That outlying points will be those points           */
/*                beyond the percentile values                        */
/* Updates:   01oct95                                                 */
/*            1. Fix of whiskers to reflect real Percentile           */
/*               values                                               */
/*            2. Allows upper and lower whiskers to be                */
/*               non-symmetric                                        */
/*            26oct95                                                 */
/*            1. Added support for plotting outlier points            */
/*            2. Added macro support for outliers as well             */
/*            04dec95                                                 */
/*            1. Code and comment tightening for efficiency           */
/*               and better code blocking                             */
/*            2. X and Y variables are now macros that can be         */
/*               any name                                             */
/*            06dec95                                                 */
/*            1. code and comment enhancements to make the            */
/*               program easier to follow                             */
/*            05apr96                                                 */
/*            1. added notching capability                            */
/*            2. redid whisker capability to follow Tukey model       */
/*            3. Added upper and lower fences                         */
/*            4. Moved labels to bottom of graph instead of top       */
/*               of each individual box                               */
/*            09apr96                                                 */
/*            1. 
fixed lower half of box so boundary matches          */
/*               poly *;                                              */
/*            2. added comments to annotate dataset                   */
/*                                                                    */
/* For more information on notching formulae, see                     */
/* the SAS/QC Software Volume 2 on box plot procedures                */
/**********************************************************************/
/**********************************************************************/
/* First we set up the variables and controls.                        */
/**********************************************************************/
/**********************************************************************/
/* GENERAL CONTROLS                                                   */
/*                                                                    */
/* Note: To turn off the lines and symbols, set symbol and interpol   */
/*       to none. The labels at the top of the bars are controlled    */
/*       in the annotate dataset itself.                              */
/* 1. BARWIDTH  - controls the width of the actual bars themselves.   */
/*                To increase the width of the bar increase the .05   */
/*                value in the %LET statement. To decrease the        */
/*                width, decrease the value.                          */
/* 2. MEDWIDTH  - controls the width of the median line. To increase  */
/*                the width of the line increase the .03 value in     */
/*                the %LET statement. To decrease the width,          */
/*                decrease the value.                                 */
/* 3. ENDWIDTH  - controls the width of the whisker end bar.          */
/*                Whiskers are now the maximum and minimum numbers    */
/*                inside the fence boundaries.                        */
/* 4. SYMBOL    - controls the character placed at the mean point     */
/* 5. SYMCOLOR  - color of the symbol placed at the mean point        */
/* 6. SYMHIGH   - controls the height of the symbol placed at the     */
/*                mean                                                */
/* 7. LINECOLR  - controls the color of the line that connects the    */
/*                median points on the graph                          */
/* 8. LINESTYL  - controls the line style type                        */
/*                valid values are 1-46 (it is passed to the L=       */
/*                option of the SYMBOL statement)                     */
/* 9. FENCESTY  - controls the line style type of the fences          */
/*                valid values are 1-46, not L1-L46                   */
/* 10. INTERPOL - controls the line interpolation (such as join or    */
/*                spline)                                             */
/* 11. OUTLIER  - controls whether the outlying points are plotted. 
*/ /* By default a square is used for the symbol. */ /* You can turn this off by setting outlier to NONE */ /* with NO quote marks */ /* 12. OUTCOLOR - color of the outlying points */ /* 13. OUTHT - height of the outlying points */ /* */ /**********************************************************************/ /* */ /* COLOR CONTROLS */ /* */ /* 1. WHISKTPC - controls the color of the upper vertical whisker */ /* line */ /* 2. WHISKBTC - controls the color of the bottom vertical whisker */ /* line */ /* 3. BOXLINEC - controls the color of the outline of the box */ /* 4. BOXCOLOR - controls the color of the interior of the box */ /* 5. BOXFILL - controls the pattern of the box, if filled */ /* 6. MEDIANC - controls the color of the median bar */ /* 7. ENDTPCLR - controls the color of the top whisker end bar */ /* 8. ENDBTCLR - controls the color of the bottom whisker end bar */ /* 9. FENCECLR - controls the color of the fence boundaries */ /* */ /**********************************************************************/ /* */ /* THICKNESS CONTROLS */ /* */ /* 1. TOPSIZE - controls the thickness of the top whisker vertical */ /* bar */ /* 2. BOTSIZE - controls the thickness of the bottom whisker */ /* vertical bar */ /* 3. MEDSIZE - controls the thickness of the median horizontal */ /* bar */ /* 4. BOXSIZE - controls the thickness of the box outline */ /* 5. ENDTOPSZ - controls the thickness of the upper whisker end */ /* bar */ /* 6. ENDBOTSZ - controls the thickness of the bottom whisker end */ /* bar */ /* */ /**********************************************************************/ /* */ /* LABEL CONTROLS */ /* */ /* 1. TOPLABEL - label to place at the top of each box */ /* 2. TOPSTAT - the statistics at the top of each box */ /* */ /* This labels the top of the bar - if you do not want the label, */ /* comment it out of the program. 
The step is in the Annotate DATA    */
/* step under 'label at the top'                                      */
/*                                                                    */
/* Valid topstat values are - MED50   - the median                    */
/*                          - MEAN1   - the mean                      */
/*                          - UP75    - the upper 75 % (Q3)           */
/*                          - LOW25   - the lower 25 % (Q1)           */
/*                          - N1      - the number of observations    */
/*                          - RANGE1  - range of values               */
/*                          - UNIMIN  - the minimum value             */
/*                          - UNIMAX  - the maximum value             */
/*                          - QRANGE1 - range of q1 to q3             */
/*                          - &XVAR   - value of the horizontal       */
/*                                      value                         */
/*                                                                    */
/* 3. TOPSEP  - controls the space between the label and the          */
/*              statistic value. The current value is set to .15,     */
/*              but if you are doing a portrait graph you probably    */
/*              want to decrease the value                            */
/* 4. XLABEL  - text to place below the horizontal axis               */
/* 5. XCLR    - color to make the horizontal axis label               */
/* 6. XFONT   - font for the horizontal axis label                    */
/* 7. XHEIGHT - height of the horizontal axis label                   */
/* 8. YLABEL  - text to place on the vertical axis                    */
/* 9. YCLR    - color to make the vertical axis label                 */
/* 10. YFONT  - font for the vertical axis label                      */
/* 11. YHEIGHT- height of the vertical axis label                     */
/*                                                                    */
/* TO GET RID OF THE TITLE, COMMENT it out in the GPLOT STEP          */
/*                                                                    */
/* 12. TITLE1 - the title to go on the chart, add more titles if      */
/*              needed                                                */
/* 13. TCLR   - the color of the title                                */
/* 14. THEIGHT- the height of the title                               */
/* 15. 
TFONT  - the font of the title                                 */
/**********************************************************************/

/*--- Analysis variables ---------------------------------------------*/
/* X (grouping) variable */
%let xvar=locat;
/* Y (measured) variable */
%let yvar=measure;

/*--- Widths, as a fraction of the X range ---------------------------*/
/* width of the bar */
%let barwidth=.050;
/* width of the whisker end bar */
%let endwidth=.025;
/* width of the median line */
%let medwidth=.030;

/*--- Percentile statistics requested from PROC UNIVARIATE -----------*/
/* upper percentile statistic */
%let pfact1=P90;
/* lower percentile statistic */
%let pfact2=P10;

/*--- Symbol drawn at the mean value ---------------------------------*/
/* symbol, color and height for the mean value */
%let symbol=+;
%let symcolor=magenta;
%let symhigh=2;

/*--- Line joining the medians (INTERPOL=none turns the line off) ----*/
/* color, line style, width and interpolation of the joining line */
%let linecolr=cyan;
%let linestyl=2;
%let linewid=4;
%let interpol=join;

/*--- Colors ----------------------------------------------------------*/
/* top and bottom vertical whiskers */
%let whisktpc=cyan;
%let whiskbtc=cyan;
/* median line */
%let medianc=red;
/* box outline, box fill pattern, box interior */
%let boxlinec=yellow;
%let boxfill=solid;
%let boxcolor=blue;
/* end bars of the upper and bottom whiskers */
%let endtpclr=red;
%let endbtclr=red;
/* upper and lower fences: color and line style */
%let fenceclr=magenta;
%let fencesty=2;

/*--- Line thicknesses ------------------------------------------------*/
/* top and bottom vertical whiskers */
%let topsize=5;
%let botsize=5;
/* median line and box outline */
%let medsize=2;
%let boxsize=2;
/* top and bottom horizontal whisker end bars */
%let endtopsz=4;
%let endbotsz=4;

/*--- Label written for each box --------------------------------------*/
/* label text and the statistic printed with it */
%let toplabel=mean;
%let topstat=mean1;
/* separation between the label and the statistic */
%let topsep=.15;
/* color of the label */
%let topcolor=blue;
/**********************************************************************/
/* Outlier marker. OUTLIER is a character taken from the OUTFONT      */
/* font; setting it to none (without quotes) disables this feature.   */
/* Good letters to use in the marker font are                         */
/*    U - square     C - filled triangle     P - diamond              */
/**********************************************************************/
%let outlier=P;           /* marker font character for outliers    */
%let outcolor=green;      /* color of the outlying symbol          */
%let outht=.5;            /* height of the outlying symbol         */
%let outfont=marker;      /* font of the outlier symbols           */

%let xlabel=Altitude;     /* label to place on the x axis          */
%let xclr=magenta;        /* color of the x label                  */
%let xheight=1;           /* height of the x label                 */
/* The semicolon below is required: a %LET value runs up to the next  */
/* semicolon, so without it the following %LET YLABEL statement would */
/* be swallowed into the value of XFONT.                              */
%let xfont=swiss;         /* font of the x label                   */

%let ylabel=Level;        /* label to place on the y axis          */
%let yclr=blue;           /* color of the y label                  */
%let yheight=1;           /* height of the y label                 */
%let yfont=swiss;         /* font of the y label                   */

%let Title1= Ozone Levels; /* value to place on the title          */
%let tclr= magenta;       /* color of the title                    */
%let theight =2;          /* height of the title                   */
%let tfont=swiss;         /* font of the title                     */

/**********************************************************************/
/* Then we read in the data.                                          */
/**********************************************************************/
/* goptions reset=all;                                                */
/**********************************************************************/
/* Each data line holds one observation: the &yvar value followed by  */
/* the &xvar value.                                                   */
data rawdata;
  n=1;   /* Constant value to be used in merging later.
*/
  input &yvar &xvar;
  cards;
70 10000
65 10000
82 10000
39 10000
73 10000
90 10000
40 10000
77 10000
83 10000
66 10000
88 10000
73 10000
70 10000
30 10000
44 10000
51 10000
98 10000
15 15000
13 15000
11 15000
9 15000
2 15000
22 15000
-3 15000
2 15000
42 20000
25 20000
18 20000
6 20000
33 20000
12 20000
17 20000
27 20000
3 20000
41 20000
2 20000
19 20000
31 20000
15 25000
13 25000
11 25000
9 25000
2 25000
22 25000
-3 25000
2 25000
130 30000
125 30000
70 30000
50 30000
30 30000
30 30000
32 30000
34 30000
35 30000
36 30000
37 30000
39 30000
40 30000
41 30000
42 30000
43 30000
81 30000
85 30000
10 30000
5 30000
52 40000
22 40000
18 40000
15 40000
11 40000
9 40000
52 40000
58 40000
150 40000
56 40000
57 40000
58 40000
75 40000
25 40000
19 40000
11 40000
0 40000
;

/* BY-group processing below needs the data ordered by &xvar.         */
proc sort data=rawdata;
  by &xvar;
run;

/**********************************************************************/
/* PROC UNIVARIATE is then run to get the statistics we need to       */
/* generate the box plots including the quartiles and percentiles.    */
/* NOPRINT is not specified on the PROC statement, so the printed     */
/* output is produced and can be compared with the graphics boxplot;  */
/* add NOPRINT to the PROC statement to suppress it.                  */
/* The &PFACT1 and &PFACT2 percentiles are saved as PFACTR1 and       */
/* PFACTR2 in the STATUNI output data set.                            */
/**********************************************************************/
proc univariate data=rawdata;
  id n;
  var &yvar;
  by &xvar;
  output Q3=up75 MEDIAN=med50 Q1=low25 mean=mean1 n=n1
         &pfact1=pfactr1 &pfact2=pfactr2
         range=range1 qrange=qrange1
         min=unimin max=unimax
         out=statuni;
run;

/**********************************************************************/
/* This datastep generates the fence values. For more information on  */
/* fence values, see the SAS/QC reference volume 2. 
*/
/**********************************************************************/
/* Upper/lower fences: 1.5 interquartile ranges beyond Q3 and Q1.     */
data statuni;
  set statuni;
  upfence  = up75  + (1.5 * qrange1);
  lowfence = low25 - (1.5 * qrange1);
run;

proc sort data=statuni;
  by &xvar;
run;

/* Attach each group's statistics (including the fences) to its raw   */
/* observations.                                                      */
data rawdata2;
  merge rawdata statuni;
  by &xvar;
run;

/**********************************************************************/
/* The next two steps take the output, remove values above and        */
/* below the fence values and get the maximum and minimum values for  */
/* the whiskers. See the SAS/QC reference for more information on     */
/* this topic. The whiskmin and whiskmax values are used to draw the  */
/* whiskers later.                                                    */
/* NOTE(review): the comparison is strict, so a value exactly equal   */
/* to a fence is dropped here; confirm that this is intended.         */
/**********************************************************************/
data rawdata2;
  set rawdata2;
  if ((&yvar > lowfence) and (&yvar < upfence));
run;

proc univariate data=rawdata2;
  id n;
  var &yvar;
  by &xvar;
  output min=whiskmin max=whiskmax out=statuni2;
run;

/**********************************************************************/
/* PROC MEANS is used to calculate the minimum and maximum values for */
/* the X and Y variables. MINMAX holds a single observation.          */
/**********************************************************************/
proc means data=rawdata;
  id n;
  var &xvar &yvar;
  output out=minmax
         max(&yvar)=maxy min(&yvar)=miny
         max(&xvar)=maxx min(&xvar)=minx;
run;

/**********************************************************************/
/* The minimum and maximum values for the X and Y variables are       */
/* combined with the output from both PROC UNIVARIATE runs.           */
/*                                                                    */
/* STATUNI and STATUNI2 are matched on &xvar rather than on the       */
/* constant N. Matching on N relied on the ID variable being copied   */
/* to the OUT= data sets (PROC UNIVARIATE may require the IDOUT       */
/* option for that) and paired the rows by position only, which       */
/* misaligns the whisker values if a group has no observation left    */
/* inside its fences. The single MINMAX observation is read once;     */
/* variables read with SET are retained, so its values are carried    */
/* onto every output observation.                                     */
/**********************************************************************/
data mergstat;
  merge statuni statuni2;
  by &xvar;
  if _n_ = 1 then set minmax;
run;

/**********************************************************************/
/* This DATA step creates a new variable called X2 which will be used */
/* on the horizontal axis so that the annotate structures created     */
/* below will fit on the graph itself. In the GPLOT procedure we      */
/* will plot &XVAR and X2 with the OVERLAY option. 
For observation 1, */
/* X2 is the minimum X value minus abs(maxx * abs(&BARWIDTH)). For    */
/* the rest of the observations, X2 is the maximum X value plus 10%   */
/* of the maximum X value. A similar factor is used to create Y2 so   */
/* that the annotate whiskers will fit on the graph.                  */
/* These are plotted later in the GPLOT step.                         */
/**********************************************************************/
data limits;
  set mergstat;
  /* Only the first observation gets the left-hand padding value;     */
  /* every other observation gets the right-hand padding value.       */
  if _n_=1 then x2 = (minx - (abs(maxx * abs(&barwidth))));
  else x2=(maxx + abs((maxx * .10)));
  /* Lower padding value for the vertical axis; this is the Y2 value  */
  /* that is written to LIMITS.                                       */
  y2 = low25-((abs(pfactr2) + .10*(abs(pfactr2))));
  output;
  /* NOTE(review): no OUTPUT statement follows this assignment, and   */
  /* the explicit OUTPUT above turns off the automatic output at the  */
  /* end of the step, so this upper Y2 value is never written to      */
  /* LIMITS. Confirm whether a second OUTPUT was intended.            */
  y2 = up75+((abs(pfactr1) + .10*(abs(pfactr1))));
run;

/**********************************************************************/
/* Debugging section, only uncomment when required.                   */
/* proc print data=limits;                                            */
/* run;                                                               */
/**********************************************************************/
/* General annotate information                                       */
/**********************************************************************/
/* 1. The LENGTH statement makes sure that none of the functions      */
/*    are truncated.                                                  */
/* 2. The assignment statements set up the kind of annotation you     */
/*    are going to do, in particular                                  */
/*    LABEL:    - means we are going to draw text                     */
/*    COLOR:    - the color of the label                              */
/*    STYLE:    - the font of the label                               */
/*    WHEN:     - draw the annotation after the graph is complete     */
/*    XSYS:     - sets up the xcoordinate system within the graph     */
/*    YSYS:     - sets up the ycoordinate system within the graph     */
/*    POSITION: - says to place annotation centered on the point      */
/*    MOVE:     - indicates a move to a location on the graph         */
/*    DRAW:     - draws to the next location on the graph             */
/*    BAR :     - draws a rectangle fill                              */
/**********************************************************************/
/* Other variables in the annotate dataset are:                       */
/* width controls                                                     */
/* 1. MOVEMENT - controls the width of the actual bars themselves     */
/* 2. 
MOVE2 - controls the width of the whisker end bar */ /**********************************************************************/ /* COLOR CONTROLS */ /* */ /* 1. WHISKTPC - controls the color of the upper vertical whisker */ /* line */ /* 2. WHISKBTC - controls the color of the bottom vertical whisker */ /* line */ /* 3. BOXLINEC - controls the color of the outline of the box */ /* 4. BOXCOLOR - controls the color of the interior of the box */ /* 5. BOXFILL - controls the pattern of the box that is filled */ /* 6. MEDIANC - controls the color of the middle bar */ /* 7. ENDTPCLR - controls the color of the top whisker end bar */ /* 8. ENDBTCLR - controls the color of the bottom whisker end bar */ /**********************************************************************/ /* THICKNESS CONTROLS */ /* */ /* 1. TOPSIZE - controls the thickness of the top whisker vertical */ /* bar */ /* 2. BOTSIZE - controls the thickness of the bottom whisker */ /* vertical bar */ /* 3. MEDSIZE - controls the thickness of the median horizontal */ /* bar */ /* 4. BOXSIZE - controls the thickness of the box outline */ /* 5. ENDTOPSZ - controls the thickness of the upper whisker end */ /* bar */ /* 6. 
ENDBOTSZ - controls the thickness of the bottom whisker end    */
/*               bar                                                  */
/**********************************************************************/
/* Builds the Annotate data set: for every observation of LIMITS      */
/* (one per &xvar value) it writes the rows that draw the notched     */
/* box fill and outline, the whiskers and their end bars, the median  */
/* line, the two fences and the two text labels.                      */
/**********************************************************************/
data anno1;
  set limits;
  length function color style $ 8;
  /* TEXT is declared explicitly. Otherwise its length would be taken */
  /* from the first assignment (text=toplabel, i.e. the length of the */
  /* &toplabel text) and the formatted statistic written afterwards   */
  /* would be truncated to that length.                               */
  length text $ 200;

  /* defaults: data coordinates (xsys/ysys '2'), drawn with when='b'  */
  function ='label';
  color    ='red';
  when     ='b';
  style    ='swissb';
  xsys     ='2';
  ysys     ='2';
  position ='2';
  x = &xvar;
  y = med50;

  /* half-widths derived from the X range, and the resulting          */
  /* left/right X positions for the box, whisker ends and median      */
  movement=(maxx - minx) * abs(&barwidth);
  moveleft=(x - movement);
  movert  =(x + movement);
  move2=(maxx - minx) * abs(&endwidth);
  move3=(maxx - minx) * abs(&medwidth);
  movelft2=(x - move2);
  movert2 =(x + move2);
  movelft3=(x - move3);
  movert3 =(x + move3);

  /* notch: median +/- 1.58 * interquartile range / sqrt(n)           */
  moverng  =(1.58 * (qrange1)) / (sqrt(n1));
  moveiqr1 =(med50 + moverng);
  moveiqr2 =(med50 - moverng);

  /* copy the control settings into data set variables                */
  whisktpc="&whisktpc";
  whiskbtc="&whiskbtc";
  medianc ="&medianc";
  boxlinec="&boxlinec";
  boxfill ="&boxfill";
  boxcolor="&boxcolor";
  endtpclr="&endtpclr";
  endbtclr="&endbtclr";
  fenceclr="&fenceclr";
  fencesty=&fencesty;
  topcolor="&topcolor";
  topsize =&topsize;
  botsize =&botsize;
  medsize =&medsize;
  boxsize =&boxsize;
  endtopsz=&endtopsz;
  endbotsz=&endbotsz;
  toplabel="&toplabel";
  topstat =&topstat;
  topsep  =&topsep;
  medwidth=&medwidth;

  /* draw the bottom barfill; the fill pattern comes from &boxfill    */
  /* (previously hard-coded as 'solid', which is also the default)    */
  function='move';     x=moveleft; y=low25;    output;
  function='poly';     x=moveleft; y=low25;    style=boxfill;
                       color=boxcolor;         output;
  function='polycont'; x=moveleft; y=moveiqr2; color=boxcolor; output;
  function='polycont'; x=movelft3; y=med50;    color=boxcolor; output;
  function='polycont'; x=movert3;  y=med50;    color=boxcolor; output;
  function='polycont'; x=movert;   y=moveiqr2; style=boxfill;
                       color=boxcolor;         output;
  function='polycont'; x=movert;   y=low25;    color=boxcolor; output;

  /* draw the top barfill */
  function='move';     x=moveleft; y=up75;     output;
  function='poly';     x=moveleft; y=up75;     style=boxfill;
                       color=boxcolor;         output;
  function='polycont'; x=moveleft; y=moveiqr1; color=boxcolor; output;
  function='polycont'; x=movelft3; y=med50;    color=boxcolor; output;
  function='polycont'; x=movert3;  y=med50;    color=boxcolor; output;
  function='polycont'; x=movert;   y=moveiqr1; color=boxcolor; output;
  function='polycont'; x=movert;   y=up75;     color=boxcolor; output;

  /* left side of bar, notched in to the median */
  function='move'; x=moveleft; y=low25; output;
  function='draw'; line=1; size=boxsize; x=moveleft; color=boxlinec;
                   y=moveiqr2; output;
  function='draw'; line=1; size=boxsize; x=movelft3; y=med50;
                   color=boxlinec; output;
  function='draw'; line=1; size=boxsize; x=moveleft; y=moveiqr1;
                   color=boxlinec; output;
  function='draw'; line=1; size=boxsize; x=moveleft; y=up75;
                   color=boxlinec; output;

  /* right side of bar, notched in to the median */
  function='move'; x=movert; y=low25; output;
  function='draw'; line=1; size=boxsize; x=movert; color=boxlinec;
                   y=moveiqr2; output;
  function='draw'; line=1; size=boxsize; x=movert3; y=med50;
                   color=boxlinec; output;
  function='draw'; line=1; size=boxsize; x=movert; y=moveiqr1;
                   color=boxlinec; output;
  function='draw'; line=1; size=boxsize; x=movert; y=up75;
                   color=boxlinec; output;

  /* bottom of bar */
  function='move'; x=&xvar-movement; y=low25; output;
  function='draw'; line=1; size=boxsize; x=&xvar+movement; y=low25;
                   color=boxlinec; output;

  /* top of bar */
  function='move'; x=&xvar-movement; y=up75; output;
  function='draw'; line=1; size=boxsize; x=&xvar+movement; y=up75;
                   color=boxlinec; output;

  /* top whisker */
  function='move'; x=&xvar; y=up75; output;
  function='draw'; line=1; size=topsize; x=&xvar; y=whiskmax;
                   color=whisktpc; output;

  /* end of top whisker */
  function='move'; x=movelft2; y=whiskmax; output;
  function='draw'; line=1; size=endtopsz; x=movert2; y=whiskmax;
                   color=endtpclr; output;

  /* bottom whisker */
  function='move'; x=&xvar; y=low25; output;
  function='draw'; line=1; size=botsize; x=&xvar; y=whiskmin;
                   color=whiskbtc; output;

  /* end of bottom whisker */
  function='move'; x=movelft2; y=whiskmin; output;
  function='draw'; line=1; size=endbotsz; x=movert2; y=whiskmin;
                   color=endbtclr; output;

  /* median */
  function='move'; x=movelft3; y=med50; output;
  function='draw'; line=1; size=medsize; x=movert3; y=med50;
                   color=medianc; when='b'; output;

  /* draw the upper and lower fences */
  function='move'; x=movelft3; y=upfence; output;
  function='draw'; line=fencesty; size=medsize; x=movert3; y=upfence;
                   color=fenceclr; when='b'; output;
  function='move'; x=movelft3; y=lowfence; output;
  function='draw'; line=fencesty; size=medsize; x=movert3; y=lowfence;
                   color=fenceclr; when='b'; output;

  /********************************************************************/
  /* Create the label here. ysys='1' switches Y to a percentage, so   */
  /* the label text is written at y=10 and the statistic at y=0.      */
  /********************************************************************/
  /* label at the top */
  function='move';  x=&xvar; ysys='1'; y=10; output;
  function='label'; style='swiss'; position='2'; size=1;
                    color=topcolor; text=toplabel; when='a'; output;
  function='move';  x=&xvar; y=0; ysys='1'; output;
  function='label'; style='swiss'; position='2'; size=1;
                    text=left(put(topstat,10.2));
                    color=topcolor; when='a'; output;
run;

/**********************************************************************/
/* PROC PRINT is commented out but may be used for debugging.         */
/* proc print data=anno1;                                             */
/* run;                                                               */
/**********************************************************************/
/**********************************************************************/
/* The DATA step here sets up the outlying points on the box plot     */
/* and removes the points internal to the box plots fence limits:     */
/* OUTLIERS is a copy of &yvar that is set to missing when the value  */
/* lies strictly between the two fences.                              */
/**********************************************************************/
data final;
  merge rawdata limits;
  by &xvar;
  outliers=&yvar;
  if ((outliers > lowfence) and (outliers < upfence)) then outliers=.;
run;

/**********************************************************************/
/* PROC PRINT is commented out but may be used for debugging.         */
/* proc print data=final;                                             */
/* run;                                                               */
/**********************************************************************/
/**********************************************************************/
/* Now use the annotate dataset in your GPLOT step                    */
/* 1. Six plot requests are overlaid                                  */
/*    a. med50 * &xvar   - these are the actual values that are       */
/*                         plotted. The symbol is turned off by       */
/*                         default. 
Annotate draws the line at      */
/*                         the median.                                */
/*    b. mean1 * &xvar   - the mean value of each box                 */
/*    c. Y2 * X2         - stretches the vertical and horizontal      */
/*                         axes so that the lowest values are         */
/*                         included and there is room for the boxes   */
/*                         and labels. These values are not actually  */
/*                         plotted.                                   */
/*    d. outliers*&xvar  - the outlying points of the box plot        */
/*    e. upfence*&xvar   - the upper and lower fence points; they     */
/*    f. lowfence*&xvar    are included but not plotted, the fences   */
/*                         themselves are annotated lines.            */
/* 2. The SAS log will contain a note about observations with         */
/*    missing values. This is intentional: it is how the outlying     */
/*    points are plotted without the points inside the box ranges.    */
/**********************************************************************/

/* Title and footnotes */
Title h=&theight f=&tfont c=&tclr "&title1";
Footnote1 f=swiss c=&outcolor h=1 "Outlying points are &outcolor";
Footnote2 f=swiss c=&outcolor h=1 "Plus signs are mean values";

/* Axis labels: AXIS1 is the vertical axis, AXIS2 the horizontal one  */
axis1 label=(a=90 c=&yclr f=&yfont h=&yheight "&ylabel");
axis2 label=(c=&xclr f=&xfont h=&xheight "&xlabel");

/* One SYMBOL definition per plot request, in PLOT statement order    */
symbol1 i=&interpol v=none l=&linestyl c=&linecolr w=&linewid;
symbol2 v=&symbol c=&symcolor h=&symhigh f=swissb;
symbol3 v=none c=blue;
symbol4 v=&outlier h=&outht f=&outfont c=&outcolor;
symbol5 v=none;
symbol6 v=none;

proc gplot data=final anno=anno1;
  plot med50*&xvar
       mean1*&xvar
       y2*x2
       outliers*&xvar
       upfence * &xvar
       lowfence * &xvar
       / overlay vaxis=axis1 haxis=axis2;
run;
quit;