/**********************************************************************/
/***************************************************************/
/* */
/* The following OPTIONS and TITLE statements are used with */
/* all examples in this article that do not explicitly */
/* contain other settings: */
/* */
/* options nodate nonumber nolabel ls=64; */
/* title; */
/* */
/***************************************************************/
/* */
/* The following SAS data set, SASUSER.HOUSES, is used to */
/* supply the data for most examples in this article: */
/* */
/* STYLE SQFEET BEDROOMS BATHS STREET PRICE */
/* RANCH 1250 2 1.0 Sheppard Avenue $64,000 */
/* SPLIT 1190 1 1.0 Rand Street $65,850 */
/* CONDO 1400 2 1.5 Market Street $80,050 */
/* TWOSTORY 1810 4 3.0 Garris Street $107,250 */
/* RANCH 1500 3 3.0 Kemble Avenue $86,650 */
/* SPLIT 1615 4 3.0 West Drive $94,450 */
/* SPLIT 1305 3 1.5 Graham Avenue $73,650 */
/* CONDO 1390 3 2.5 Hampshire Avenue $79,350 */
/* TWOSTORY 1040 2 1.0 Sanders Road $55,850 */
/* CONDO 2105 4 2.5 Jeans Avenue $127,150 */
/* RANCH 1535 3 3.0 State Highway $89,100 */
/* TWOSTORY 1240 2 1.0 Fairbanks Circle $69,250 */
/* RANCH 720 1 1.0 Nicholson Drive $34,550 */
/* TWOSTORY 1745 4 2.5 Highland Road $102,950 */
/* CONDO 1860 2 2.0 Arcata Avenue $110,700 */
/* */
/***************************************************************/
/**********************************************************************/
/* OUTPUT 1 */
proc tabulate data=sasuser.houses noseps;
class style baths;
var sqfeet;
table baths, style*sqfeet=' '*(n*f=2. mean*f=5.)
/ rts=10 misstext='0';
format baths 3.1;
run;
/**********************************************************************/
/* Create the Dummy Data Set */
data dummy;
length style $ 8;
input style baths;
cards;
CONDO 1
SPLIT 1.5
RANCH 2
TWOSTORY 2.5
TWOSTORY 3
;
/**********************************************************************/
data houses;
set dummy sasuser.houses(in=is_valid);
if is_valid then valid=1;
run;
/**********************************************************************/
/* create work.houses by sorting sasuser.houses */
proc sort data=sasuser.houses out=houses;
by bedrooms;
/* append dummy in front of each by group */
data houses;
/* loop once for each observation in work.houses */
do i1=1 to nobs1;
set houses point=i1 nobs=nobs1;
/* check for the beginning of a new by group */
if bedrooms ne last_bed then do;
/* loop once for each observation in dummy */
do i2=1 to nobs2;
set dummy point=i2 nobs=nobs2;
/* set valid=. to indicate a dummy ob */
valid=.;
/* set all analysis variables to missing */
sqfeet=.;
/* output dummy observation to work.houses */
output;
end;
/* output valid observation to work.houses */
set houses point=i1;
valid=1;
output;
end;
/* output valid observation to work.houses */
else output;
/* prepare last_bed for next by group check */
last_bed = bedrooms;
end;
/* force data step to stop because point= is used */
stop;
/**********************************************************************/
/* OUTPUT 2 */
proc tabulate data=houses noseps
order=data;
class style baths;
var sqfeet valid;
table baths, style*(valid=' '*n*f=2. sqfeet=''*mean*f=5.)
/ rts=10 misstext='0';
format baths 3.1;
run;
/**********************************************************************/
/* OUTPUT 3 */
proc tabulate data=sasuser.houses noseps order=data;
class style baths;
var sqfeet;
table baths, style*sqfeet=' '*(n*f=2. mean*f=5.)
/ rts=10 misstext='0';
format baths 3.1;
run;
/**********************************************************************/
/* OUTPUT 4 */
proc report data=sasuser.houses headline spacing=1 nowd;
column baths style,('--' sqfeet sqfeet=avgsqft)
('ALL/--' sqfeet=all_n sqfeet=all_mean);
define baths / group format=5.1 order=internal left;
define style / across order=data "STYLE/--";
define sqfeet / n format=2.0 "N";
define avgsqft / mean format=5.0 "MEAN";
define all_n / n format=2.0 "N";
define all_mean / mean format=5.0 "MEAN";
run;
/**********************************************************************/
/* create an informat and format to map the values */
proc format;
invalue style 'RANCH'=1
'SPLIT'=2
'CONDO'=3
'TWOSTORY'=4;
value style 1='RANCH'
2='SPLIT'
3='CONDO'
4='TWOSTORY';
/* change old internal values to new sequential values */
data houses;
set sasuser.houses;
style2=input(style,style.);
drop style;
rename style2=style;
format style2 style.;
run;
/**********************************************************************/
/* build the key data set using order=data */
proc freq data=sasuser.houses order=data;
tables style / out=key(keep=style) noprint;
/* build the control data set */
data control;
set key;
fmtname='STYLE';
/* Output A numeric informat observation */
start =style;
label =put(_n_,8.);
type='I';
output;
/* Output A numeric format observation */
start =put(_n_,8.);
label =style;
type='N';
output;
/* separate the informat from the format by sorting */
/* because the informat and format must be contiguous */
/* for proc format to correctly use them */
proc sort data=control;
by type;
/* generate format and informat using the cntlin= */
/* option with proc format */
proc format cntlin=control;
/* remap the style values in master data set */
data houses;
set sasuser.houses;
style2=input(style,style.);
drop style;
rename style2=style;
format style2 style.;
run;
/**********************************************************************/
/* OUTPUT 5 */
proc tabulate data=houses noseps;
class style baths;
var sqfeet;
table baths, style*sqfeet=' '*(n*f=2. mean*f=5.)
/ rts=10 misstext='0';
format baths 3.1;
run;
/**********************************************************************/
/* create sample data set */
data sample;
input date date7.;
cards;
06JAN93 /* Wednesday - SAS date value: 12059 */
08JAN93 /* Friday - SAS date value: 12061 */
09JAN93 /* Saturday - SAS date value: 12062 */
11JAN93 /* Monday - SAS date value: 12064 */
14JAN93 /* Thursday - SAS date value: 12067 */
17JAN93 /* Sunday - SAS date value: 12070 */
19JAN93 /* Tuesday - SAS date value: 12072 */
21JAN93 /* Thursday - SAS date value: 12074 */
22JAN93 /* Friday - SAS date value: 12075 */
;
/* OUTPUT 6 */
proc chart data=sample;
hbar date / discrete space=0 axis=0 1 2 nostat;
format date weekdate3.;
run;
/**********************************************************************/
/* generate the dummy data set */
data dummy;
do date='03JAN60'D to '09JAN60'D;
output;
end;
run;
/* concatenate WORK.DUMMY with WORK.SAMPLE */
data sample2;
set dummy sample(in=is_valid);
if is_valid then valid=1;
run;
/* OUTPUT 6 */
proc chart data=sample2;
hbar date / discrete space=0 axis=0 1 2 nostat missing
freq=valid format date weekdate3.;
run;
/**********************************************************************/
/* map the sas date values to numeric weekdays using */
/* the weekday function */
data sample2;
set sample;
weekday=weekday(date);
drop date;
rename weekday=date;
/* create a day of the week format */
proc format;
value dayfmt 1='Sun' 2='Mon' 3='Tue' 4='Wed' 5='Thu'
6='Fri' 7='Sat';
/* OUTPUT 7 */
proc chart data=sample2;
hbar date / discrete space=0 axis=0 1 2 nostat;
format date dayfmt.;
run;
/**********************************************************************/
/* create the dummy data set */
data dummy;
length style $ 8;
input style baths;
cards;
CONDO 1
SPLIT 2
RANCH 3
TWOSTORY 1.5
TWOSTORY 2.5
;
/* concatenate dummy with sasuser.houses */
data houses;
set dummy sasuser.houses(in=is_valid);
if is_valid then valid=1;
run;
/* OUTPUT 8 */
proc tabulate data=houses noseps order=data;
class style baths;
var sqfeet valid;
table baths, style*(valid=' '*n*f=2. sqfeet=' '*
mean*f=5.)
/ rts=10 misstext='0';
format baths 3.1;
run;
/**********************************************************************/
/* generate the control data set using */
/* dummy in example 6 */
data control;
set dummy;
fmtname = 'STYLE'; /* INFORMAT FOR STYLE */
start = style;
label = put(_n_,8.);
type = 'I';
output;
fmtname = 'BATHS'; /* INFORMAT FOR BATHS */
start = put(baths,3.1);
output;
fmtname = 'STYLE'; /* FORMAT FOR STYLE */
start = put(_n_,8.);
label = style;
type = 'N';
output;
fmtname = 'BATHS'; /* FORMAT FOR BATHS */
label = put(baths,3.1);
output;
/* separate the informat from the format by sorting */
proc sort data=control nodupkey;
by fmtname type start;
/* generate format and informat using control data set */
proc format cntlin=control;
/* map the data values of master data set */
data houses;
set sasuser.houses;
style2=input(style,style.);
baths2=input(put(baths,3.1),baths.);
drop style baths;
rename style2=style baths2=baths;
format style2 style. baths2 baths.;
run;
/* OUTPUT 8 */
proc tabulate data=houses noseps
order=internal;
class style baths;
var sqfeet;
table baths,
style*(valid=' '*n*f=2. sqfeet=' '*mean*f=5.)
/ rts=10 misstext='0';
run;
/**********************************************************************/
/* OUTPUT 9 */
options label;
proc sql;
select
style label='STYLE',
n(style) label='N Obs',
mean(price) as price label='Mean' format=dollar8.
from sasuser.houses
group style
order price desc;
run;
/**********************************************************************/
/* OUTPUT 10a */
proc means data=sasuser.houses mean maxdec=0;
title1 "DEFAULT";
title2 "ORDERING SEQUENCE";
class style;
var price;
/* calculate the mean of price for each value of style */
proc means data=sasuser.houses nway noprint;
class style;
var price;
output out=houses mean= n=n;
/* sort by descending mean values of price */
proc sort data=houses;
by descending price;
/* OUTPUT 10b */
proc means data=houses mean maxdec=0 order=data;
title1 "DESIRED";
title2 "ORDERING SEQUENCE";
class style;
var price;
freq n;
run;
/**********************************************************************/
/* OUTPUT 11 */
proc tabulate data=sasuser.houses noseps format=7.;
class style baths;
var price;
table style, (baths all)*price*mean;
run;
/**********************************************************************/
/* calculate the mean of price for each value of style */
proc summary data=sasuser.houses nway;
class style;
var price;
output out=temp1 mean=;
/* sort by descending mean values of price */
proc sort data=temp1;
by descending price;
/* generate ascending values for bedrooms */
proc freq data=sasuser.houses;
tables baths / out=temp2 noprint;
/* generate the dummy data set */
data dummy;
merge temp1(in=a) temp2(in=b);
one=1;
if not a then set temp1 point=one;
if not b then set temp2 point=one;
drop price;
/* concatenate dummy with sasuser.houses */
data houses;
set dummy sasuser.houses;
keep style baths price;
/* OUTPUT 13 */
proc tabulate data=houses noseps format=7. order=data;
class style baths;
var price;
table style, (baths all)*price*mean;
run;
/**********************************************************************/
/* calculate the mean statistic for each style */
proc summary data=sasuser.houses nway;
class style;
var price;
output out=key mean=;
/* sort by descending means of price */
proc sort data=key;
by descending price;
/* build control data set */
data control;
set key;
fmtname='STYLEF';
start =style;
label =put(_n_,8.);
type='I';
output;
start =put(_n_,8.);
label =style;
type='N';
output;
/* separate the informat from the format by sorting */
proc sort data=control;
by type;
/* generate format and informat using control data set */
proc format cntlin=control;
/* map the data values of master data set */
data houses;
set sasuser.houses;
stylenum=input(style,stylef.);
drop style;
rename stylenum=style;
/* OUTPUT 12 */
proc tabulate data=houses noseps format=7.;
class style baths;
var price;
table style, (baths all)*price*mean;
format style stylef.;
run;
/**********************************************************************/