/* www.sas.com > Service and Support > Technical Support */
/* Technical Support SAS - The power to know(tm) */
/* TS Home | Intro to Services | News and Info | Contact TS | Site Map | FAQ | Feedback */

  

/****************************************************************/
/* SAS SAMPLE LIBRARY */
/* */
/* NAME: CATMODEX */
/* TITLE: Documentation Examples from PROC CATMOD */
/* PRODUCT: STAT */
/* SYSTEM: ALL */
/* KEYS: categorical data analysis, */
/* PROCS: CATMOD */
/* DATA: */
/* */
/* REF: */
/* MISC: */
/* */
/****************************************************************/

/*-- Example 1 ---------------------------------------------------*/
/* */
/* Detergent Preference Study */
/* -------------------------- */
/* The data are from a consumer blind trial of detergent */
/* preference. The variables measured in the study were */
/* softness=softness of laundry water (soft, med, hard) */
/* prev=previous user of brand m? (yes, no) */
/* temp=temperature of laundry water (high, low) */
/* brand=brand preferred (m, x). */
/* */
/* From: Ries and Smith (1963). */
/* See also Cox (1970, p. 38). */
/* */
/* Illustrate: linear response function, r=2 responses */
/* */
/*----------------------------------------------------------------*/

title 'DETERGENT PREFERENCE STUDY';

/* One observation per softness x brand x prev x temp cell; COUNT is   */
/* the cell frequency.  The trailing @@ holds the input line so that   */
/* several cells can be read from each data record.                    */
data deterg;
   input softness $ brand $ prev $ temp $ count @@;
   datalines;
soft x yes high 19 soft x yes low 57 soft x no high 29 soft x no low 63
soft m yes high 29 soft m yes low 49 soft m no high 27 soft m no low 53
med x yes high 23 med x yes low 47 med x no high 33 med x no low 66
med m yes high 47 med m yes low 55 med m no high 23 med m no low 50
hard x yes high 24 hard x yes low 37 hard x no high 42 hard x no low 68
hard m yes high 43 hard m yes low 52 hard m no high 30 hard m no low 42
;

/* Run group 1: saturated model for BRAND with a linear response       */
/* function (coefficients 1 0 applied to the two response levels).     */
proc catmod data=deterg;
   title2 'SATURATED MODEL';
   weight count;
   response 1 0;
   model brand = softness|prev|temp
         / freq prob nodesign;
run;

   /* Run group 2: same response function, main effects only.          */
   title2 'MAIN EFFECTS MODEL';
   model brand = softness prev temp
         / noprofile;
run;
quit;

/*-- Example 2 ---------------------------------------------------*/
/* */
/* Dumping Syndrome Data */
/* --------------------- */
/* Four surgical operations for duodenal ulcers were compared */
/* in a clinical trial at four hospitals. The response was the */
/* severity of an undesirable complication called dumping */
/* syndrome. The operations were */
/* */
/* a. drainage and vagotomy */
/* b. 25% resection and vagotomy */
/* c. 50% resection and vagotomy */
/* d. 75% resection */
/* */
/* From: Grizzle, Starmer, and Koch (1969, 489-504). */
/* */
/* Illustrate: mean score response function, r=3 responses */
/* */
/*----------------------------------------------------------------*/

title 'DUMPING SYNDROME DATA';

/* One observation per hospital x operation x severity cell; WT is the */
/* cell frequency.  Three cells are read from each data record (@@).   */
data operate;
   input hospital trt $ severity $ wt @@;
   datalines;
1 a none 23 1 a slight 7 1 a moderate 2
1 b none 23 1 b slight 10 1 b moderate 5
1 c none 20 1 c slight 13 1 c moderate 5
1 d none 24 1 d slight 10 1 d moderate 6
2 a none 18 2 a slight 6 2 a moderate 1
2 b none 18 2 b slight 6 2 b moderate 2
2 c none 13 2 c slight 13 2 c moderate 2
2 d none 9 2 d slight 15 2 d moderate 2
3 a none 8 3 a slight 6 3 a moderate 3
3 b none 12 3 b slight 4 3 b moderate 4
3 c none 11 3 c slight 6 3 c moderate 2
3 d none 7 3 d slight 7 3 d moderate 4
4 a none 12 4 a slight 9 4 a moderate 1
4 b none 15 4 b slight 3 4 b moderate 2
4 c none 14 4 c slight 8 4 c moderate 3
4 d none 13 4 d slight 6 4 d moderate 4
;

/* ORDER=DATA keeps the levels in the order they appear in the data    */
/* (none, slight, moderate), so the scores 0, 0.5, 1 in the RESPONSE   */
/* statement line up with increasing severity.                         */
proc catmod data=operate order=data;
   title2 'MAIN EFFECTS MODEL';
   weight wt;
   response 0 0.5 1;
   model severity = trt hospital
         / freq oneway;
quit;

/*-- Example 3 ---------------------------------------------------*/
/* */
/* Maximum Likelihood Logistic Regression */
/* -------------------------------------- */
/* Ingots prepared with different heating and soaking times are */
/* tested for readiness to roll. */
/* */
/* From: Cox (1970, 67-68). */
/* */
/* Illustrate: logistic regression, standard response function */
/* */
/*----------------------------------------------------------------*/

title 'MAXIMUM-LIKELIHOOD LOGISTIC REGRESSION';

/* Each input group (heat, soak, nready, ntotal) is expanded into two  */
/* weighted observations: y=1 with weight NREADY and y=0 with weight   */
/* NTOTAL-NREADY.  The raw counts are dropped afterwards.              */
data ingots;
   input heat soak nready ntotal @@;

   count = nready;
   y     = 1;
   output;

   count = ntotal - nready;
   y     = 0;
   output;

   drop nready ntotal;
   datalines;
7 1.0 0 10 7 1.7 0 17 7 2.2 0 7 7 2.8 0 12
7 4.0 0 9 14 1.0 0 31 14 1.7 0 43 14 2.2 2 33
14 2.8 0 31 14 4.0 0 19 27 1.0 1 56 27 1.7 4 44
27 2.2 0 21 27 2.8 1 22 27 4.0 1 16 51 1.0 3 13
51 1.7 0 1 51 2.2 0 1 51 4.0 0 1
;

/* HEAT and SOAK are named in DIRECT so they enter the model as        */
/* quantitative covariates; ML with NOGLS requests maximum likelihood  */
/* estimation only.                                                    */
proc catmod data=ingots;
   weight count;
   direct heat soak;
   model y = heat soak
         / freq ml nogls covb corrb;
quit;

/*-- Example 4 ---------------------------------------------------*/
/* */
/* Bartlett's Data */
/* --------------- */
/* Cuttings of two different lengths were planted at one of two */
/* time points, and their survival status was recorded. The */
/* variables are */
/* v1=survival status (dead or alive) */
/* v2=time of planting (spring or at_once) */
/* v3=length of cutting (long or short). */
/* */
/* From: Bishop, Fienberg, and Holland (1975, 89) */
/* */
/* Illustrate: log-linear model, three dependent variables */
/* */
/*----------------------------------------------------------------*/

title 'BARTLETT''S DATA';

/* Eight cells of the v3 x v2 x v1 table, four per data record; WT is  */
/* the cell frequency.                                                 */
data b;
   input v3 v2 v1 wt @@;
   datalines;
1 1 1 156 1 1 2 84 1 2 1 84 1 2 2 156
2 1 1 107 2 1 2 133 2 2 1 31 2 2 2 209
;

/* Log-linear model on the three-way table: the bar notation with @ 2  */
/* keeps main effects and two-variable interactions only.              */
proc catmod data=b;
   title2 'MODEL WITH NO 3-VARIABLE INTERACTION';
   weight wt;
   model v3*v2*v1 = _response_
         / nogls noparm noresponse pred=freq ml;
   loglin v3|v2|v1 @ 2;
quit;

/*-- Example 5 ---------------------------------------------------*/
/* */
/* Behavior of Squirrel Monkeys */
/* ---------------------------- */
/* In a population of 6 squirrel monkeys, the joint distribution */
/* of genital display with respect to (active role, passive role) */
/* was observed. Since a monkey cannot have both the active and */
/* passive roles in the same interaction, the diagonal cells of */
/* the table are structural zeros. */
/* */
/* From: Fienberg (1980, Table 8-2) */
/* */
/* Illustrate: log-linear model, structural zeros and random */
/* zeros */
/* */
/*----------------------------------------------------------------*/

title 'BEHAVIOR OF SQUIRREL MONKEYS';

/* Rows with active='t' are removed by the subsetting IF.  For the     */
/* remaining off-diagonal cells (active ne passive) a zero count is    */
/* replaced by 1e-20; diagonal cells keep their zero weight.           */
data display;
   input active $ passive $ wt @@;
   if active ne 't';
   if (active ne passive) and (wt = 0) then wt = 1e-20;
   datalines;
r r 0 r s 1 r t 5 r u 8 r v 9 r w 0
s r 29 s s 0 s t 14 s u 46 s v 4 s w 0
t r 0 t s 0 t t 0 t u 0 t v 0 t w 0
u r 2 u s 3 u t 1 u u 0 u v 38 u w 2
v r 0 v s 0 v t 0 v u 0 v v 0 v w 1
w r 9 w s 25 w t 4 w u 6 w v 13 w w 0
;

/* Main-effects log-linear model for the active x passive table.       */
proc catmod data=display;
   title2 'TEST QUASI-INDEPENDENCE FOR THE INCOMPLETE TABLE';
   weight wt;
   model active*passive = _response_
         / ml nogls freq pred=freq noparm noresponse;
   loglin active passive;
quit;

/*-- Example 6 ---------------------------------------------------*/
/* */
/* Multi-Population Repeated Measures */
/* ---------------------------------- */
/* Subjects from 3 groups have their response (0 or 1) recorded */
/* at each of four trials. */
/* */
/* From: Guthrie (1981). */
/* */
/* Illustrate: repeated measures, 2 levels of response, */
/* 3 populations */
/* */
/*----------------------------------------------------------------*/

title 'MULTI-POPULATION REPEATED MEASURES';

/* A, B, C, D are the four trial responses, GROUP identifies the       */
/* population, and WT is the frequency of the response pattern.        */
data group;
   input a b c d group wt @@;
   datalines;
1 1 1 1 2 2 0 0 0 0 2 2 0 0 1 0 1 2 0 0 1 0 2 2
0 0 0 1 1 4 0 0 0 1 2 1 0 0 0 1 3 3 1 0 0 1 2 1
0 0 1 1 1 1 0 0 1 1 2 2 0 0 1 1 3 5 0 1 0 0 1 4
0 1 0 0 2 1 0 1 0 1 2 1 0 1 0 1 3 2 0 1 1 0 3 1
1 0 0 0 1 3 1 0 0 0 2 1 0 1 1 1 2 1 0 1 1 1 3 2
1 0 1 0 1 1 1 0 1 1 2 1 1 0 1 1 3 2
;

/* Run group 1: marginal response functions with GROUP, the repeated   */
/* factor TRIAL (4 levels) and their interaction.                      */
proc catmod data=group;
   title2 'SATURATED MODEL';
   weight wt;
   response marginals;
   model a*b*c*d = group _response_ group*_response_
         / freq nodesign;
   repeated trial 4;
run;

   /* Run group 2: the repeated-measures effect is kept only within    */
   /* GROUP=3.                                                         */
   title2 'TRIAL NESTED WITHIN GROUP 3';
   model a*b*c*d = group _response_(group=3)
         / noprofile noparm;
run;
quit;

/*-- Example 7 ---------------------------------------------------*/
/* */
/* Testing Vision: Right Eye vs. Left */
/* ---------------------------------- */
/* 7477 women aged 30-39 were tested for vision in both right and */
/* left eyes. Marginal homogeneity is tested by the main effect */
/* of the repeated measurement factor, SIDE. */
/* */
/* From: Grizzle, Starmer and Koch (1969, 493). */
/* */
/* Illustrate: repeated measures, 4 levels of response, */
/* one population */
/* */
/*----------------------------------------------------------------*/

title 'VISION SYMMETRY';

/* 4 x 4 table of right-eye by left-eye grade; COUNT is the cell       */
/* frequency, four cells per data record.                              */
data vision;
   input right left count @@;
   datalines;
1 1 1520 1 2 266 1 3 124 1 4 66
2 1 234 2 2 1512 2 3 432 2 4 78
3 1 117 3 2 362 3 3 1772 3 4 205
4 1 36 4 2 82 4 3 179 4 4 492
;

/* RIGHT and LEFT are treated as two levels of the repeated factor     */
/* SIDE; the model is fit to the marginal probabilities.               */
proc catmod data=vision;
   title2 'TEST OF MARGINAL HOMOGENEITY';
   weight count;
   response marginals;
   model right*left = _response_
         / freq;
   repeated side 2;
quit;

/*-- Example 8 ---------------------------------------------------*/
/* */
/* Growth Curve Analysis */
/* --------------------- */
/* Subjects from 2 diagnostic groups (mild or severe) are given */
/* one of 2 treatments (std or new), and their response to */
/* treatment (n=normal or a=abnormal) is recorded at each of 3 */
/* times (weeks 1, 2, and 4) */
/* */
/* From: Koch et al. (1977) */
/* */
/* Illustrate: repeated measures, logistic analysis of growth */
/* curve */
/* */
/*----------------------------------------------------------------*/

title 'GROWTH CURVE ANALYSIS';

/* One observation per diag x trt x (week1, week2, week4) response     */
/* pattern; COUNT is the pattern frequency.  Two patterns per record.  */
data growth2;
   input diag $ trt $ week1 $ week2 $ week4 $ count @@;
   datalines;
mild std n n n 16 severe std n n n 2
mild std n n a 13 severe std n n a 2
mild std n a n 9 severe std n a n 8
mild std n a a 3 severe std n a a 9
mild std a n n 14 severe std a n n 9
mild std a n a 4 severe std a n a 15
mild std a a n 15 severe std a a n 27
mild std a a a 6 severe std a a a 28
mild new n n n 31 severe new n n n 7
mild new n n a 0 severe new n n a 2
mild new n a n 6 severe new n a n 5
mild new n a a 0 severe new n a a 2
mild new a n n 22 severe new a n n 31
mild new a n a 2 severe new a n a 5
mild new a a n 9 severe new a a n 32
mild new a a a 0 severe new a a a 6
;

/* The design matrix is supplied directly: 12 rows by 4 columns.       */
/* Per the parameter labels below, columns 1-2 are the week-1 terms    */
/* for mild and severe diagnosis, and columns 3-4 carry the time       */
/* scores (0, 1, 2) for the std and new treatments respectively.       */
proc catmod data=growth2 order=data;
   title2 'REDUCED LOGISTIC MODEL';
   weight count;
   population diag trt;
   response logit;
   model week1*week2*week4 =
        (1 0 0 0 ,      /* rows 1-3:   mild parameter, std time scores   */
         1 0 1 0 ,
         1 0 2 0 ,
         1 0 0 0 ,      /* rows 4-6:   mild parameter, new time scores   */
         1 0 0 1 ,
         1 0 0 2 ,
         0 1 0 0 ,      /* rows 7-9:   severe parameter, std time scores */
         0 1 1 0 ,
         0 1 2 0 ,
         0 1 0 0 ,      /* rows 10-12: severe parameter, new time scores */
         0 1 0 1 ,
         0 1 0 2 )
        (1='Mild diagnosis, week 1',
         2='Severe diagnosis, week 1',
         3='Time effect for std trt',
         4='Time effect for new trt')
        / freq;

   /* Contrasts on the four parameters in the order labelled above.    */
   contrast 'Diagnosis effect, week 1' all_parms 1 -1 0 0;
   contrast 'Equal time effects' all_parms 0 0 1 -1;
quit;

/*-- Example 9 ---------------------------------------------------*/
/* */
/* Diagnostic Procedure Comparison */
/* ------------------------------- */
/* Two diagnostic procedures (standard and test) are done on each */
/* subject, and the results of both are evaluated at each of two */
/* times as being positive or negative. */
/* */
/* From: MacMillan et al. (1981). */
/* */
/* Illustrate: repeated measures, 2 repeated measurement factors */
/* */
/*----------------------------------------------------------------*/

title 'DIAGNOSTIC PROCEDURE COMPARISON';

/* One observation per (std1, test1, std2, test2) result pattern; WT   */
/* is the pattern frequency.  Fifteen patterns are listed, three per   */
/* data record.                                                        */
data a;
   input std1 $ test1 $ std2 $ test2 $ wt @@;
   datalines;
neg neg neg neg 509 neg neg neg pos 4 neg neg pos neg 17
neg neg pos pos 3 neg pos neg neg 13 neg pos neg pos 8
neg pos pos pos 8 pos neg neg neg 14 pos neg neg pos 1
pos neg pos neg 17 pos neg pos pos 9 pos pos neg neg 7
pos pos neg pos 4 pos pos pos neg 9 pos pos pos pos 170
;

/* Run group 1: marginal probabilities, with TIME, TRTMENT and their   */
/* interaction as repeated-measurement effects.                        */
proc catmod data=a;
   title2 'MARGINAL SYMMETRY, SATURATED MODEL';
   weight wt;
   response marginals;
   model std1*test1*std2*test2 = _response_
         / freq noparm;
   repeated time 2, trtment 2
         / _response_=time trtment time*trtment;
run;

   /* Run group 2: same response functions, TRTMENT effect only.       */
   title2 'MARGINAL SYMMETRY, REDUCED MODEL';
   model std1*test1*std2*test2 = _response_
         / noprofile corrb;
   repeated time 2, trtment 2
         / _response_=trtment;
run;

   /* Run group 3: compound response functions.  The 8 x 15 matrix     */
   /* after LOG is applied to the 15 response probabilities, and the   */
   /* 4 x 8 matrix after EXP takes differences of consecutive pairs    */
   /* of the resulting logs before exponentiating.                     */
   title2 'SENSITIVITY AND SPECIFICITY ANALYSIS, MAIN EFFECTS MODEL';
   model std1*test1*std2*test2 = _response_
         / covb noprofile;
   repeated time 2, accuracy 2
         / _response_=time accuracy;
   response exp 1 -1 0 0 0 0 0 0 ,
                0 0 1 -1 0 0 0 0 ,
                0 0 0 0 1 -1 0 0 ,
                0 0 0 0 0 0 1 -1

            log 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 ,
                0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 ,
                1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 ,
                1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 ,
                0 0 0 1 0 0 1 0 0 0 1 0 0 0 1 ,
                0 0 1 1 0 0 1 0 0 1 1 0 0 1 1 ,
                1 0 0 0 1 0 0 1 0 0 0 1 0 0 0 ,
                1 1 0 0 1 1 0 1 1 0 0 1 1 0 0 ;
run;
quit;

/*-- Example 10 --------------------------------------------------*/
/* */
/* Health Survey Data Analysis */
/* --------------------------- */
/* Variational models are fit to health survey data. Estimates */
/* of a well-being index have been computed for domains */
/* corresponding to an age by sex cross-classification. */
/* */
/* From: Koch and Stokes (1979). */
/* */
/* Illustrate: directly input response functions, FACTOR */
/* statement, interactivity, title in MODEL statement */
/* */
/*----------------------------------------------------------------*/

/* TYPE=EST input data set: the first logical record (two physical     */
/* lines, #1 and #2) holds the ten response-function estimates         */
/* (_type_='parms'); the next ten logical records hold the rows of     */
/* their 10 x 10 covariance matrix (_type_='cov', _name_=b1..b10).     */
/*                                                                     */
/* The covariance matrix must be symmetric.  The b7 row previously     */
/* carried 0.25300 in the b10 column, which disagreed with the b10 row */
/* (0.00253 in the b7 column); the b7 row now reads 0.00253.           */
data fbeing(type=est);
   input #1 b1-b5 _type_ $ _name_ $8.
         #2 b6-b10;
   datalines;
7.93726 7.92509 7.82815 7.73696 8.16791 parms .
7.24978 7.18991 7.35960 7.31937 7.55184
0.00739 0.00019 0.00146 -0.00082 0.00076 cov b1
0.00189 0.00118 0.00140 -0.00140 0.00039
0.00019 0.01172 0.00183 0.00029 0.00083 cov b2
-0.00123 -0.00629 -0.00088 -0.00232 0.00034
0.00146 0.00183 0.01050 -0.00173 0.00011 cov b3
0.00434 -0.00059 -0.00055 0.00023 -0.00013
-0.00082 0.00029 -0.00173 0.01335 0.00140 cov b4
0.00158 0.00212 0.00211 0.00066 0.00240
0.00076 0.00083 0.00011 0.00140 0.01430 cov b5
-0.00050 -0.00098 0.00239 -0.00010 0.00213
0.00189 -0.00123 0.00434 0.00158 -0.00050 cov b6
0.01110 0.00101 0.00177 -0.00018 -0.00082
0.00118 -0.00629 -0.00059 0.00212 -0.00098 cov b7
0.00101 0.02342 0.00144 0.00369 0.00253
0.00140 -0.00088 -0.00055 0.00211 0.00239 cov b8
0.00177 0.00144 0.01060 0.00157 0.00226
-0.00140 -0.00232 0.00023 0.00066 -0.00010 cov b9
-0.00018 0.00369 0.00157 0.02298 0.00918
0.00039 0.00034 -0.00013 0.00240 0.00213 cov b10
-0.00082 0.00253 0.00226 0.00918 0.01921
;

/* Run group 1: the response functions are read directly from b1-b10.  */
/* FACTORS describes them as a 2 (sex) x 5 (age) layout, with PROFILE  */
/* giving the sex/age level of each of the ten functions in order.     */
proc catmod data=fbeing;
   title 'COMPLEX SAMPLE SURVEY ANALYSIS';
   response read b1-b10;
   factors sex $ 2, age $ 5 / _response_=sex age
           profile=(male   '25-34' ,
                    male   '35-44' ,
                    male   '45-54' ,
                    male   '55-64' ,
                    male   '65-74' ,
                    female '25-34' ,
                    female '35-44' ,
                    female '45-54' ,
                    female '55-64' ,
                    female '65-74' );
   model _f_ = _response_
         / title='Main Effects for Sex and Age';
run;

   /* Run group 2: three-row contrast on the six parameters of the     */
   /* model above; each row compares one of parameters 3-5 with        */
   /* parameter 6.                                                     */
   contrast 'No Age Effect for Age<65' all_parms 0 0 1 0 0 -1 ,
                                       all_parms 0 0 0 1 0 -1 ,
                                       all_parms 0 0 0 0 1 -1 ;
run;

   /* Run group 3: directly specified 10 x 3 design matrix.  Column 2  */
   /* is +1 for the first five functions and -1 for the last five;     */
   /* column 3 is -1 only for the fifth and tenth functions.           */
   model _f_ = (1  1  1,
                1  1  1,
                1  1  1,
                1  1  1,
                1  1 -1,
                1 -1  1,
                1 -1  1,
                1 -1  1,
                1 -1  1,
                1 -1 -1)
               (1='Intercept' ,
                2='Sex' ,
                3='Age (25-64 vs. 65-74)' )
         / title='Binary Age Effect (25-64 vs. 65-74)' ;
run;
quit;

/*-- Example 11 --------------------------------------------------*/
/* */
/* Case-control Study with 1:4 Matching */
/* ------------------------------------ */
/* The disease studied is esophageal cancer, and the exposure */
/* variable is SAMSU (a liquor). The estimation of the relative */
/* risk requires that one of the parameters be restricted to 1. */
/* NC represents the number of consumers in a matched set of five */
/* subjects. */
/* */
/* From: Breslow (1982, 668). */
/* */
/* Illustrate: conditional logistic analysis, RESTRICT statement */
/* */
/*----------------------------------------------------------------*/

/* Clear any titles left over from earlier steps.                      */
title;

/* NC, CASE and the frequency WT are read four groups per record.      */
/* OFFSET is the log of NC / (5 - NC).                                 */
data match;
   input nc case wt @@;
   offset = log(nc / (5 - nc));
   datalines;
1 1 5 1 2 15 2 1 19 2 2 8 3 1 10 3 2 7 4 1 6 4 2 0
;

/* Populations are defined by NC; OFFSET enters as a quantitative      */
/* (DIRECT) covariate and its parameter (b2) is restricted to 1.       */
proc catmod data=match;
   weight wt;
   population nc;
   direct offset;
   model case = offset;
   restrict b2=1;
quit;

/*-- Example 12 --------------------------------------------------*/
/* */
/* Case-control Study with One Matching Control */
/* -------------------------------------------- */
/* */
/* From: Breslow (1982, 664-667). */
/* */
/* Illustrate: conditional logistic analysis */
/* */
/*----------------------------------------------------------------*/

/* One record per exposure configuration of a matched pair.  DIALECT   */
/* and SAMSU are the within-pair differences (member 1 minus member    */
/* 2); WT1 and WT2 are the two frequencies attached to the record.     */
data cancer;
   input dialect1 samsu1 dialect2 samsu2 wt1 wt2;
   dialect = dialect1 - dialect2;
   samsu   = samsu1 - samsu2;
   datalines;
0 1 0 0 2 1
1 0 0 0 6 1
1 0 0 1 6 6
1 1 0 0 11 1
1 1 0 1 8 0
1 1 1 0 9 7
;

/* Expand every record into two weighted observations:                 */
/* case=1 with weight WT1 and case=2 with weight WT2.                  */
data cancer2;
   set cancer;
   array w{2} wt1 wt2;
   do case = 1 to 2;
      wt = w{case};
      output;
   end;
run;

/* The differences enter as quantitative (DIRECT) covariates and the   */
/* model is fit without an intercept (NOINT).                          */
proc catmod data=cancer2;
   weight wt;
   population dialect1 samsu1 dialect2 samsu2 dialect samsu;
   direct dialect samsu;
   model case = dialect samsu
         / freq noint;
quit;

/*-- Example 13: Predicted Probabilities ------------------------*/

/* One record per educ x income x purchase cell; WT is the frequency.  */
data loan;
   input educ $ income $ purchase $ wt;
   datalines;
high high yes 54
high high no 23
high low yes 41
high low no 12
low high yes 35
low high no 42
low low yes 19
low low no 8
;

/* Fit a main-effects model to the marginal probabilities of PURCHASE  */
/* and write the predicted values to data set PRED (OUT= option).      */
proc catmod data=loan order=data;
   weight wt;
   response marginals / out=pred;
   model purchase = educ income
         / pred;
quit;

/* Keep only the two classification variables and the predicted value. */
/* The KEEP list previously named INCOM, which does not exist in PRED; */
/* the variable is INCOME.                                             */
data pred2;
   set pred;
   keep educ income _pred_;
run;

/* List the populations from highest to lowest predicted value.        */
proc sort data=pred2;
   by descending _pred_;
run;

proc print data=pred2;
run;

/* Copyright (c) 2002 SAS Institute Inc. All Rights Reserved. */
/* Terms of Use & Legal Information | Privacy Statement */