/* Source Code for Obswww16 - Olaleye and Goin */
/* First Example Data Set */
data breast (keep=id years positive);
input @1 id @4 surg_dt ddmmyy8. @14 cont_dt ddmmyy8. @24 death_dt ddmmyy8.
@34 years @40 positive;
format surg_dt cont_dt death_dt ddmmyy8.;
label id='Observation ID'
surg_dt='Surgery Date'
cont_dt='Date of Last Contact'
death_dt='Date of Death'
years='Observation time (years)'
positive='Indicator of failure: 1=event 0=censored';
cards;
1 27/06/73 . 14/04/84 10.81 1
2 17/07/73 27/01/92 . 18.49 0
3 28/01/74 18/11/91 . 17.82 0
4 29/04/76 08/01/91 . 14.70 0
5 17/01/77 . 28/09/85 8.70 1
6 19/04/78 . 15/12/88 10.67 1
7 24/07/78 12/05/92 . 13.45 0
8 26/07/78 08/06/92 . 13.43 0
9 23/04/80 . 08/03/85 4.88 1
10 30/04/80 11/03/92 . 11.68 0
;
run;
/* Second Example Data Set */
data virus (keep=id sample treat days positive);
array mth month1-month3;
retain id sample 0;
input treat month1-month3;
id + 1;
do over mth;
if mth ^= . then
do;
if mth < 0 then do; days= -mth; positive= 0; end;
else do; days= mth; positive= 1; end;
sample= _i_;
output;
end;
end;
cards;
1 9 6 7
1 4 5 10
1 6 7 6
1 10 . -21
1 15 8 .
1 3 . 6
1 4 7 3
1 9 12 12
1 9 19 -19
1 6 5 6
1 9 . 18
1 9 -20 -17
2 6 4 5
2 16 17 -21
2 31 -19 -21
2 -27 -19 .
2 7 16 -23
2 -28 7 -19
2 -28 3 16
2 15 12 16
2 18 -21 22
2 8 4 7
2 4 -21 7
3 21 9 -8
3 13 7 -21
3 16 6 20
3 3 8 6
3 21 . -25
3 7 19 3
3 11 13 -21
3 -27 -18 9
3 14 14 6
3 8 11 15
3 8 4 7
3 8 3 9
3 -19 10 -17
;
run;
/* Deriving Variables for Calculation of Event Rates */
proc format;
value positive 1='Event' 0='Censored';
value yearfmt 1='[0,5)' 2='[5,10)' 3='[10,15)' 4='15+';
run;
data _temp2_;
set breast;
if 0 <= YEARS<=5 then group=1; else
if 5 < YEARS<=10 then group=2; else
if 10< YEARS<=15 then group=3; else
if 15< YEARS<=20 then group=4;
do i=1 to group;
if group<=i then
do;
interval=i;
n_years=years-(5*(group-1));
if positive=0 then do; censor=1; fail=.; end;
else if positive=1 then do; censor=.; fail=1; end;
end;
if group > i then
do; interval=i; n_years=5; end;
n_intv=interval; /*Create a duplicate variable for Interval*/
output;
end;
drop i group;
run;
/* Case 1: Calculating Mortality Rates */
proc sort data=_temp2_; by interval; run;
options center nonumber nodate ps=54 ls=80;
proc tabulate data=_temp2_ order=data format=10. missing;
class interval;
var n_years censor fail n_intv;
table (interval='Time Interval' all='Overall'),
(n_intv='Number Interval'*N=' '*f=8.) (censor='Number Censored'*sum=' '*f=8.)
(fail='Number Failed'*sum=' '*f=7.) (n_years='Person-Years'*sum=' '*f=8.)
(fail=' ' * pctsum='Mortality Rate (%)'* F=9.2)
/ misstext='0' rts=22 condense box='Breast Cancer Sample';
format interval yearfmt. ;
title1 "Figure 1. Calculate Event Rates for Early Breast Cancer Patients in";
title2 "Veronesi et al.'s (1981) study";
run;
/* Deriving Variables for Calculation of Virus Positivity Rates */
proc format;
value sample 1='Week 4' 2='Week 8' 3='Week 12' 9='All Blood Samples';
value treat 1='Placebo' 2='Low Dose' 3='High Dose' 9='Overall';
value positive 1='Event' 0='Censored';
value interval 1='[0,5)' 2='[5,10)' 3='[10,15)' 4='[15,20)' 5='[20,25)' 6='[25,30)' 7='30+';
run;
data _temp2b_;
set virus;
if 1<=sample<=3;
if 0 <= DAYS <=5 then group=1; else
if 5 < DAYS <=10 then group=2; else
if 10< DAYS <=15 then group=3; else
if 15< DAYS <=20 then group=4; else
if 20< DAYS <=25 then group=5; else
if 25< DAYS <=30 then group=6; else
if DAYS > 30 then group=7;
do i=1 to group;
if group<=i then
do;
interval=i;
n_days=days-(5*(group-1));
if positive=0 then do; censor=1; fail=.; end;
else if positive=1 then do; censor=.; fail=1; end;
end;
if group > i then
do; interval=i; n_days=5; end;
n_intv=interval; /*Create a duplicate variable for Interval*/
output;
end;
drop i group;
run;
/* Case 2: Calculating Virus Positivity Rates */
proc sort data=_temp2b_; by sample treat interval; run;
options center nonumber nodate ps=54 ls=80;
proc tabulate data=_temp2b_ order=data format=10. missing ;
class sample treat interval;
var n_days censor fail n_intv;
table (sample=' ' ALL='All Blood Samples'),
(treat='Treatment Group') * (interval='Time Interval'),
(n_intv='Number Interval'*N=' '*f=8.) (censor='Number Censored'*sum=''*f=8.)
(fail='Number Failed' * sum=' '*f=8.) (n_days='Culture-Days'*sum=' '*f=8.)
(fail=' ' * pctsum='Event Rate (%)'* F=14.2)
/ misstext='0' rts=22 condense box=_page_;
format sample sample. interval interval. treat treat.;
title1 "Figure 2: Calculate Virus Positivity Rates for Each Treatment Group for";
title2 "Blood Samples at Weeks 4, 8, 12 and All Combined Blood Samples";
run;
/* Validation of Event Rate Computations */
data _temp2b_;
set virus;
if 1<=sample<=3;
if 0 <= DAYS <=5 then group=1; else
if 5 < DAYS <=10 then group=2; else
if 10< DAYS <=15 then group=3; else
if 15< DAYS <=20 then group=4; else
if 20< DAYS <=25 then group=5; else
if 25< DAYS <=30 then group=6; else
if DAYS > 30 then group=7;
do i=1 to group;
if group<=i then
do;
interval=i;
n_days=days-(5*(group-1));
if positive=0 then do; censor=1; fail=.; end;
else if positive=1 then do; censor=.; fail=1; end;
end;
if group > i then
do; interval=i; n_days=5; end;
output;
end;
drop i group;
run;
proc sort data=_temp2b_; by sample treat interval id; run;
data _temp3_;
set _temp2b_ (drop=n_intv) end=last;
by sample treat interval id;
if first.interval then do; n_intv=0; p_days=0; n_cens=0; n_fail=0; end;
n_intv+1;
p_days+n_days;
if censor=1 then n_cens+censor;
if fail=1 then n_fail+fail;
rate=n_fail/p_days*100;
if last.interval then output;
drop id positive censor fail days n_days;
run;
proc sort data=_temp2b_; by treat interval id; run;
data _temp4_;
set _temp2_(drop=n_intv) end=last;
by treat interval id;
sample=9; /*Category for Total of all blood samples combined*/
if first.interval then do; n_intv=0; p_days=0; n_cens=0; n_fail=0; end;
n_intv+1;
p_days+n_days;
if censor=1 then n_cens+censor;
if fail=1 then n_fail+fail;
rate=n_fail/p_days*100;
if last.interval then output;
drop id positive censor fail days n_days;
run;
data _temp34_;
set _temp3_ _temp4_;
proc fsview data=_temp34_;
run;
options center nonumber nodate ps=54 ls=80;
proc tabulate data=_temp34_ order=data format=10. missing ;
class sample treat interval;
var n_intv p_days n_cens n_fail rate;
table sample=' ',(treat='Treatment Group') * (interval='Time Interval'),
(n_intv='Number Interval'*sum=' '*f=8.) (n_cens='Number Censored'*sum=' '*f=8.)
(n_fail='Number Failed'*sum=' '*f=8.) (p_days='Culture-Days'*sum=' '*f=8.)
(rate='Event Rate(%)' * sum=' '*f=14.2)
/ misstext='0' rts=22 condense box=_page_;
format sample sample. interval interval. treat treat.;
title1 "Using DATA Step and By-Group Statements to Validate and Reproduce";
title2 "Life Table Estimates Obtained with Proc Tabulate in Figure 2";
title3 "Compute Event Rates for Each Treatment Group by Blood Sample and Combined Blood Sample";
run;
/* Deriving Variables for Calculating Percentages of Failure Times */
proc univariate data=virus noprint;
var days;
output out=stats min=min max=max median=med q1=q1 q3=q3;
run;
data stats;
set stats;
call symput('min',left(min));
call symput('max',left(max));
call symput('med',left(med));
call symput('med_',left(med+1));
call symput('q1',left(q1));
call symput('q3',left(q3));
call symput('q1_',left(q1+1));
call symput('q3_',left(q3+1));
call symput('totcat',left(max+10)); /*Macro variable to represent the*/
/*value for the Total Category*/
run;
proc format;
value quartile 1="%left(&min) to &q1" 2="%left(&q1_) to &med"
3="%left(&med_) to &q3" 4="%left(&q3_) to &max"
&totcat='Total';
value sample 1='Week 4' 2='Week 8' 3='Week 12' 9='Total';
value treat 1='Placebo' 2='Low Dose' 3='High Dose' 9='Overall';
run;
data _temp5_;
set virus;
if &min<=days<=&q1 then quartile=1; else
if &q1 )
all='Total'*(N*f=6. pctn))
days='Failure Times (Days)' * (N*f=4. Mean std Min Max)
/ misstext='0' rts=22 condense box=_page_;
format treat treat. sample sample. quartile quartile. positive positive.;
title1 "Figure 3: Descriptive Summary Statistics of Time to Virus Positivity";
title2 "Detection by Treatment, Event Status and Blood Sample Collection Week";
run;
options center nonumber nodate ps=43 ls=126;
proc tabulate data=_temp_ order=data format=6.1 missing;
class sample treat quartile;
var days;
table (treat='Treatment' ALL='All Group')* (sample='Sample' ALL='Total'),
(quartile='Failure Times (Quartiles)' * (N*f=4. pctn)
all='Total'*(N*f=6. pctn))
days='Failure Times (Days)' * (N*f=4. Mean std Min Max)
/ misstext='0' rts=22 condense box=_page_;
format treat treat. sample sample. quartile quartile. positive positive.;
title1 "Figure 4: Descriptive Summary Statistics of Time to Detection of Virus";
title2 "Positivity by Treatment and Blood Sample Collection Week";
run;