/* Source Code for Obswww16 - Olaleye and Goin */

/* First Example Data Set */
data breast (keep=id years positive);
   input @1 id @4 surg_dt ddmmyy8. @14 cont_dt ddmmyy8. @24 death_dt ddmmyy8.
         @34 years @40 positive;
   format  surg_dt cont_dt death_dt ddmmyy8.;
   label    id='Observation ID'
       surg_dt='Surgery Date'
       cont_dt='Date of Last Contact'
      death_dt='Date of Death'
         years='Observation time (years)'
      positive='Indicator of failure: 1=event 0=censored';
cards;
1  27/06/73  .         14/04/84  10.81 1
2  17/07/73  27/01/92     .      18.49 0
3  28/01/74  18/11/91     .      17.82 0
4  29/04/76  08/01/91     .      14.70 0
5  17/01/77  .         28/09/85   8.70 1
6  19/04/78  .         15/12/88  10.67 1
7  24/07/78  12/05/92     .      13.45 0
8  26/07/78  08/06/92     .      13.43 0
9  23/04/80  .         08/03/85   4.88 1
10 30/04/80  11/03/92     .      11.68 0
;
run;

/* Second Example Data Set */

data virus (keep=id sample treat days positive);
  array mth month1-month3;
  retain id sample 0;
  input treat month1-month3;
  id + 1;
  do over mth;
    if mth ^= . then
       do; 
         if mth < 0 then do; days= -mth; positive= 0; end;
         else            do; days=  mth; positive= 1; end;
         sample= _i_;
         output;
       end;
  end;
cards;
 1 9 6 7
 1 4 5 10
 1 6 7 6
 1 10 . -21
 1 15 8 .
 1 3 . 6
 1 4 7 3
 1 9 12 12
 1 9 19 -19
 1 6 5 6
 1 9 . 18
 1 9 -20 -17
 2 6 4 5
 2 16 17 -21
 2 31 -19 -21
 2 -27 -19 .
 2 7 16 -23
 2 -28 7 -19
 2 -28 3 16
 2 15 12 16
 2 18 -21 22
 2 8 4 7
 2 4 -21 7
 3 21 9 -8
 3 13 7 -21
 3 16 6 20
 3 3 8 6
 3 21 . -25
 3 7 19 3
 3 11 13 -21
 3 -27 -18 9
 3 14 14 6
 3 8 11 15
 3 8 4 7
 3 8 3 9
 3 -19 10 -17
;
run;

/* Deriving Variables for Calculation of Event Rates */

proc format;
 value positive     1='Event'  0='Censored';
 value yearfmt      1='[0,5)'  2='[5,10)'  3='[10,15)'  4='15+';
run;

data _temp2_;
  set breast;
  if 0 <= YEARS<=5  then group=1; else
  if 5 <  YEARS<=10 then group=2; else
  if 10<  YEARS<=15 then group=3; else
  if 15<  YEARS<=20 then group=4;

  do i=1 to group;
     if group<=i then
        do;
            interval=i;
            n_years=years-(5*(group-1));
            if      positive=0 then do; censor=1; fail=.; end;
            else if positive=1 then do; censor=.; fail=1; end;
        end;
     if group > i then
        do; interval=i;  n_years=5; end;
     n_intv=interval;    /*Create a duplicate variable for Interval*/
     output;
   end;
   drop i group;
run;

/* Case 1: Calculating Mortality Rates */
proc sort data=_temp2_; by interval; run;
options center nonumber nodate ps=54 ls=80;
proc tabulate data=_temp2_ order=data format=10. missing;
 class interval;
 var n_years censor fail n_intv;
 table (interval='Time Interval' all='Overall'),
(n_intv='Number Interval'*N=' '*f=8.) (censor='Number Censored'*sum=' '*f=8.)
 (fail='Number Failed'*sum=' '*f=7.) (n_years='Person-Years'*sum=' '*f=8.)
 (fail=' ' * pctsum='Mortality Rate (%)'* F=9.2)
 / misstext='0' rts=22 condense box='Breast Cancer Sample';
format interval yearfmt. ;

title1 "Figure 1. Calculate Event Rates for Early Breast Cancer Patients in";
title2 "Veronesi et al.'s (1981) study";
run;

/* Deriving Variables for Calculation of Virus Positivity Rates */
proc format;
   value sample	1='Week 4'  2='Week 8'   3='Week 12' 9='All Blood Samples';
   value treat   	1='Placebo' 2='Low Dose' 3='High Dose'  9='Overall';
   value positive	1='Event'   0='Censored';
   value interval	1='[0,5)' 	2='[5,10)'	3='[10,15)'	4='[15,20)'  5='[20,25)'	6='[25,30)' 7='30+';
run;

data _temp2b_;
  set virus;
  if 1<=sample<=3;
  if 0 <= DAYS <=5  then group=1; else
  if 5 <  DAYS <=10 then group=2; else
  if 10<  DAYS <=15 then group=3; else
  if 15<  DAYS <=20 then group=4; else
  if 20<  DAYS <=25 then group=5; else
  if 25<  DAYS <=30 then group=6; else
  if      DAYS > 30 then group=7;

  do i=1 to group;
     if group<=i then
        do; 
		interval=i;
            n_days=days-(5*(group-1));
            if      positive=0 then do; censor=1; fail=.; end;
            else if positive=1 then do; censor=.; fail=1; end;
        end;
     if group > i then
        do; interval=i;  n_days=5;  end;
     n_intv=interval;    /*Create a duplicate variable for Interval*/
     output;
  end;
  drop i group;
run;

/* Case 2: Calculating Virus Positivity Rates */
proc sort data=_temp2b_; by sample treat interval; run;

options center nonumber nodate ps=54 ls=80;
proc tabulate data=_temp2b_ order=data format=10. missing ;
 class sample treat interval;
 var n_days censor fail n_intv;
 table (sample=' ' ALL='All Blood Samples'),
 (treat='Treatment Group') * (interval='Time Interval'),
 (n_intv='Number Interval'*N=' '*f=8.) (censor='Number Censored'*sum=''*f=8.)
 (fail='Number Failed' * sum=' '*f=8.) (n_days='Culture-Days'*sum=' '*f=8.)
 (fail=' ' * pctsum='Event Rate (%)'* F=14.2)
 / misstext='0' rts=22 condense box=_page_;
 format sample sample. interval interval. treat treat.;
title1 "Figure 2: Calculate Virus Positivity Rates for Each Treatment Group for";
title2 "Blood Samples at Weeks 4, 8, 12 and All Combined Blood Samples";
run;

/* Validation of Event Rate Computations */
data _temp2b_;
  set virus;
  if 1<=sample<=3;
  if 0 <= DAYS <=5  then group=1; else
  if 5 <  DAYS <=10 then group=2; else
  if 10<  DAYS <=15 then group=3; else
  if 15<  DAYS <=20 then group=4; else
  if 20<  DAYS <=25 then group=5; else
  if 25<  DAYS <=30 then group=6; else
  if      DAYS > 30 then group=7;
  do i=1 to group;
     if group<=i then
        do; 
		interval=i;
            n_days=days-(5*(group-1));
            if      positive=0 then do; censor=1; fail=.; end;
            else if positive=1 then do; censor=.; fail=1; end;
        end;
     if group > i then
        do; interval=i;  n_days=5;  end;
     output;
  end;
  drop i group;
run;

proc sort data=_temp2b_; by sample treat interval id; run;
data _temp3_;
  set _temp2b_ (drop=n_intv) end=last;
  by sample treat interval id;
  if first.interval then do; n_intv=0; p_days=0; n_cens=0; n_fail=0; end;
  n_intv+1;
  p_days+n_days;
  if censor=1 then n_cens+censor;
  if fail=1 then n_fail+fail;
  rate=n_fail/p_days*100;
  if last.interval then output;
  drop id positive censor fail days n_days;
 run;

proc sort data=_temp2b_; by treat interval id; run;
data _temp4_;
  set _temp2_(drop=n_intv) end=last;
  by treat interval id;
  sample=9;            /*Category for Total of all blood samples combined*/

  if first.interval then do; n_intv=0; p_days=0; n_cens=0; n_fail=0; end;
  n_intv+1;
  p_days+n_days;
  if censor=1 then n_cens+censor;
  if fail=1 then n_fail+fail;
  rate=n_fail/p_days*100;
  if last.interval then output;
  drop id positive censor fail days n_days;
run;

data _temp34_;
  set _temp3_ _temp4_;
  proc fsview data=_temp34_;
run;

options center nonumber nodate ps=54 ls=80;
proc tabulate data=_temp34_ order=data format=10. missing ;
 class sample treat interval;
 var n_intv p_days n_cens n_fail rate;
 table sample=' ',(treat='Treatment Group') * (interval='Time Interval'),
 (n_intv='Number Interval'*sum=' '*f=8.) (n_cens='Number Censored'*sum=' '*f=8.)
 (n_fail='Number Failed'*sum=' '*f=8.) (p_days='Culture-Days'*sum=' '*f=8.)
 (rate='Event Rate(%)' * sum=' '*f=14.2)
 / misstext='0' rts=22 condense box=_page_;
   format sample sample. interval interval. treat treat.;

title1 "Using DATA Step and By-Group Statements to Validate and Reproduce";
title2 "Life Table Estimates Obtained with Proc Tabulate in Figure 2";
title3 "Compute Event Rates for Each Treatment Group by Blood Sample and Combined Blood Sample";
run;


/* Deriving Variables for Calculating Percentages of Failure Times */

proc univariate data=virus noprint;
  var days;
  output out=stats min=min max=max median=med q1=q1 q3=q3;
run;

data stats;
   set stats;
   call symput('min',left(min));
   call symput('max',left(max));
   call symput('med',left(med));
   call symput('med_',left(med+1));
   call symput('q1',left(q1));
   call symput('q3',left(q3));
   call symput('q1_',left(q1+1));
   call symput('q3_',left(q3+1));
   call symput('totcat',left(max+10)); /*Macro variable to represent the*/
						   /*value for the Total Category*/
run;

proc format;
  value quartile	1="%left(&min) to &q1"	2="%left(&q1_) to &med" 
			3="%left(&med_) to &q3"	4="%left(&q3_) to &max"
			&totcat='Total';
  value sample	1='Week 4'  2='Week 8'   3='Week 12'    9='Total';
  value treat	1='Placebo' 2='Low Dose' 3='High Dose'  9='Overall';
run;

data _temp5_;
   set virus;
   if &min<=days<=&q1  then quartile=1; else
   if &q1  )
     all='Total'*(N*f=6. pctn))
    days='Failure Times (Days)' * (N*f=4. Mean std Min Max)
    / misstext='0' rts=22 condense box=_page_;
  format treat treat. sample sample. quartile quartile. positive positive.;
title1 "Figure 3: Descriptive Summary Statistics of Time to Virus Positivity"; 
title2 "Detection by Treatment, Event Status and Blood Sample Collection Week";
run;

options center nonumber nodate ps=43 ls=126;
proc tabulate data=_temp_ order=data format=6.1 missing;
  class sample treat quartile;
  var days;
  table (treat='Treatment' ALL='All Group')* (sample='Sample' ALL='Total'),
   (quartile='Failure Times (Quartiles)' * (N*f=4. pctn)
             all='Total'*(N*f=6. pctn))
   days='Failure Times (Days)' * (N*f=4. Mean std Min Max)
          / misstext='0' rts=22 condense box=_page_;
   format treat treat. sample sample. quartile quartile. positive positive.;
title1 "Figure 4: Descriptive Summary Statistics of Time to Detection of Virus";
title2 "Positivity by Treatment and Blood Sample Collection Week";
run;