******************************************************************************/
******************************************************************************/
  /* Source Code for Observations article obswww09 */

  /* *********** Example Clinical Data ********** */

   DATA  VITAL;
      input  patid $ pageno $ visid $ visdate DATE10. diabp sysbp;
      format visdate DATE.;
   datalines;
   100  12  1  12JAN1997  66 110
   100  12  1  12JAN1997  68 110
   100  17  2  29JAN1997  66  .
   100  17  2  29JAN1997  66 110
   100  23  3  26JAN1997  68 120
   100  23  3  26JAN1997  68 120
   110  12  1  03FEB1997  68 110
   110  23  3  12FEB1997  64 115
   110  23  3  12FEB1997  68 115
   120  12  1  05APR1997  66 105
   120  12  1  05APR1997  64 105
   120  17  2  13APR1997  110 64
   120  17  2  13APR1997  105 82
   120  23  3  19APR1997  63 105
   120  23  3  19APR1997  105 90
   ;
   run;

   DATA  VITMEAN;
      input patid $ pageno $ visid $ visdate DATE10. mdiabp msysbp;
      format visdate DATE.;
   datalines;
   100 12 1 12JAN1997  67 110
   100 17 2 19JAN1997  66 110
   100 23 3 16JAN1997  68 120
   110 12 1 03FEB1997  68 110
   110 23 3 17FEB1997  66 115
   120 12 1 05APR1997  65 105
   120 17 2 12APR1997  64 107
   120 23 3 19APR1997  65 105
   ;
   run;

   DATA  DRUGDAY;
      input patid $ pageno $ visid $ visdate DATE10. strdate DATE10. enddate DATE10.;
      format visdate DATE. strdate DATE. enddate DATE.;
   datalines;
   100 12 1 12JAN1997 06JAN1997 12JAN1997
   100 17 2 19JAN1997 13JAN1997 18JAN1997
   100 23 3 26JAN1997 19JAN1997 25JAN1997
   110 12 1 03FEB1997 16JAN1997 03FEB1997
   110 23 3 17FEB1997 10FEB1997 16FEB1997
   120 12 1 05APR1997 01APR1997 05APR1997
   120 17 2 12APR1997 06APR1997 11APR1997
   120 23 3 19APR1997 13APR1997 19APR1997
   ;
   run;

******************************************************************************/

 /* ********** Data-Quality Check in One Row *********** */

   proc sql;
      select  distinct 'VITAL_0011' as checkno,
              patid, pageno,
              'Systolic BP must not be blank'
              as  ERRMSG
      from VITAL
      where sysbp is NULL
      union	
      select  distinct 'VITAL_0012' as checkno,
              patid, pageno,
              'Systolic BP must be greater than 80'
              as ERRMSG
      from VITAL
      where sysbp <= 80 and sysbp is not NULL
      union
      select  distinct 'VITAL_0013' as checkno,
              patid, pageno,
              'Systolic BP is less than diastolic BP'
              as ERRMSG
      from VITAL
      where sysbp<=diabp and sysbp is not NULL and diabp is not NULL
      order by checkno;
   quit;


******************************************************************************/


 /* ********** Relational Check Across Rows ********** */

   proc sql;
      select 'VITAL_0021' as checkno,
              patid, pageno,
              'The patient didn''t have BP measured twice at a visit'
              as ERRMSG
              from vital
              group by patid, pageno
              having count(*)<2
      union

      select 'DRUG_0022' as checkno,
              a.patid, a.pageno,  
              'The patient didn''t take medicine for more than 1 day'
              as ERRMSG
      from    drugday as a, drugday as b
      where   (a.patid=b.patid) and (b.strdate-a.enddate>1) and
              (a.enddate=
                      (select max(c.enddate)
                         from drugday as c
                       where (c.patid=a.patid)
                         and (c.enddate < b.strdate)))
           
      order by checkno;
   quit;

******************************************************************************/

 /* ********** Relational Check Across Tables *********** */

   proc sql;
      select 'VITAL_0031' as checkno,
              curr.patid, curr.pageno,
             'The interval between consecutive visits is > 7 days'
              as ERRMSG
              from (select distinct patid, pageno, visid, visdate
                      from VITAL
                      where visid<'3') as prev,
                   (select distinct patid, pageno, visid, visdate
                      from VITAL
                      where visid>'1') as curr
              where (input(prev.visid,2.)=input(curr.visid,2.)-1) and
                    (curr.visdate-prev.visdate>7) and
                     curr.patid=prev.patid
      union
      select 'VITAL_0032' as checkno,
              a.patid, b.pageno,
              'The patient didn''t have visit '||b.visid
              as ERRMSG
              from (select distinct patid from VITAL) as a,
                   (select distinct visid, pageno from VITAL) as b
              where b.visid||b.pageno not in (select visid||pageno 
                                                 from VITAL c where a.patid=c.patid)
      order by checkno;
   quit;

******************************************************************************/

 /* ********** Relational Check Across Rows And Tables ********** */

   proc sql;
      select  'VITAL_0041' as checkno,
              b.patid, b.pageno,
              'Computed mean of SYSBP is different from recorded one'
              as ERRMSG
              from (select patid, visid, avg(sysbp) as compmean
                      from VITAL
                      group by patid, visid) as a,
                    vitmean as b
              where a.patid=b.patid and a.visid=b.visid and
                      abs(a.compmean-b.msysbp)>=0.01;
   quit;

******************************************************************************/

 /* ****************** DATA Step Equivilent of ****************** */
 /* ********** Relational Check Across Rows And Tables ********** */

   proc sort data=VITAL;
      by patid visid;
   proc means data=VITAL noprint;
      var sysbp;
      by patid visid;
      output out=CALMEAN(drop=_type_ _freq_) mean=c_sysbp;

   data result;
      merge VITMEAN CALMEAN;
      by patid visid;
      if abs(msysbp-c_sysbp)>0.01 then do;
               ERRMSG='Computed mean of SYSBP is different from recorded one';
               checkno='VITAL_0041';
               output;
      end;
      keep checkno patid pageno ERRMSG;

      proc print data=result;
             Var checkno patid pageno ERRMSG;
   run;

 /****************************************************************************/
 /****************************************************************************/