/*-------------------------------------------------------------------*/ /* Longitudinal Data and SAS: A Programmer's Guide */ /* by Ron Cody */ /* Copyright(c) 2001 by SAS Institute Inc., Cary, NC, USA */ /* SAS Publications order # 58176 */ /* ISBN 1-58025-924-3 */ /*-------------------------------------------------------------------*/ /* */ /* This material is provided "as is" by SAS Institute Inc. There */ /* are no warranties, expressed or implied, as to merchantability or */ /* fitness for a particular purpose regarding the materials or code */ /* contained herein. The Institute is not responsible for errors */ /* in this material as it now exists or will exist, nor does the */ /* Institute provide technical support for it. */ /* */ /*-------------------------------------------------------------------*/ /* Questions or problem reports concerning this material may be */ /* addressed to the author: */ /* */ /* SAS Institute Inc. */ /* Books by Users */ /* Attn: Ron Cody */ /* SAS Campus Drive */ /* Cary, NC 27513 */ /* */ /* */ /* If you prefer, you can send email to: sasbbu@sas.com */ /* Use this for subject field: */ /* Comments for Ron Cody */ /* */ /*-------------------------------------------------------------------*/ /* Date Last Updated: January 9, 2002 */ /*-------------------------------------------------------------------*/ SAS Code and Data from the Book: Longitudinal Data and SAS Chapter 1 Program 1-1: DATA Step without a RETAIN Statement DATA WITHOUT_1; PUT "Before the INPUT statement: " _ALL_; INPUT X @@; PUT "After the INPUT statement: " _ALL_ /; DATALINES; 1 2 . 3 ; Program 1-2: DATA Step with a RETAIN Statement DATA WITH_1; RETAIN X; PUT "Before the INPUT statement: " _ALL_; INPUT X @@; PUT "After the INPUT statement: " _ALL_ /; DATALINES; 1 2 . 3 ; Program 1-3: Another Demonstration of a DATA Step that Does Not Work without a RETAIN Statement ***If there is a missing value for X, use the value from the previous observation ; ***Note: This program does NOT work as planned; DATA WITHOUT_2; PUT "Before INPUT: " _ALL_ ; INPUT X @@; IF X NE . THEN OLD_X = X; ELSE X = OLD_X; PUT "After assignment: " _ALL_ /; DATALINES; 1 2 . 3 ; Program 1-4: Adding a RETAIN Statement to Program 1-3 ***If there is a missing value for X, use the value from the previous observation; ***Note: With the added RETAIN statement, the program now works; DATA WITH_2; RETAIN OLD_X; PUT "Before INPUT: " _ALL_ ; INPUT X @@; IF X NE . THEN OLD_X = X; ELSE X = OLD_X; PUT "After assignment: " _ALL_ /; DATALINES; 1 2 . 3 ; Program 1-5: Attempting to Generate a Sequential Subject Number without Using a RETAIN Statement ***Attempting to generate a sequential SUBJECT number without using a RETAIN statement; DATA WITHOUT_3; PUT "Before the INPUT statement " _ALL_ ; INPUT X @@; SUBJECT = SUBJECT + 1; PUT "After the INPUT statement " _ALL_ /; DATALINES; 1 3 5 ; Program 1-6: Program 1-5 with a RETAIN Statement Added ***The same program with a RETAIN statement; DATA WITH_3; RETAIN SUBJECT 0; PUT "Before the INPUT statement: " _ALL_ ; INPUT X @@; SUBJECT = SUBJECT + 1; PUT "After the INPUT statement: " _ALL_ /; DATALINES; 1 3 5 ; Program 1-7: Demonstrating the SUM Statement DATA WITHOUT_4; PUT "Before the INPUT statement: " _ALL_ ; INPUT X @@; SUBJECT + 1; /* SUM statement */ PUT "After the INPUT statement: " _ALL_ /; DATALINES; 1 3 5 ; Program 1-8: Demonstrating that Variables from a SET Statement are Retained DATA ONE; INPUT X Y; DATALINES; 1 2 ; DATA TWO; IF _N_ = 1 THEN SET ONE; PUT "Before INPUT statement: " _ALL_; INPUT NEW; PUT "After INPUT statement: " _ALL_ / ; DATALINES; 3 4 5 ; Chapter 2 Program 2-1: Using the LAG Function to Compare a Value in the Present Observation to One in the Previous Observation DATA COMPARE; INPUT X; LAST_X = LAG(X); DIFF_X = X - LAST_X; DATALINES; 4 2 9 6 ; PROC PRINT DATA=COMPARE NOOBS; TITLE "Demonstration the LAG Function"; RUN; Program 2-2: Demonstrating what Happens when You Execute the LAG Function Conditionally DATA LAG2; INPUT X; IF X > 2 THEN LAG_X = LAG(X); DATALINES; 1 3 . 5 2 7 ; PROC PRINT DATA=LAG2; TITLE "Listing of LAG1 Data Set"; RUN; Program 2-3: Demonstrating the LAGn Functions DATA LAG_N; INPUT X; LAG_X = LAG(X); LAG2_X = LAG2(X); LAG3_X = LAG3(X); DATALINES; 1 2 3 4 5 ; PROC PRINT DATA=LAG_N; TITLE "Demonstrating the LAGn Family of Functions"; RUN; Program 2-4: Rewriting Program 2-1 to Make Use of the DIF Function DATA COMPARE; INPUT X; DIFF_X = DIF(X); DATALINES; 4 2 9 6 ; PROC PRINT DATA=COMPARE NOOBS; TITLE "Demonstration the DIF Function"; RUN; Chapter 3 Program 3-1: Creating a Test Data Set ***Demonstrating FIRST. and LAST. temporary variables; DATA ONE; INPUT SUBJECT SCORE; DATALINES; 1 11 2 21 3 31 1 12 4 41 1 13 2 22 4 42 4 43 ; Program 3-2: Creating the FIRST. and LAST. Temporary Variables DATA TWO; SET ONE; BY SUBJECT; FIRST = FIRST.SUBJECT; LAST = LAST.SUBJECT; RUN; PROC PRINT DATA=TWO; TITLE "Demonstrating FIRST. and LAST. Variables"; RUN; Program 3-3: FIRST. and LAST. Variables when there is More than One BY Variable DATA THREE; INFORMAT GENDER GROUP $1.; INPUT GENDER GROUP SCORE; DATALINES; M A 23 M A 24 M B 33 M B 35 M B 36 F A 41 F A 42 F A 43 F B 51 ; PROC SORT DATA=THREE; BY GENDER GROUP; RUN; DATA FOUR; SET THREE; BY GENDER GROUP; ***Create variables to demonstrate how the FIRST. and LAST. variables work with two BY variables; FIRST_GENDER = FIRST.GENDER; LAST_GENDER = LAST.GENDER; FIRST_GROUP = FIRST.GROUP; LAST_GROUP = LAST.GROUP; PROC PRINT DATA=FOUR; TITLE "Listing of Data Set FOUR"; RUN; Program 3-4: Using FIRST. and LAST. Variables to Count Observations per Subject PROC SORT DATA=ONE; BY SUBJECT; RUN; DATA COUNT; SET ONE; BY SUBJECT; IF FIRST.SUBJECT = 1 THEN NUMBER = 0; NUMBER + 1; IF LAST.SUBJECT = 1 THEN OUTPUT; KEEP SUBJECT NUMBER; RUN; PROC PRINT DATA=COUNT; TITLE "Counting Observations per Subject"; RUN; Program 3-5: Combining the Number of Observations with the Original Data DATA COMBINE; MERGE ONE COUNT; BY SUBJECT; RUN; PROC PRINT DATA=COMBINE; TITLE "Listing of Data Set COMBINE"; RUN; Chapter 4 Program 4-1: Creating a Test Data Set to Demonstrate Flags and Counters DATA LAB; INPUT PATNO VISIT_NO OUTCOME; DATALINES; 3 1 0 3 2 0 3 3 1 1 1 0 1 2 1 1 3 0 1 4 1 2 1 0 2 2 0 4 1 1 4 2 1 4 3 1 ; Program 4-2: Using a Flag Variable to "Remember" a Value from Previous Observations PROC SORT DATA=LAB; BY PATNO VISIT_NO; RUN; DATA FLAG_TEST; SET LAB; BY PATNO; RETAIN FLAG; ***If FLAG = 1 outcome was positive; ***Initialize FLAG; IF FIRST.PATNO = 1 THEN FLAG = 0; ***Turn FLAG on if outcome is positive; IF OUTCOME = 1 THEN FLAG = 1; ***Output one observation when processing the last visit for a patient; IF LAST.PATNO = 1 THEN OUTPUT; DROP OUTCOME VISIT_NO; RUN; PROC PRINT DATA=FLAG_TEST; TITLE "Listing of Data Set FLAG_TEST"; RUN; Program 4-3: Using a SUM Statement to Count the Number of Positive OUTCOMES for Each Patient ***Note: Data set lab already sorted; DATA COUNT_TEST; SET LAB; BY PATNO; ***No need to RETAIN COUNT since it is used in a SUM statement and is therefore retained automatically; ***Initialize COUNT; IF FIRST.PATNO = 1 THEN COUNT = 0; ***Increment COUNT if outcome is positive; IF OUTCOME = 1 THEN COUNT + 1; ***Output one observation when processing the last visit for a patient; IF LAST.PATNO = 1 THEN OUTPUT; DROP OUTCOME; RUN; PROC PRINT DATA=COUNT_TEST; TITLE "Listing of Data Set COUNT_TEST"; RUN; Chapter 5 Program 5-1: Using PROC MEANS to Create Means for Each TEACHER - Using a BY Statement PROC SORT DATA=TEST_SCORES; BY TEACHER; RUN; PROC MEANS DATA=TEST_SCORES NOPRINT; BY TEACHER; VAR MATH SCIENCE ENGLISH; OUTPUT OUT=TEACHER_MEANS MEAN=M_MATH M_SCIENCE M_ENGLISH; RUN; Program 5-2: Using PROC MEANS to Create Means for Each TEACHER - Using a CLASS Statement PROC MEANS DATA=TEST_SCORES NOPRINT; CLASS TEACHER; VAR MATH SCIENCE ENGLISH; OUTPUT OUT=TEACHER_MEANS MEAN=M_MATH M_SCIENCE M_ENGLISH; RUN; Program 5-3: Using PROC MEANS to Output Other Descriptive Statistics PROC MEANS DATA=TEST_SCORES NOPRINT NWAY; CLASS TEACHER; VAR MATH SCIENCE ENGLISH; OUTPUT OUT = TEACHER_MEANS (DROP = _TYPE_) MEAN = M_MATH M_SCIENCE M_ENGLISH MEDIAN = MED_MATH MED_SCIENCE MED_ENGLISH N = N_MATH N_SCIENCE N_ENGLISH; RUN; Program 5-4: Demonstrating the AUTONAME Option of PROC MEANS PROC MEANS DATA=TEST_SCORES NOPRINT NWAY; CLASS TEACHER; VAR MATH SCIENCE ENGLISH; OUTPUT OUT = TEACHER_MEANS (DROP = _TYPE_) MEAN = MEDIAN = N = / AUTONAME; RUN; Program 5-5: An alternative way of specifying descriptive statistics PROC MEANS DATA=TEST_SCORES NOPRINT NWAY; CLASS TEACHER; VAR MATH SCIENCE ENGLISH; OUTPUT OUT = TEACHER_MEANS(DROP = _TYPE_) MEAN(SCIENCE ENGLISH) = M_SCIENCE M_ENGLISH MEDIAN(MATH) = MED_MATH N = N_MATH N_SCIENCE N_ENGLISH; RUN; Program 5-6: Using an ID Variable to Include Additional Variables in the Output Data Set PROC MEANS DATA=TEST_SCORES NOPRINT NWAY; CLASS TEACHER; ID SCHOOL; VAR MATH SCIENCE ENGLISH; OUTPUT OUT=TEACHER_MEANS(DROP=_TYPE_) MEAN=M_MATH M_SCIENCE M_ENGLISH; RUN; Program 5-7: Creating a New Data Set Including the School Size DATA SCHOOL_SIZE; INPUT @1 SCHOOL $21. @23 SIZE $5.; DATALINES; FLEMING MIDDLE SCHOOL SMALL ROBERT HUNTER LARGE SAINT BARTS ACADEMY SMALL RUTGERS PREP SMALL FRANKLIN HIGH LARGE ; PROC SORT DATA=TEST_SCORES; BY SCHOOL; RUN; PROC SORT DATA=SCHOOL_SIZE; BY SCHOOL; RUN; DATA COMBINED; MERGE TEST_SCORES SCHOOL_SIZE; BY SCHOOL; RUN; PROC PRINT DATA=COMBINED; TITLE "Listing of Data Set COMBINED"; RUN; Program 5-8: Demonstrating More than One CLASS Variable PROC MEANS DATA=COMBINED NOPRINT; CLASS COUNTY SIZE; VAR MATH SCIENCE ENGLISH; OUTPUT OUT = COUNTY_BY_SIZE MEAN = M_MATH M_SCIENCE M_ENGLISH; RUN; PROC PRINT DATA=COUNTY_BY_SIZE; TITLE "Listing of Data Set COUNTY_BY_SIZE"; RUN; Program 5-9: Selecting Only Certain Breakdown of CLASS Variables DATA SELECT; SET COUNTY_BY_SIZE; WHERE _TYPE_ IN (1 3); RUN; Program 5-10: Using a TYPES Statement to Specify Breakdowns PROC MEANS DATA=COMBINED NOPRINT; CLASS COUNTY SIZE; TYPES SIZE COUNTY*SIZE; VAR MATH SCIENCE ENGLISH; OUTPUT OUT = COUNTY_SIZE MEAN = M_MATH M_SCIENCE M_ENGLISH; RUN; Program 5-11: Separating the _TYPE_ Observations Into Separate Data Sets DATA SIZE COUNTY_BY_SIZE; SET COUNTY_SIZE; IF _TYPE_ = 1 THEN OUTPUT SIZE; ELSE IF _TYPE_ = 3 THEN OUTPUT COUNTY_BY_SIZE; RUN; Program 5-12: Demonstrating the CHARTYPE Option of PROC MEANS PROC MEANS DATA=COMBINED NOPRINT CHARTYPE; CLASS COUNTY SIZE; TYPES SIZE COUNTY*SIZE; VAR MATH SCIENCE ENGLISH; OUTPUT OUT = COUNTY_SIZE MEAN = M_MATH M_SCIENCE M_ENGLISH; RUN; DATA SIZE COUNTY_BY_SIZE; SET COUNTY_SIZE; IF _TYPE_ = '01' THEN OUTPUT SIZE; ELSE IF _TYPE_ = '11' THEN OUTPUT COUNTY_BY_SIZE; RUN; Program 5-13: Using PROC MEANS to Create an Output Data Set Containing Counts PROC MEANS DATA=TEST_SCORES NOPRINT NWAY; CLASS TEACHER; VAR MATH; OUTPUT OUT = STUDENT_COUNT(DROP=DUMMY _TYPE_ RENAME=(_FREQ_ = N_STUDENTS)) MEAN = DUMMY; RUN; PROC PRINT DATA=STUDENT_COUNT; TITLE "Listing of Data Set STUDENT_COUNT"; RUN; Program 5-14: Using PROC FREQ to Create an Output Data Set Containing Counts PROC FREQ DATA=TEST_SCORES NOPRINT; TABLES TEACHER / OUT = STUDENT_COUNT(DROP = PERCENT RENAME=(COUNT = N_STUDENTS)); RUN; PROC PRINT DATA=STUDENT_COUNT; TITLE "Listing of Data Set STUDENT_COUNT"; RUN; Program 5-15: Using PROC FREQ to Output Counts for a Two-way Table PROC FREQ DATA=COMBINED NOPRINT; TABLES COUNTY*SIZE / OUT = COUNTY_SIZE(DROP = PERCENT); RUN; PROC PRINT DATA=COUNTY_SIZE; TITLE "Listing of Data Set COUNTY_SIZE"; RUN; PROC FREQ DATA=COMBINED NOPRINT; TABLES COUNTY / OUT = N_COUNTY(DROP = PERCENT); TABLES SIZE / OUT = N_SIZE(DROP = PERCENT); RUN; Chapter 6 Program 6-1: Program to Create Data Set SQL_TEST DATA SQL_TEST; INPUT SUBJECT $ DAY $ X Y Z; DATALINES; 1 MONDAY 10 20 30 1 FRIDAY 11 21 31 2 THURSDAY 30 . 34 3 MONDAY 11 22 33 3 TUESDAY 13 14 15 3 FRIDAY 14 19 21 4 FRIDAY 40 50 60 4 SATURDAY 41 42 . 5 TUESDAY 5 6 7 5 FRIDAY 6 7 8 5 SUNDAY 8 9 10 7 MONDAY 10 13 14 7 THURSDAY 14 . 22 ; PROC PRINT DATA=SQL_TEST; TITLE "Listing of Data Set SQL_TEST"; ID SUBJECT; RUN; Program 6-2: Using PROC SQL to List the Observations from a SAS Data Set PROC SQL; TITLE "Output from Program 6-2"; SELECT SUBJECT, X, Y, Z FROM SQL_TEST; QUIT; Program 6-3: Adding a GROUP BY Clause and a COUNT Function to the SQL Query PROC SQL; TITLE "Output from Program 6-3"; SELECT SUBJECT, X, Y, Z, COUNT(SUBJECT) AS NUMBER FROM SQL_TEST GROUP BY SUBJECT; QUIT; Program 6-4: Adding a HAVING Clause to Subset a Data Set PROC SQL; CREATE TABLE TWO AS SELECT * FROM SQL_TEST GROUP BY SUBJECT HAVING COUNT(SUBJECT) = 2; QUIT; Program 6-5: Using PROC SQL to Create a Macro Variable Containing a List of Subject Numbers for Subjects with Exactly Two Observations PROC SQL NOPRINT; SELECT QUOTE(SUBJECT) INTO :SUBJ_LIST SEPARATED BY " " FROM SQL_TEST GROUP BY SUBJECT HAVING COUNT(SUBJECT) = 2; QUIT; Program 6-6: Demonstrating How to Use the Previously Generated Macro Variable PROC PRINT DATA=SQL_TEST; WHERE SUBJECT IN (&SUBJ_LIST); TITLE "Subset of Subjects with Exactly Two Observations"; ID SUBJECT; RUN; Program 6-7: Using the AVG Function to Compute Subject Means PROC SQL; SELECT SUBJECT, X, AVG(X) AS AVE_X FROM SQL_TEST GROUP BY SUBJECT; QUIT; Program 6-8: Computing a Percentage Change for Each Subject PROC SQL; SELECT SUBJECT, X, AVG(X) AS AVE_X, 100 * (X - CALCULATED AVE_X)/X AS PERCENT_X FROM SQL_TEST GROUP BY SUBJECT; QUIT; Chapter 7 Program 7-1: Program to Create the DIAGNOSE Data Set DATA DIAGNOSE; INPUT @1 PATNO 2. @3 DATE MMDDYY10. @14 DX1 - DX3; FORMAT DATE MMDDYY10.; DATALINES; 1 10/21/1999 1 2 . 2 10/29/1999 2 . . 3 11/11/2000 3 . . 4 01/01/2000 1 2 3 5 02/02/2000 3 2 . 6 03/15/2000 4 . . ; Program 7-2: Creating Multiple Observations from a Single Observation without Using an Array DATA NEW_DX; SET DIAGNOSE (DROP = DATE); DX = DX1; IF DX NE . THEN OUTPUT; DX = DX2; IF DX NE . THEN OUTPUT; DX = DX3; IF DX NE . THEN OUTPUT; KEEP PATNO DX; RUN; Program 7-3: Creating Multiple Observations from a Single Observation Using an Array DATA NEW_DX; SET DIAGNOSE (DROP = DATE); ARRAY DXARRAY[3] DX1 - DX3; DO I = 1 TO 3; DX = DXARRAY[I]; IF DX NE . THEN OUTPUT; END; KEEP PATNO DX; RUN; Program 7-4: Program to Create Sample Data Set ONEPER DATA ONEPER; INPUT ID $ S1-S3; DATALINES; 01 3 4 5 02 7 8 9 03 6 5 4 ; Program 7-5: Creating Multiple Observations from a Single Observation Using an Array DATA MANYPER; SET ONEPER; ARRAY S[3]; DO TIME = 1 TO 3; SCORE = S[TIME]; OUTPUT; END; KEEP ID TIME SCORE; RUN; Program 7-6: Using a Multidimensional Array to Restructure a Data Set DATA WT_MANY; SET WT_ONE; ARRAY WTS [2,3] WT1-WT6; DO COND = 1 TO 2; DO TIME = 1 TO 3; WEIGHT = WTS[COND,TIME]; OUTPUT; END; END; DROP WT1-WT6; RUN; Program 7-7: Creating a Data Set with One Observation per Subject from a Data Set with Multiple Observations per Subject. ***Caution, this program will not work if there are any missing time values.; PROC SORT DATA=MANYPER; BY ID TIME; RUN; DATA ONEPER; ARRAY S[3] S1-S3; RETAIN S1-S3; SET MANYPER; BY ID; S[TIME] = SCORE; IF LAST.ID = 1 THEN OUTPUT; KEEP ID S1-S3; RUN; Program 7-8: Creating a Data Set with One Observation per Subject from a Data Set with Multiple Observations per Subject (corrected version) PROC SORT DATA=MANYPER2; BY ID TIME; RUN; DATA ONEPER; ARRAY S[3] S1-S3; RETAIN S1-S3; SET MANYPER2; BY ID; IF FIRST.ID = 1 THEN DO I = 1 TO 3; S[I] = .; END; S[TIME] = SCORE; IF LAST.ID = 1 THEN OUTPUT; KEEP ID S1-S3; RUN; Program 7-9: An Alternative Program to Program 7-8 that Does Not Need to Initialize Variables PROC SORT DATA=MANYPER2; BY ID TIME; RUN; DATA ONEPER; ARRAY S[3] S1-S3; DO I = 1 TO 3 UNTIL (LAST.ID); SET MANYPER2; BY ID; S[TIME] = SCORE; IF LAST.ID = 1 THEN OUTPUT; END; KEEP ID S1-S3; RUN; Program 7-10: Creating a Data Set with One Observation per Subject from a Data Set with Multiple Observations per Subject Using a Multidimensional Array PROC SORT DATA=WT_MANY; BY ID COND TIME; RUN; DATA WT_ONE; ARRAY WT[2,3] WT1-WT6; RETAIN WT1-WT6; SET WT_MANY; BY ID; IF FIRST.ID = 1 THEN DO I = 1 TO 2; DO J = 1 TO 3; WT[I,J] = .; END; END; WT[COND,TIME] = WEIGHT; IF LAST.ID = 1 THEN OUTPUT; KEEP ID WT1-WT6; RUN; Chapter 8 Program 8-1: Going from One Observation to Several Using PROC TRANSPOSE PROC SORT DATA=DIAGNOSE; BY PATNO; RUN; PROC TRANSPOSE DATA=DIAGNOSE OUT=DX_COUNT; BY PATNO; VAR DX1-DX3; RUN; Program 8-2: Modifying the PROC TRANSPOSE Statement to Drop _NAME_ and Rename COL1 PROC TRANSPOSE DATA=DIAGNOSE OUT=DX_COUNT(RENAME=(COL1=DX) DROP=_NAME_); BY PATNO; VAR DX1-DX3; RUN; Program 8-3: Eliminating the Observations with Missing Values for DX PROC TRANSPOSE DATA=DIAGNOSE OUT=DX_COUNT(RENAME=(COL1=DX) WHERE=(DX NE .) DROP=_NAME_); BY PATNO; VAR DX1-DX3; RUN; Program 8-4: Program to Create the Data Set ONEPER and Transform it into a Data Set with Several Observations per Subject DATA ONEPER; INFORMAT ID $2.; INPUT ID S1 S2 S3; DATALINES; 01 3 4 5 02 7 8 9 03 6 5 4 RUN; PROC SORT DATA=ONEPER; BY ID; RUN; PROC TRANSPOSE DATA=ONEPER OUT=TEMP(RENAME=(COL1=SCORE)); BY ID; RUN; Program 8-5: Converting the Values of _NAME_ to TIME Values DATA MANYPER; SET TEMP; TIME = INPUT(COMPRESS(_NAME_,"S"),8.); DROP _NAME_; RUN; Program 8-6: Going from One Observation per Subject to Many Observations per Subject Using PROC TRANSPOSE DATA WT_ONE; INFORMAT ID $2.; INPUT ID WT1-WT6; DATALINES; 01 155 158 162 149 148 147 02 110 112 114 107 108 109 RUN; PROC SORT DATA=WT_ONE; BY ID; RUN; PROC TRANSPOSE DATA=WT_ONE OUT=WT_COND1(RENAME=(COL1=WEIGHT)); BY ID; VAR WT1-WT6; RUN; Program 8-7: Adding the Final DATA Step to Create the COND and TIME Variables DATA WT_MANY; SET WT_COND1; TIME = INPUT(COMPRESS(_NAME_,"WT"),8.); IF TIME LE 3 THEN COND = 1; ELSE DO; COND = 2; TIME = TIME - 3; END; DROP _NAME_; RUN; Program 8-8: Program to Create Data Set MANYPER DATA MANYPER; INFORMAT ID $2.; INPUT ID $ TIME SCORE; DATALINES; 01 1 3 01 2 4 01 3 5 02 1 7 02 2 8 02 3 9 03 1 6 03 2 5 03 3 4 RUN; Program 8-9: Program to Transform a Data Set with Several Observations per Subject into a Data Set with One Observation per Subject ***Note: This program ONLY works correctly if there are no missing TIME values; PROC SORT DATA=MANYPER; BY ID TIME; RUN; PROC TRANSPOSE DATA=MANYPER OUT=ONEPER(DROP=_NAME_) PREFIX=S; BY ID; VAR SCORE; RUN; Program 8-10: Program to Create Data Set MANYPER2 (which is missing TIME=2 for ID=02) DATA MANYPER2; INFORMAT ID $2.; INPUT ID TIME SCORE; DATALINES; 01 1 3 01 2 4 01 3 5 02 1 7 02 3 9 03 1 6 03 2 5 03 3 4 RUN; Program 8-11: Fixing the Program by Using an ID Statement PROC TRANSPOSE DATA=MANYPER2 OUT=ONEPER(DROP=_NAME_) PREFIX=S; BY ID; ID TIME; VAR SCORE; RUN; Chapter 9 Program 9-1: Creating a Data Set Containing the Last Visit for Each Patient DATA LAST; SET CLINICAL; BY PATIENT; IF LAST.PATIENT = 1; RUN; Program 9-2: Creating Two Data Sets, One for the First Visit and the Other for the Last Visit for Each Patient DATA FIRST LAST; SET CLINICAL; BY PATIENT; IF FIRST.PATIENT = 1 THEN OUTPUT FIRST; IF LAST.PATIENT = 1 THEN OUTPUT LAST; RUN; Program 9-3: Computing Differences Between the First and Last Visit (using retained variables) DATA FIRST_LAST; SET CLINICAL(DROP=DX1-DX3 VITAMINS); BY PATIENT; ***Note: Data set already sorted; ***Remove patients with only one visit; IF FIRST.PATIENT = 1 AND LAST.PATIENT = 1 THEN DELETE; RETAIN FIRST_HR FIRST_SBP FIRST_DBP; IF FIRST.PATIENT = 1 THEN DO; FIRST_HR = HR; FIRST_SBP = SBP; FIRST_DBP = DBP; END; IF LAST.PATIENT = 1 THEN DO; DIFF_HR = HR - FIRST_HR; DIFF_SBP = SBP - FIRST_SBP; DIFF_DBP = DBP - FIRST_DBP; OUTPUT; END; RUN; PROC PRINT DATA=FIRST_LAST; TITLE "List of Data Set FIRST_LAST"; ID PATIENT; RUN; Program 9-4: Computing the Difference Between the First and Last Visit (using the LAG function) DATA FIRST_LAST; SET CLINICAL(DROP=DX1-DX3 VITAMINS); BY PATIENT; ***Note: Data set already sorted; ***Remove patients with only one visit; IF FIRST.PATIENT = 1 AND LAST.PATIENT = 1 THEN DELETE; IF FIRST.PATIENT = 1 OR LAST.PATIENT = 1 THEN DO; DIFF_HR = HR - LAG(HR); DIFF_SBP = SBP - LAG(SBP); DIFF_DBP = DBP - LAG(DBP); END; IF LAST.PATIENT = 1 THEN OUTPUT; RUN; Program 9-5: Computing Differences Between Every Visit (using the LAG function) DATA DIFFERENCE; SET CLINICAL(DROP=DX1-DX3 VITAMINS); BY PATIENT; ***Remove patient with only one visit; IF FIRST.PATIENT = 1 AND LAST.PATIENT = 1 THEN DELETE; DIFF_HR = HR - LAG(HR); DIFF_SBP = SBP - LAG(SBP); DIFF_DBP = DBP - LAG(DBP); IF NOT (FIRST.PATIENT = 1) THEN OUTPUT; RUN; Program 9-6: Using PROC MEANS to Count the Number of Visits for Each Patient DATA COUNT; SET CLINICAL(KEEP=PATIENT); BY PATIENT; IF FIRST.PATIENT = 1 THEN N_VISITS = 0; N_VISITS + 1; IF LAST.PATIENT = 1 THEN OUTPUT; RUN; Program 9-7: Merging the Number of Visits with the Original Data Set DATA NEW_CLINICAL; MERGE CLINICAL COUNT; BY PATIENT; RUN; Program 9-8: Using PROC FREQ to Count the Number of Visits for Each Patient PROC FREQ DATA=CLINICAL NOPRINT; TABLES PATIENT / OUT = COUNT(KEEP=PATIENT COUNT RENAME=(COUNT = N_VISITS)); RUN; Program 9-9: Using PROC MEANS to Count the Number of Visits for Each Patient PROC MEANS DATA=CLINICAL NOPRINT NWAY; CLASS PATIENT; VAR HR; OUTPUT OUT = COUNT(DROP=_TYPE_ N_HR RENAME=(_FREQ_ = N_VISITS)) N = N_HR; RUN; Program 9-10: Using PROC SQL to Count the Number of Visits for Each Patient PROC SQL; CREATE TABLE COUNT_VISITS AS SELECT PATIENT, COUNT(PATIENT) AS N_VISITS FROM CLINICAL GROUP BY PATIENT; QUIT; Program 9-11: Using a DATA Step to Select Patients with Exactly Two Visits DATA TWO; SET CLINICAL(KEEP=PATIENT); BY PATIENT; IF FIRST.PATIENT = 1 THEN N_VISITS = 0; N_VISITS + 1; IF LAST.PATIENT = 1 AND N_VISITS = 2 THEN OUTPUT; RUN; DATA TWO_VISITS; MERGE CLINICAL TWO(IN = IN_COUNT); BY PATIENT; IF IN_COUNT = 1; RUN; Program 9-12: Using PROC FREQ to Select All Patients with Two Visits PROC FREQ DATA=CLINICAL NOPRINT; TABLES PATIENT / OUT = TWO(KEEP=PATIENT COUNT RENAME=(COUNT = N_VISITS) WHERE=(N_VISITS = 2)); RUN; Program 9-13: Creating a Data Set of Patient Numbers for Patients with Exactly Two Visits, One Record per Patient PROC SQL; CREATE TABLE TWO AS SELECT PATIENT, COUNT(PATIENT) AS N_VISITS FROM CLINICAL GROUP BY PATIENT HAVING N_VISITS = 2; QUIT; Program 9-14: Creating a Data Set of All Patient Information for Patients with Exactly Two Visits, All Records per Patient PROC SQL; CREATE TABLE TWO_VISITS AS SELECT * FROM CLINICAL GROUP BY PATIENT HAVING COUNT(PATIENT) = 2; QUIT; Program 9-15: Using PROC SQL to Create a Macro Variable Containing a List of Patient Numbers for Patients with Exactly Two Visits PROC SQL NOPRINT; SELECT QUOTE(PATIENT) INTO :PT_LIST SEPARATED BY " " FROM CLINICAL GROUP BY PATIENT HAVING COUNT(PATIENT) = 2; QUIT; Program 9-16: Demonstrating How to Use the Previously Generated Macro Variable PROC PRINT DATA=CLINICAL; WHERE PATIENT IN (&PT_LIST); TITLE "Subset of Patients with Exactly Two Visits"; RUN; Program 9-17: Computing Summary Statistics for Each Patient (using PROC MEANS) PROC MEANS DATA=CLINICAL NOPRINT NWAY; CLASS PATIENT; VAR HR SBP DBP; OUTPUT OUT = PT_MEANS(DROP=_TYPE_ RENAME=(_FREQ_ = N_VISITS)) N = N_HR N_SBP N_DBP MEAN = MEAN_HR MEAN_SBP MEAN_DBP; RUN; PROC PRINT DATA=PT_MEANS; TITLE "Listing of Data Set PT_MEANS"; ID PATIENT; RUN; Program 9-18: Using PROC SQL to Compute Summary Statistics for Each Patient PROC SQL; CREATE TABLE PT_MEANS AS SELECT PATIENT, COUNT(HR) AS N_HR, COUNT(SBP) AS N_SBP, COUNT(DBP) AS N_DBP, COUNT(PATIENT) AS N_VISITS, AVG(HR) AS MEAN_HR, AVG(SBP) AS MEAN_SBP, AVG(DBP) AS MEAN_DBP FROM CLINICAL GROUP BY PATIENT; QUIT; Program 9-19: Adding the GENDER and STATE to Each Visit (after the first) DATA CLIN_EVERY; RETAIN FIRST_GENDER FIRST_STATE; SET CLIN_FIRST; BY PATIENT; IF FIRST.PATIENT = 1 THEN DO; FIRST_GENDER = GENDER; FIRST_STATE = STATE; END; ELSE DO; GENDER = FIRST_GENDER; STATE = FIRST_STATE; END; DROP FIRST_:; RUN; Program 9-20: Creating a Test Data Set of Doctor Visits DATA DOC; INPUT @1 PATIENT $3. @5 VISIT MMDDYY10. @16 DOCTOR $3.; FORMAT VISIT MMDDYY10.; DATALINES; 001 10/21/1998 ABC 001 10/29/1998 XYZ 001 12/12/1998 QED 002 01/01/1998 ABC 003 02/13/1998 QED 003 04/15/1998 MAD 005 05/06/1998 XYZ 005 05/08/1998 QED RUN; Program 9-21: Program to Look Ahead Using Multiple SET Statements PROC SORT DATA=DOC; BY PATIENT VISIT; RUN; DATA FAILURES; SET DOC; BY PATIENT; SET DOC (FIRSTOBS = 2 KEEP = VISIT RENAME = (VISIT = NEXT_VISIT)); IF LAST.PATIENT = 0 AND (NEXT_VISIT - VISIT) LT 30 THEN OUTPUT; KEEP PATIENT VISIT NEXT_VISIT DOCTOR; RUN; Program 9-22: Using a Flag to Remember Information from Previous Observations DATA VITAMIN_EVER; SET CLINICAL(KEEP = PATIENT VITAMINS); BY PATIENT; RETAIN FLAG; IF FIRST.PATIENT = 1 THEN FLAG = 0; IF VITAMINS = '1' THEN FLAG = 1; IF LAST.PATIENT = 1 THEN OUTPUT; DROP VITAMINS; RUN; Program 9-23: Using PROC FREQ to Ascertain Vitamin Use PROC FREQ DATA=CLINICAL NOPRINT; TABLES PATIENT*VITAMINS / OUT=SUMMARY(DROP = PERCENT); RUN; DATA VITAMIN_EVER; SET SUMMARY; BY PATIENT; IF VITAMINS = '1' AND COUNT GT 0 THEN USE_VITAMINS = 1; ELSE USE_VITAMINS = 0; IF LAST.PATIENT = 1 THEN OUTPUT; KEEP PATIENT USE_VITAMINS; RUN; Program 9-24: Using a SUM statement and FIRST. and LAST. Logic to Count the Number of Routine Visits for Each Patient DATA ROUTINE; SET CLINICAL(KEEP = PATIENT DX1-DX3); BY PATIENT; IF FIRST.PATIENT = 1 THEN N_ROUTINE = 0; IF DX1= '1' OR DX2 = '1' OR DX3 = '1' THEN N_ROUTINE + 1; IF LAST.PATIENT = 1 THEN OUTPUT; KEEP PATIENT N_ROUTINE; RUN; Chapter 10 Program 10-1: Using PROC MEANS and the INTCK Function to compute Weekly Averages DATA WEEK_NUM; SET OZONE; WEEK_NUMBER = INTCK('WEEK','01MAY1995'D,DATE) + 1; RUN; PROC MEANS DATA=WEEK_NUM NOPRINT NWAY; CLASS WEEK_NUMBER; VAR SPORES POLLEN OZONE MAX_TEMP; OUTPUT OUT=WEEKLY_MEANS MEAN=M_SPORES M_POLLEN M_OZONE M_MAX_TEMP; RUN; Program 10-2: Program to Plot Weekly Pollen Means SYMBOL VALUE=DOT INTERPOL=JOIN; PROC GPLOT DATA=WEEKLY_MEANS; TITLE "Mean Pollen Levels by Week"; PLOT M_POLLEN * WEEK_NUMBER; RUN; QUIT; Program 10-3: Creating a Test Data Set to Demonstrate Another Way to Group Values DATA TEST; INPUT X @@; DATALINES; 1 6 3 9 3 9 7 1 9 3 8 4 10 3 9 3 ; Program 10-4: Using the MOD Function to Group Data Values DATA GROUPING; SET TEST; GROUP + (MOD(_N_ ,4) EQ 1); RUN; Program 10-5: Computing a Moving Average for Ozone Levels (using groups of 5) DATA MOVING; SET OZONE(KEEP=DATE OZONE); OZONE_LAG24 = LAG(OZONE); OZONE_LAG48 = LAG2(OZONE); OZONE_LAG72 = LAG3(OZONE); OZONE_LAG96 = LAG4(OZONE); IF _N_ GE 5 THEN OZONE_AVE = MEAN(OF OZONE OZONE_:); RUN; SYMBOL1 VALUE = POINT INTERPOL=JOIN L=1 COLOR=BLACK; SYMBOL2 VALUE = POINT INTERPOL=JOIN L=3 COLOR=BLACK; PROC GPLOT DATA=MOVING; TITLE "Raw and Moving Average Data for Pollen in the Month of May"; WHERE DATE BETWEEN '01MAY1995'D AND '31MAY1995'D; PLOT OZONE*DATE OZONE_AVE*DATE / OVERLAY; RUN; QUIT; Chapter 11 Program 11-1: Using PROC MEANS to Compute Counts and Averages for Books Borrowed PROC MEANS DATA=LIBRARY NOPRINT CHARTYPE; CLASS ID DATE LIBRARY; TYPES ID*DATE ID*DATE*LIBRARY; VAR DATE; OUTPUT OUT=BOOKS(DROP=_FREQ_) N = N_OF_BOOKS; RUN; Program 11-2: Using PROC CHART to Display Book Frequencies by Library PROC CHART DATA = BOOKS; WHERE _TYPE_ = "111"; TITLE "Book Frequencies by Library"; VBAR N_OF_BOOKS / GROUP = LIBRARY MIDPOINTS = 0 TO 5 BY 1; RUN; Program 11-3: Computing the Book Frequency by Day of the Week and Library PROC MEANS DATA=LIBRARY NOPRINT NWAY; CLASS ID DAY LIBRARY; VAR DATE; OUTPUT OUT=BOOKS_BY_DAY(DROP = _TYPE_ _FREQ_) N = N_OF_BOOKS; RUN; PROC CHART DATA = BOOKS_BY_DAY; TITLE "Book Frequencies by Day of the Week and Library"; VBAR DAY / SUMVAR = N_OF_BOOKS TYPE = MEAN GROUP = LIBRARY DISCRETE; RUN; Program 11-4: Using PROC TABULATE to Display Book Frequencies by LC Category, Age Group and Overall PROC TABULATE DATA=LIBRARY; TITLE "Frequencies of books by LC category and Age Group"; CLASS CATEGORY AGE_GROUP; TABLES CATEGORY, (AGE_GROUP ALL)*N=" " / RTS=20; LABEL AGE_GROUP = 'Age Group' CATEGORY = 'LC Category'; KEYLABEL ALL = 'Combined'; RUN; Chapter 12 Program 12-1: Creating a Macro to List the Contents of a SAS Data Set *----------------------------------------------------------------* | Program Name: PRINT.SAS in C:\LONG\MACROS | | Purpose: Macro which prints out a complete data set or the | | first N observations. The data set name and | | (optionally) the number of observations are included | | in the title. | | Argument: %PRINT(DSN,OBS=) | | Examples: %PRINT(TEST,OBS=5) | | %PRINT(TEST) | *----------------------------------------------------------------*; %MACRO PRINT(DSN,OBS=MAX); PROC PRINT DATA=&DSN (OBS=&OBS); TITLE "Listing of data set %UPCASE(&DSN)"; %IF &OBS NE MAX %THEN TITLE2 "First &OBS Observations";; RUN; %MEND PRINT; Program 12-2: Creating a Macro to Compute the Differences Between Successive Observations in a SAS Data Set *-------------------------------------------------------------* | Macro Name: DIFF_EVERY_OBS.SAS in C:\LONG\MACROS | | Purpose: Creates a new data set with all the variables in | | the original data set plus the difference between | | the current value and the previous value for all | | variables in the VARLIST. | Arguments: IN_DSN = Input data set name | | OUT_DSN = Output data set name | | VARLIST = List of variables for differences | | Example: %DIFF_EVERY_OBS(CLINICAL,NEW,HR SBP DBP) | *-------------------------------------------------------------*; %MACRO DIFF_EVERY_OBS(IN_DSN,OUT_DSN,VARLIST); ***Create a list of variable names to hold the differences add a D_ to the beginning of each variable name; %LET TEMP = %STR( &VARLIST); %LET DLIST = %SYSFUNC(TRANWRD(&TEMP,%STR( ),%STR( D_))); DATA &OUT_DSN; SET &IN_DSN; ARRAY VARS[*] &VARLIST; ARRAY DIFF[*] &DLIST; DO I = 1 TO DIM(VARS); DIFF[I] = DIF(VARS[I]); END; DROP I; %MEND DIFF_EVERY_OBS; Program 12-3: Creating a Small Test Data Set and Calling the DIFF_EVERY_OBS Macro DATA TEST; INPUT X Y Z; DATALINES; 1 3 5 2 1 8 5 4 3 9 9 9 ; %DIFF_EVERY_OBS(TEST,OUT,X Y Z); %PRINT(OUT); Program 12-4: Creating a Macro to Compute the Difference Between the First and Last Observation for Each Subject in a SAS Data Set *----------------------------------------------------------------* | Macro Name: DIFF_FIRST_LAST.SAS in C:\LONG\MACROS | | Purpose: Using an input data set, creates a new data set | | containing the difference between the first and last | | observation for each value of an ID variable, for all | | the variables listed in the VARLIST. The names for | | the difference variables will be the names in the | | VARLIST with a "D_" add to the beginning of each name | | Arguments: IN_DSN = Input data set name | | OUT_DSN = Output data set name | | ID_VAR = ID variable | | VARLIST = List of variables for differences | | Example: %DIFF_FIRST_LAST(CLINICAL,NEW,PATIENT,HR SBP DBP) | *----------------------------------------------------------------*; %MACRO DIFF_FIRST_LAST(IN_DSN,OUT_DSN,ID_VAR,VARLIST); ***Create a list of variable names to hold the differences add a D_ to the beginning of each variable name; %LET TEMP = %STR( &VARLIST); %LET DLIST = %SYSFUNC(TRANWRD(&TEMP,%STR( ),%STR( D_))); PROC SORT DATA=&IN_DSN OUT=TEMP; BY &ID_VAR; RUN; DATA &OUT_DSN; SET &IN_DSN; BY &ID_VAR; ARRAY VARS[*] &VARLIST; ARRAY DIFF[*] &DLIST; IF FIRST.&ID_VAR = 1 OR LAST.&ID_VAR = 1 THEN DO I = 1 TO DIM(VARS); DIFF[I] = DIF(VARS[I]); END; IF LAST.&ID_VAR = 1 THEN OUTPUT; DROP I; RUN; PROC DATASETS LIBRARY=WORK; DELETE TEMP; RUN; %MEND DIFF_FIRST_LAST; Program 12-5: Creating a Macro to Compute Moving Averages for Every "N" Observations *--------------------------------------------------------------* | Macro Name: MOVING_AVE in C:\LONG\MACROS | | Purpose: Computes a moving average based on "N" observations | | Arguments: IN_DSN = Data set name | | OUT_DSN = Output data set name | | IN_VAR = Variable to compute average | | OUT_VAR = Variable to hold the moving average | | N = Number of obs for the average | | Example: %MOVING(CLINICAL,TEMP,HR,AVE_HR,5) | *--------------------------------------------------------------*; %MACRO MOVING_AVE(IN_DSN,OUT_DSN,IN_VAR,OUT_VAR,N); DATA &OUT_DSN; SET &IN_DSN; ***COMPUTE THE LAGS; _X1 = &IN_VAR; %DO I = 1 %TO &N; %LET NUM = %EVAL(&I + 1); _X&NUM = LAG&I(&IN_VAR); %END; ***If the observation number is greater than or equal to the number of values needed for the moving average, output; IF _N_ GE &N THEN DO; &OUT_VAR = MEAN (OF _X1 - _X&N); OUTPUT; END; DROP _X: ; %MEND MOVING_AVE; Program 12-6: Creating a Small Test Data Set and Calling the MOVING Macro DATA TESTAVE; INPUT X @@; DATALINES; 1 3 5 9 9 12 15 20 ; %MOVING_AVE(TESTAVE,TEMP,X,AVE_X,3); %PRINT(TEMP); Program 12-7: Creating a Macro a Compute Cell Means and Counts *--------------------------------------------------------------* | Macro Name: CELLMEANS in C:\LONG\MACROS | | Purpose: Computes means (and n's) for a list of variables, | | given a list of CLASS variables | | Arguments: IN_DSN = Data set name | | OUT_DSN = Output data set name | | CLASS = List of CLASS variables | | VARLIST = List of variables on which to compute | | means and counts | | Example: %CELLMEANS(TESTIT,TEMP,GENDER RACE,X Y) | *--------------------------------------------------------------*; %MACRO CELLMEANS(IN_DSN, OUT_DSN, CLASS, VARLIST); PROC MEANS DATA=&IN_DSN NWAY NOPRINT; CLASS &CLASS; VAR &VARLIST; OUTPUT OUT=&OUT_DSN(DROP = _TYPE_ _FREQ_) MEAN= N=/ AUTONAME; RUN; %MEND CELLMEANS; Program 12-8: Creating a Small Test Data Set and Calling the CELLMEANS Macro DATA TESTIT; INPUT RACE $ GENDER $ X Y Z; DATALINES; W M 3 4 5 W F 4 5 6 W M 4 5 6 B M 6 5 4 B M 8 7 7 B F 5 4 4 B F 4 2 3 W F 5 4 4 ; %CELLMEANS(TESTIT, TEMP, RACE GENDER, X Y); %PRINT(TEMP); Program 12-9: Creating a Macro to Count the Number of Observations per Subject *--------------------------------------------------------------* | Macro Name: COUNT_OBS in C:\LONG\MACROS | | Purpose: Counts the number of observations per subject in a | | longitudinal data set | | Arguments: IN_DSN = Data set name | | OUT_DSN = Output data set name | | IDVAR = ID Variable | | COUNTVAR = Variable to hold the count | | Example: %COUNT(CLINICAL,OUTCLIN, PATIENT, N_VISITS) | *--------------------------------------------------------------*; %MACRO COUNT_OBS(IN_DSN, OUT_DSN, IDVAR, COUNTVAR); PROC SQL; CREATE TABLE &OUT_DSN AS SELECT * , COUNT(&IDVAR) AS &COUNTVAR FROM &IN_DSN GROUP BY &IDVAR; QUIT; %MEND COUNT_OBS; Appendix Program Appendix - 1: Program to Create the TEST_SCORES Data Set DATA TEST_SCORES; INPUT COUNTY : $9. SCHOOL & $21. TEACHER : $8. MATH SCIENCE ENGLISH; DATALINES; HUNTERDON FLEMING MIDDLE SCHOOL SMITH 92 95 88 HUNTERDON FLEMING MIDDLE SCHOOL SMITH 94 89 92 HUNTERDON FLEMING MIDDLE SCHOOL SMITH . 82 84 HUNTERDON FLEMING MIDDLE SCHOOL SMITH . . 68 HUNTERDON FLEMING MIDDLE SCHOOL RIVERA 82 89 72 HUNTERDON FLEMING MIDDLE SCHOOL RIVERA 97 94 92 HUNTERDON FLEMING MIDDLE SCHOOL RIVERA . 88 . HUNTERDON ROBERT HUNTER GREGORY 80 82 94 HUNTERDON ROBERT HUNTER GREGORY 82 84 82 HUNTERDON ROBERT HUNTER WRIGHT 60 70 80 HUNTERDON ROBERT HUNTER WRIGHT 62 72 83 MIDDLESEX SAINT BARTS ACADEMY JONES 72 78 77 MIDDLESEX SAINT BARTS ACADEMY JONES 83 83 92 MIDDLESEX SAINT BARTS ACADEMY JONES 91 78 81 MIDDLESEX RUTGERS PREP MAROTTO . 99 . MIDDLESEX RUTGERS PREP MAROTTO 96 98 98 MIDDLESEX RUTGERS PREP MAROTTO 83 88 88 MIDDLESEX RUTGERS PREP MAROTTO 84 86 85 MIDDLESEX RUTGERS PREP MAROTTO 92 97 93 MIDDLESEX RUTGERS PREP FRIEDMAN 99 98 90 MIDDLESEX RUTGERS PREP FRIEDMAN 96 95 90 MIDDLESEX RUTGERS PREP FRIEDMAN 84 85 84 MIDDLESEX RUTGERS PREP PATERSON 68 72 66 MIDDLESEX RUTGERS PREP PATERSON 72 74 68 MIDDLESEX FRANKLIN HIGH PETERS 80 80 92 MIDDLESEX FRANKLIN HIGH PETERS 83 85 88 ; Program Appendix - 2: Program to Create the CLINICAL Data Set PROC FORMAT; VALUE $DXCODE '1' = 'Routine Visit' '2' = 'Cold' '3' = 'Flu' '4' = 'Ear Infection' '5' = 'Heart Problem' '6' = 'Abdominal Pain' '7' = 'Fracture' '8' = 'Breathing Problem' '9' = 'Laceration'; VALUE $YESNO '0' = 'No' '1' = 'Yes'; RUN; DATA CLINICAL; INPUT @1 PATIENT $3. @4 VISIT_DATE MMDDYY8. @12 GENDER $1. @13 STATE $2. @15 HR 3. @18 SBP 3. @21 DBP 3. @24 (DX1-DX3) ($1.) @27 VITAMINS $1.; FORMAT VISIT_DATE DATE9. DX1-DX3 $DXCODE. VITAMINS $YESNO.; LABEL PATIENT = "Patient Number" VISIT_DATE = "Visit Date" HR = "Heart Rate" SBP = "Systolic Blood Pressure" DBP = "Diastolic Blood Pressure" VITAMINS = "Is PT Taking Vitamins?"; DATALINES; 00110211998MNJ 68130 801 0 00111111998MNJ 66132 782 0 00101051999MNJ 70140 8824 0 00205061999FNJ 781901004 1 00303042000FNJ 58108 662 1 00305122000FNJ 60110 682 1 00410301999MNY 8820011079 0 00412121999MNY 1021 1 00408082000MNY 90180 982 1 00505052000FNY 48110 6658 1 00608082000FNY 861841021 1 00610102000FNY 86 1001 0 00712122000FNY 805681 ; PROC SORT DATA=CLINICAL; BY PATIENT VISIT_DATE; RUN; Program Appendix - 3: Program to create the CLIN_FIRST_ONLY Data Set PROC FORMAT; VALUE $DXCODE '1' = 'Routine Visit' '2' = 'Cold' '3' = 'Flu' '4' = 'Ear Infection' '5' = 'Heart Problem' '6' = 'Abdominal Pain' '7' = 'Fracture' '8' = 'Breathing Problem' '9' = 'Laceration'; VALUE $YESNO '0' = 'No' '1' = 'Yes'; RUN; DATA CLIN_FIRST; INPUT @1 PATIENT $3. @4 VISIT_DATE MMDDYY8. @12 GENDER $1. @13 STATE $2. @15 HR 3. @18 SBP 3. @21 DBP 3. @24 (DX1-DX3) ($1.) @27 VITAMINS $1.; FORMAT VISIT_DATE DATE9. DX1-DX3 $DXCODE. VITAMINS $YESNO.; LABEL PATIENT = "Patient Number" VISIT_DATE = "Visit Date" HR = "Heart Rate" SBP = "Systolic Blood Pressure" DBP = "Diastolic Blood Pressure" VITAMINS = "Is PT Taking Vitamins?"; DATALINES; 00110211998MNJ 68130 801 0 00111111998 66132 782 0 00101051999 70140 8824 0 00205061999FNJ 781901004 1 00303042000FNJ 58108 662 1 00305122000 60110 682 1 00410301999MNY 8820011079 0 00412121999 1021 1 00408082000 90180 982 1 00505052000FNY 48110 6658 1 00608082000FNY 861841021 1 00610102000 86 1001 0 00712122000FNY 805681 ; PROC SORT DATA=CLIN_FIRST; BY PATIENT VISIT_DATE; RUN; Program Appendix - 4 : Creating the Ozone Data Set DATA OZONE; INPUT DATE : MMDDYY10. SPORES POLLEN OZONE MAX_TEMP; FORMAT DATE MMDDYY10.; DATALINES; 05/01/1995 876 2261 36.3 62 05/02/1995 1377 2711 32.0 53 05/03/1995 800 2142 43.7 75 05/04/1995 1421 4029 42.5 71 05/05/1995 1599 6284 33.0 64 05/06/1995 1322 4228 53.6 70 05/07/1995 1322 4109 49.1 71 05/08/1995 1322 4109 39.8 70 05/09/1995 998 1915 43.4 72 05/10/1995 1134 2450 45.3 56 05/11/1995 589 1619 26.4 59 05/12/1995 1545 788 26.2 63 05/13/1995 2397 2642 51.1 75 05/14/1995 2397 2642 33.4 66 05/15/1995 2397 2642 31.8 76 05/16/1995 1565 2291 51.2 82 05/17/1995 6421 4779 34.5 71 05/18/1995 5277 658 29.1 78 05/19/1995 5854 1505 16.7 60 05/20/1995 6430 2352 51.0 79 05/21/1995 6430 2352 68.5 85 05/22/1995 6430 2352 46.5 79 05/23/1995 2152 1874 42.4 77 05/24/1995 2463 1460 71.8 90 05/25/1995 7074 1409 41.2 73 05/26/1995 4256 391 28.5 64 05/27/1995 4283 338 42.9 76 05/28/1995 4310 284 36.1 67 05/29/1995 4310 284 30.3 82 05/30/1995 4310 284 33.6 73 05/31/1995 5630 841 47.7 88 06/01/1995 6462 1096 57.3 86 06/02/1995 3564 697 42.1 80 06/03/1995 4242 477 43.9 85 06/04/1995 4242 477 42.1 82 06/05/1995 4242 477 53.2 85 06/06/1995 8609 415 43.2 81 06/07/1995 2653 75 46.6 91 06/08/1995 5721 688 60.7 89 06/09/1995 6221 747 39.9 79 06/10/1995 3261 44 31.2 74 06/11/1995 3261 44 41.1 83 06/12/1995 3261 44 29.8 73 06/13/1995 7320 61 24.1 74 06/14/1995 8353 306 20.2 75 06/15/1995 7098 341 42.5 82 06/16/1995 4829 598 62.4 86 06/17/1995 7730 198 72.7 89 06/18/1995 7730 198 87.7 93 06/19/1995 7730 198 94.6 97 06/20/1995 9441 451 81.4 96 06/21/1995 7353 347 33.4 76 06/22/1995 3499 97 29.3 78 06/23/1995 4932 81 23.5 77 06/24/1995 3532 30 26.2 79 06/25/1995 3532 30 31.0 84 06/26/1995 3532 30 39.6 83 06/27/1995 4199 28 26.5 74 06/28/1995 5397 103 29.9 72 06/29/1995 1799 29 34.1 82 06/30/1995 3522 46 41.5 84 07/01/1995 12678 36 35.9 82 07/02/1995 12678 36 32.6 80 07/03/1995 12678 36 48.9 83 07/04/1995 11277 63 72.6 84 07/05/1995 11277 63 53.6 82 07/06/1995 7853 25 45.8 85 07/07/1995 5043 39 27.3 78 07/08/1995 4216 28 70.6 86 07/09/1995 4216 28 24.7 75 07/10/1995 4216 28 51.6 86 07/11/1995 9998 44 61.0 83 07/12/1995 6041 27 70.4 81 07/13/1995 5867 21 77.6 95 07/14/1995 18174 42 94.7 98 07/15/1995 14684 24 102. 104 07/16/1995 14684 24 47.0 87 07/17/1995 14673 24 52.8 86 07/18/1995 6254 14 60.4 90 07/19/1995 24805 24 62.9 88 07/20/1995 11520 34 72.7 88 07/21/1995 17020 39 53.7 84 07/22/1995 6987 20 81.5 88 07/23/1995 6987 20 60.3 88 07/24/1995 6987 20 62.5 91 07/25/1995 11296 23 65.5 91 07/26/1995 9199 22 52.0 94 07/27/1995 14421 32 74.4 96 07/28/1995 13708 21 55.1 91 07/29/1995 13387 28 58.6 93 07/30/1995 13387 28 56.5 94 07/31/1995 13387 28 60.6 91 ; Program Appendix - 5: Program to Create the LIBRARY Data Set PROC FORMAT; VALUE $CAT 'A' = 'General Works' 'B' = 'Philosophy & Religion' 'C' - 'F' = 'History' 'G' = 'Geography' 'H' = 'Social Sciences' 'K' = 'Law' 'L' = 'Education' 'M','ML' = 'Music' 'P','PA'-'PT' = 'Literature' 'Q' = 'Science' 'R' = 'Medicine' 'T' = 'Technology'; VALUE DAYWK 1 = 'Sun.' 2 = 'Mon.' 3 = 'Tue.' 4 = 'Wed.' 5 = 'Thu.' 6 = 'Fri.' 7 = 'Sat.'; VALUE $AGEGRP 'A' = 'Adult' 'Y' = 'Youth'; RUN; DATA LIBRARY; INPUT @1 ID $5. @6 LIBRARY $10. @16 DATE MMDDYY8. @24 LC $10.; LENGTH CATEGORY $ 25; ***Extract the LC category: 1- or 2-characters before first digit; FIRST_N = INDEXC(LC,'0123456789'); ***Position of first numeral; CATEGORY = PUT(SUBSTR(LC,1,FIRST_N - 1),$CAT.); ***LIBRARY name and DATE are only entered on the first record for each visit.; ***Copy Library information from the first non-blank entry if blank; LENGTH HOLD_LIB $ 10 AGE_GROUP $ 1; RETAIN HOLD_LIB HOLD_DATE; IF LIBRARY NE " " THEN DO; HOLD_LIB = LIBRARY; HOLD_DATE = DATE; END; ELSE DO; LIBRARY = HOLD_LIB; DATE = HOLD_DATE; END; DUE_DATE = DATE + 14; DAY = WEEKDAY(DATE); AGE_GROUP = SUBSTR(ID,1,1); FORMAT DATE DUE_DATE DATE9. DAY DAYWK. AGE_GROUP $AGEGRP.; DROP HOLD_: FIRST_N; DATALINES; Y0123CLINTON 10212000H410.B5 Y0123 H415.A7 A1234FLEMINGTON10212000Q550.B10 A1234 Q550.C8 A1234 M410.C12 A2121CLINTON 10212000PA317.A9 Y8888FLEMINGTON10212000E110.A1 Y8888 F441.B9 A7654FLEMINGTON10212000ML440.B5 A8765CLINTON 10212000L550.V5 A8765 L550.D3 A8765 L550.E3 A8765 L551.A6 Y6565FLEMINGTON10232000Q220.B7 Y6565 Q220.C7 A1022CLINTON 10232000Q660.A12 A1022 Q660.B3 A1022 Q450.C4 A2888CLINTON 10232000ML120.S22 A3433CLINTON 10232000A1200.D7 A1010FLEMINGTON10232000PN450.A1 A1010 PN555.V8 A1010 PA380.C1 A1010 P555.C8 Y2033FLEMINGTON10232000ML233.D3 Y2033 M330.C8 A2345CLINTON 10242000Q22.C1 A2345 Q23.F66 Y0123CLINTON 10252000H415.D22 A9545FLEMINGTON10252000A433.B8 A9545 A450.S2 A6198CLINTON 10252000M766.D71 A7734CLINTON 10252000Q552.B88 A7734 Q455.A1 Y6644FLEMINGTON10262000P555.C43 A3344FLEMINGTON10272000M233.C5 A3344 M233.D1 A3344 ML400.C8 A1234CLINTON 10282000M550.V77 A1234 Q440.B22 A1234 Q440.F34 Y1322FLEMINGTON10302000P1200.V7 Y1322 P890.E4 Y1322 PA220.G8 A9988CLINTON 10302000M733.C3 A1747FLEMINGTON10300000L7.D8 Y0011FLEMINGTON10312000L345.F76 Y0011 L299.A3 A4949CLINTON 10312000PA444.B88 A4949 PA444.B9 A2121CLINTON 11012000A3345.X77 A2121 A300.V6 Y2345FLEMINGTON11012000Q33.A44 Y4433FLEMINGTON11012000A878.B81 A4444CLINTON 11022000M888.B9 A4444 ML500.V7 Y0123FLEMINGTON11042000PA970.V4 A2121CLINTON 11062000Q355.B8 A2121 Q355.V44 ;