/*---------------------------------------------------------------------
  Macro SRS_SMP: select a simple random sample from a population.

  Parameter:
    m_data   sample meta data set. The macro reads the variables
             popdsn (population data set name), smpdsn (sample data
             set name) and smpn (requested sample size) from it and
             writes the variable popn (population size) back to it.

  The sample itself is drawn by the macro SRS, which is defined
  outside this section; it is called as SRS(input, output, size).
---------------------------------------------------------------------*/
%macro SRS_SMP(m_data);
  %local popn;

  /* [Sec 1] Read the meta data. */
  data _null_;
    set &m_data;
    call symput('popdsn', popdsn);
    call symput('smpdsn', smpdsn);
    call symput('smpn',   smpn);
  run;

  /* [Sec 2] Get the population N size.                               */
  /* NOBS= is available before any observation is read, so the STOP   */
  /* ahead of SET ends the step without reading the population.       */
  data _null_;
    call symput('popn', popn);
    stop;
    set &popdsn nobs=popn;
  run;
  %let popn = &popn;   /* trim leading blanks */

  /* [Sec 3] Add N to meta data. */
  data &m_data;
    set &m_data;
    popn = &popn;
  run;

  /* The macro language evaluates "1 < n < N" left to right as        */
  /* "(1 < n) < N", which is true for almost any n, so the two bounds */
  /* are tested separately.                                           */
  %if (1 < &smpn) and (&smpn < &popn) %then %do;

    /* [Sec 4] Get the simple random sample. */
    %SRS(&popdsn, &smpdsn, &smpn);

    /* [Sec 5] Sampling info in log. */
    options nosymbolgen nomlogic;
    %put ...;
    %put ... population: &popdsn;
    %put ... sample dataset: &smpdsn;
    %put ...;
    %put ... population size N: &popn;
    %put ... sample size n: &smpn;
    %put ...;
  %end;
  %else %do;
    %put ... n=&smpn obs cannot be selected;
    %put ... from a population of N=&popn !;
  %end;
%mend;

/*---------------------------------------------------------------------
  Macro SRS_EST: estimate the population mean and its standard error
  from a simple random sample.

  Parameters:
    m_data   simple random sample meta data set; the variables popdsn,
             smpdsn and popn are read from it.
    y        analysis variable in the sample data set.

  Writes the work data set STATS and prints the results to the log.
---------------------------------------------------------------------*/
%macro SRS_EST(m_data,y);

  /* [Sec 1] Extract meta data. */
  data _null_;
    set &m_data;
    call symput('popdsn', popdsn);
    call symput('smpdsn', smpdsn);
    call symput('popn',   popn);
  run;

  /* [Sec 2] Calculate sample statistics. */
  proc means data=&smpdsn mean var n noprint;
    var &y;
    output out=stats mean=my var=vy n=ny;
  run;

  /* [Sec 3] Calculate estimates.                                     */
  /* Variance of the mean with finite population correction:          */
  /*   ((N - n) / N) * (s2 / n)                                       */
  data _null_;
    set stats;
    mean = my;
    var  = ((&popn-ny)/&popn)*(vy/ny);
    se   = sqrt(var);
    call symput('mean', mean);
    call symput('se',   se);
    call symput('my',   my);
    call symput('vy',   vy);
    call symput('ny',   ny);
  run;

  /* CALL SYMPUT right-aligns converted numbers; re-assigning with    */
  /* %let strips the padding, as CLS_EST already does for its output. */
  %let mean = &mean;
  %let se   = &se;
  %let my   = &my;
  %let vy   = &vy;
  %let ny   = &ny;

  /* [Sec 4] Display estimation results. */
  options nosymbolgen nomlogic;
  %put ...;
  %put ... population: &popdsn;
  %put ... sample dataset: &smpdsn;
  %put ...;
  %put ... sample mean = &my;
  %put ... sample var = &vy;
  %put ... sample size = &ny;
  %put ...;
  %put ... est pop mean = &mean;
  %put ... std error = &se;
  %put ...;
%mend;

/*---------------------------------------------------------------------
  Macro STR_SMP: select a stratified random sample from a population.

  Parameter:
    m_data   stratification meta data set, one observation per
             stratum. The variables popdsn, smpdsn and strvar are
             taken from the first stratum with a positive size; str_x
             (stratum value) and str_n (requested stratum sample
             size, read with INPUT(str_n,10.)) are taken from every
             stratum with a positive size. The data set is rewritten
             at the end with the actual stratum sizes (see Sec 6).

  Local macro variables:
    srs_list  list of the per-stratum sample data sets (srs1 srs2 ...)
    k         number of strata with a positive requested size
    i         stratum loop index

  Per-stratum samples are drawn by the macro SRS, which is defined
  outside this section.
---------------------------------------------------------------------*/
%macro STR_SMP(m_data);
  %local srs_list k i;

  /* [Sec 1] Read the meta data. */
  data _null_;
    set &m_data;
    if input(str_n,10.) > 0 then do;
      count+1;
      if count=1 then do;
        call symput('popdsn', popdsn);
        call symput('smpdsn', smpdsn);
        call symput('strvar', strvar);
      end;
      call symput('str_x'!!left(count), str_x);
      call symput('str_n'!!left(count), str_n);
      call symput('k', left(count));
    end;
  run;

  %if &k ne %then %do;   /* k stays null when no stratum qualified */

    %do i=1 %to &k;

      /* [Sec 2] Subset pop dataset and count Ni.                     */
      /* Start from 0 so that pop_n&i exists even when no population  */
      /* row matches this stratum; the data step below only sets it   */
      /* on the last matching row.                                    */
      %let pop_n&i = 0;
      data subset;
        set &popdsn end=last;
        where &strvar = "&&str_x&i";
        if last=1 then
          call symput('pop_n'!!trim(left(&i)), left(_n_));
      run;

      /* [Sec 3] If sample strata size >= pop strata size, then use   */
      /*         the whole pop strata. Otherwise, sample it.          */
      %if %eval(&&str_n&i ge &&pop_n&i) %then %do;
        %let str_n&i = &&pop_n&i;
        data srs&i;
          set subset;
        run;
      %end;
      %else %do;
        %SRS(subset, srs&i, &&str_n&i);
      %end;

      /* [Sec 4] Add strata sample name to list and delete the        */
      /*         population subset.                                   */
      %let srs_list = &srs_list srs&i;
      proc datasets library=work nolist;
        delete subset;
      run;
      quit;
    %end;

    /* [Sec 5] Combine strata samples and delete individual samples. */
    data &smpdsn;
      set &srs_list;
    run;
    proc datasets library=work nolist;
      delete &srs_list;
    run;
    quit;

    /* [Sec 6] Rewrite meta data: one observation per sampled stratum */
    /*         carrying the final sample size str_n, the stratum      */
    /*         population size pop_n, and the stratum value under the */
    /*         stratification variable's own name.                    */
    data &m_data;
      popdsn="&popdsn";
      smpdsn="&smpdsn";
      strvar="&strvar";
      %do i=1 %to &k;
        str_x ="&&str_x&i";
        str_n =&&str_n&i;
        pop_n =&&pop_n&i;
        &strvar="&&str_x&i";
        output;
      %end;
    run;

    /* [Sec 7] Display meta data in log. */
    options nosymbolgen nomlogic;
    %put ...;
    %put ... population: &popdsn;
    %put ... sample dataset: &smpdsn;
    %put ... stratified by: &strvar;
    %put ...;
    %do i=1 %to &k;
      %put ... strata: &&str_x&i;
      %put ... pop size N&i = &&pop_n&i;
      %put ... sample size n&i = &&str_n&i;
      %put ...;
    %end;
  %end;
  %else %put ... no strata defined;
%mend;

/*---------------------------------------------------------------------
  Macro STR_EST: estimate the population mean and its standard error
  from a stratified random sample.

  Parameters:
    m_data   strata sampling meta data set; popdsn, smpdsn and strvar
             are read from its first observation, pop_n is summed
             over all observations, and the data set is joined to
             the strata statistics on the stratification variable.
    y        analysis variable in the sample data set.

  Writes the work data sets STATS and CALC and prints the results to
  the log.
---------------------------------------------------------------------*/
%macro STR_EST(m_data,y);

  /* [Sec 1] Extract pop meta data info; popn is the sum of pop_n. */
  data _null_;
    set &m_data end=last;
    if _n_=1 then do;
      call symput('popdsn', popdsn);
      call symput('smpdsn', smpdsn);
      call symput('strvar', strvar);
    end;
    nn + pop_n;
    if last=1 then do;
      call symput('popn', left(nn));
    end;
  run;

  /* [Sec 2] Calculate strata statistics. */
  proc means data=&smpdsn mean var n nway noprint;
    class &strvar;
    var &y;
    output out=stats(drop=_type_ _freq_) mean=my var=vy n=ny;
  run;
  %put ... note: a printout of work.stats will display;
  %put ... individual strata sample statistics;

  /* [Sec 3] Merge statistics with meta data and calculate estimates. */
  /*   mean     = sum(Ni * ybar_i) / N                                */
  /*   variance = sum(Ni * (Ni - ni) * s2_i / ni) / N**2              */
  proc sql noprint;
    create table calc as
      select sum(pop_n*my)/&popn as estm,
             sum((pop_n)*(pop_n-ny)*(vy/ny))/&popn**2 as estv,
             sqrt(calculated estv) as estse
        from stats as s, &m_data as m
        where s.&strvar = m.&strvar
    ;
    select estm, estse into :mean, :se
      from calc
    ;
  quit;

  /* Strip the padding added by INTO, as CLS_EST does. */
  %let mean = &mean;
  %let se   = &se;

  /* [Sec 4] Display estimation results. */
  options nosymbolgen nomlogic;
  %put ...;
  %put ... population: &popdsn;
  %put ... sample dataset: &smpdsn;
  %put ...;
  %put ... analysis variable: &y;
  %put ... stratified by: &strvar;
  %put ...;
  %put ... estimated mean = &mean;
  %put ... std error = &se;
  %put ...;
%mend;

/*---------------------------------------------------------------------
  Macro CLS_SMP: select a random sample of clusters from a population
  and keep every population row that belongs to a selected cluster.

  Parameter:
    m_data   cluster meta data set. The macro reads popdsn, smpdsn,
             clsvar (cluster variable) and clsn (number of clusters
             to select) from it and writes popn (number of distinct
             clusters in the population) back to it.

  The cluster variable is handled as character data: its values are
  trimmed and wrapped in single quotes to build the WHERE list.
  Writes the work data sets UNIQUE and SELECTED. The clusters are
  drawn by the macro SRS, which is defined outside this section.
---------------------------------------------------------------------*/
%macro CLS_SMP(m_data);
  %local whr popn;

  /* [Sec 1] Read the meta data. */
  data _null_;
    set &m_data;
    call symput('popdsn', popdsn);
    call symput('smpdsn', smpdsn);
    call symput('clsvar', clsvar);
    call symput('clsn',   clsn);
  run;

  /* [Sec 2] Get unique list of possible clusters. */
  proc freq data=&popdsn(keep=&clsvar) noprint;
    table &clsvar / out=unique(keep=&clsvar);
  run;

  /* [Sec 3] Count the clusters (NOBS= read without reading rows). */
  data _null_;
    call symput('popn', popn);
    stop;
    set unique nobs=popn;
  run;
  %let popn = &popn;

  /* [Sec 4] Save popn in the meta data. */
  data &m_data;
    set &m_data;
    popn = &popn;
  run;

  /* The macro language evaluates "1 < n < N" left to right as        */
  /* "(1 < n) < N", which is true for almost any n, so the two bounds */
  /* are tested separately.                                           */
  %if (1 < &clsn) and (&clsn < &popn) %then %do;

    /* [Sec 5] Sample the clusters. */
    %SRS(unique, selected, &clsn);

    /* [Sec 6] Build a where list to select clusters.                 */
    /* For each selected cluster the quoted value is stored in PART   */
    /* and appended to WHR by the %let passed to CALL EXECUTE.        */
    /* NOTE(review): this relies on the %let being processed while    */
    /* the data step is still iterating; a cluster value containing   */
    /* a single quote would break the generated list.                 */
    data _null_;
      set selected;
      qt="'";
      call symput('part', qt!!trim(&clsvar)!!qt);
      call execute('%let whr=&whr &part ;');
    run;

    /* [Sec 7] Create the sample dataset. */
    data &smpdsn;
      set &popdsn;
      where &clsvar in (&whr);
    run;

    /* [Sec 8] Display cluster info in log. */
    options nosymbolgen nomlogic;
    %let popn = &popn;
    %let clsn = &clsn;
    %put ...;
    %put ... population: &popdsn;
    %put ... sample dataset: &smpdsn;
    %put ...;
    %put ... # clusters in population: &popn;
    %put ... # clusters in sample: &clsn;
    %put ...;
    %put ... cluster variable: &clsvar;
    %put ... sampled clusters: &whr;
  %end;
  %else %do;
    %put ... &clsn clusters cannot be selected;
    %put ... from a population of &popn clusters!;
  %end;
%mend;

/*---------------------------------------------------------------------
  Macro CLS_EST: estimate the population mean and its standard error
  from a cluster sample.

  Parameters:
    m_data   cluster sample meta data set; popdsn, smpdsn, clsvar,
             clsn and popn are read from it.
    y        analysis variable in the sample data set.

  Writes the work data sets STATS and CAL and prints the results to
  the log.
---------------------------------------------------------------------*/
%macro CLS_EST(m_data,y);

  /* [Sec 1] Meta data info. */
  data _null_;
    set &m_data;
    call symput('popdsn', popdsn);
    call symput('smpdsn', smpdsn);
    call symput('clsvar', clsvar);
    call symput('clsn',   clsn);
    call symput('popn',   popn);
  run;

  /* [Sec 2] Calculate cluster statistics: per cluster, yi is the sum */
  /*         of y and mi the count of non-missing y.                  */
  proc means data=&smpdsn sum n nway noprint;
    class &clsvar;
    var &y;
    output out=stats sum=yi n=mi;
  run;
  %put ... note: a printout of work.stats will display;
  %put ... individual cluster statistics;

  /* [Sec 3] Calculate estimates.                                     */
  /*   ym  = sum(yi) / sum(mi)                                        */
  /*   v   = (N - n) / (N * n * avg(mi)**2)                           */
  /*         * sum((yi - ym*mi)**2) / (n - 1)                         */
  /* NOTE(review): both queries mix summary functions with plain      */
  /* columns and have no GROUP BY, so CAL presumably holds one        */
  /* identical row per cluster; the INTO below keeps the first row.   */
  proc sql noprint;
    create table cal as
      select ym as mean,
             avg(mi)**2 as am2,
             (&popn-&clsn)/(&popn*&clsn*calculated am2) as vp1,
             sum((yi-ym*mi)**2)/(&clsn-1) as vp2,
             calculated vp1*calculated vp2 as v,
             sqrt(calculated v) as se
        from (select yi, mi, (sum(yi)/sum(mi)) as ym
                from stats)
    ;
    select mean, se into :mean, :se
      from cal
    ;
  quit;
  %let mean=&mean;
  %let se=&se;

  /* [Sec 4] Display estimation results. */
  options nosymbolgen nomlogic;
  %let popn = &popn;
  %let clsn = &clsn;
  %put ;
  %put ...;
  %put ... population: &popdsn;
  %put ... sample dataset: &smpdsn;
  %put ...;
  %put ... analysis variable: &y;
  %put ... cluster variable: &clsvar;
  %put ...;
  %put ... # clusters in population: &popn;
  %put ... # clusters in sample: &clsn;
  %put ...;
  %put ... estimated mean = &mean;
  %put ... std error = &se;
  %put ...;
%mend;