*** The macro SRS_SMP selects a simple random ;
*** sample from a given population ;
*** m_data: sample meta data ;
*** *************************************************** ;
%macro SRS_SMP(m_data);
/* Select a simple random sample as described by the meta data set.
   m_data: meta data set supplying popdsn (population data set name),
           smpdsn (sample data set name) and smpn (sample size n).
           If it holds several observations, the values of the last
           one are the ones left in the macro variables.
   Effects: the variable popn (population size N) is added to m_data.
   When 1 < n < N the SRS macro (not defined in this section) is
   called to build the sample and a summary is written to the log,
   after switching off SYMBOLGEN and MLOGIC. Otherwise only a
   message is written to the log. */
%local popn;
/* [Sec 1] Read the meta data. */
data _null_;
  set &m_data;
  call symput ('popdsn',popdsn);
  call symput ('smpdsn',smpdsn);
  call symput ('smpn' ,smpn );
run;
/* [Sec 2] Get the population N size. NOBS= is available before the
   SET statement executes, so no observation is actually read. */
data _null_;
  call symput('popn',popn);
  stop;
  set &popdsn nobs=popn;
run;
/* Re-assigning with %let strips the leading blanks left by the
   numeric-to-character conversion in call symput. */
%let popn=&popn;
%let smpn=&smpn;
/* [Sec 3] Add N to meta data. */
data &m_data;
  set &m_data;
  popn = &popn;
run;
/* The macro processor evaluates a chained comparison left to right,
   so 1 < n < N would compare the 0/1 result of (1 < n) with N and
   accept almost any n. Each bound is therefore tested separately. */
%if (&smpn > 1) and (&smpn < &popn) %then %do;
  /* [Sec 4] Get the simple random sample. */
  %SRS(&popdsn, &smpdsn, &smpn);
  /* [Sec 5] Sampling info in log. */
  options nosymbolgen nomlogic;
  %put ...;
  %put ... population: &popdsn;
  %put ... sample dataset: &smpdsn;
  %put ...;
  %put ... population size N: &popn;
  %put ... sample size n: &smpn;
  %put ...;
%end;
%else %do;
  %put ... n=&smpn obs cannot be selected;
  %put ... from a population of N=&popn !;
%end;
%mend;
*** The macro SRS_EST calculates the estimated ;
*** population mean and se for a simple random ;
*** sample ;
*** m_data: simple random sample meta data set ;
*** y: given analysis variable ;
*** *************************************************** ;
%macro SRS_EST(m_data,y);
/* Estimate the population mean and its standard error from a simple
   random sample.
   m_data: meta data set supplying popdsn, smpdsn and popn (N).
   y:      analysis variable in the sample data set.
   Effects: creates work data set stats (my, vy, ny), switches off
   SYMBOLGEN and MLOGIC, and writes the results to the log. */

/* [Sec 1] Extract meta data. */
data _null_;
  set &m_data;
  call symput ('popn',popn);
  call symput ('smpdsn',smpdsn);
  call symput ('popdsn',popdsn);
run;

/* [Sec 2] Calculate sample statistics. */
proc means data=&smpdsn mean var n noprint;
  var &y;
  output out=stats mean=my var=vy n=ny;
run;

/* [Sec 3] Calculate estimates. The variance of the mean is the
   sample variance over n, scaled by the finite population
   correction (N-n)/N. */
data _null_;
  set stats;
  fpc     = (&popn-ny)/&popn;
  est_var = fpc*(vy/ny);
  est_se  = sqrt(est_var);
  call symput('my',my);
  call symput('vy',vy);
  call symput('ny',ny);
  call symput('mean',my);
  call symput('se',est_se);
run;

/* [Sec 4] Display estimation results. */
options nosymbolgen nomlogic;
%put ...;
%put ... population: &popdsn;
%put ... sample dataset: &smpdsn;
%put ...;
%put ... sample mean = &my;
%put ... sample var = &vy;
%put ... sample size = &ny;
%put ...;
%put ... est pop mean = &mean;
%put ... std error = &se;
%put ...;
%mend;
*** The macro STR_SMP selects a stratified ;
*** random sample from a given population ;
*** m_data: stratification meta data ;
*** srs_list: (local) list of the strata random samples ;
*** k: (local) the number of strata ;
*** *************************************************** ;
%macro STR_SMP(m_data);
/* Select a stratified random sample as described by the meta data.
   m_data: meta data set with one observation per stratum, supplying
           popdsn, smpdsn, strvar (stratification variable name),
           str_x (stratum value) and str_n (stratum sample size,
           read with the 10. informat). Observations whose str_n is
           not greater than 0 are ignored.
   Effects: builds the sample data set from the per-stratum samples,
   rewrites m_data with str_n and pop_n per retained stratum, deletes
   the work data sets it created, switches off SYMBOLGEN and MLOGIC,
   and writes a summary to the log. The SRS macro it calls is not
   defined in this section. */
/* i is declared local so that the loops below cannot overwrite a
   macro variable named i that belongs to the caller. */
%local srs_list k i;
/* [Sec 1] Read the meta data. */
data _null_;
  set &m_data;
  if input(str_n,10.) > 0 then do;
    count+1;
    if count=1 then do;
      call symput ('popdsn',popdsn);
      call symput ('smpdsn',smpdsn);
      call symput ('strvar',strvar);
    end;
    call symput ('str_x'!!left(count), str_x);
    call symput ('str_n'!!left(count), str_n);
    call symput ( 'k', left(count));
  end;
run;
/* k stays null when no stratum has a positive sample size. */
%if &k ne %then %do;
  %do i=1 %to &k;
    /* [Sec 2] Subset pop dataset and count Ni.
       Ni starts at 0 because the END= flag is never raised when no
       observation satisfies the WHERE clause, which would otherwise
       leave pop_n&i undefined for the comparison in Sec 3. */
    %let pop_n&i = 0;
    data subset;
      set &popdsn end=last;
      where &strvar = "&&str_x&i";
      if last=1 then
        call symput('pop_n'!!trim(left(&i)), left(_n_));
    run;
    /* [Sec 3] If sample strata size >= pop strata size, then use
       the whole pop strata. Otherwise, sample it. */
    %if %eval(&&str_n&i ge &&pop_n&i) %then %do;
      %let str_n&i = &&pop_n&i;
      data srs&i;
        set subset;
      run;
    %end;
    %else %SRS(subset, srs&i, &&str_n&i);
    /* [Sec 4] Add strata sample name to list and delete the
       population subset. */
    %let srs_list = &srs_list srs&i;
    proc datasets library=work;
      delete subset;
    run;
    quit;
  %end;
  /* [Sec 5] Combine strata samples and delete individual samples. */
  data &smpdsn;
    set &srs_list;
  run;
  proc datasets library=work;
    delete &srs_list;
  run;
  quit;
  /* [Sec 6] Rewrite meta data: one observation per retained stratum.
     The stratum value is also stored under the name of the
     stratification variable itself. */
  data &m_data;
    popdsn="&popdsn";
    smpdsn="&smpdsn";
    strvar="&strvar";
    %do i=1 %to &k;
      str_x ="&&str_x&i";
      str_n =&&str_n&i;
      pop_n =&&pop_n&i;
      &strvar="&&str_x&i";
      output;
    %end;
  run;
  /* [Sec 7] Display meta data in log. */
  options nosymbolgen nomlogic;
  %put ...;
  %put ... population: &popdsn;
  %put ... sample dataset: &smpdsn;
  %put ... stratified by: &strvar;
  %put ...;
  %do i=1 %to &k;
    %put ... strata: &&str_x&i;
    %put ... pop size N&i = &&pop_n&i;
    %put ... sample size n&i = &&str_n&i;
    %put ...;
  %end;
%end;
%else %put ... no strata defined;
%mend;
*** The macro STR_EST calculates the estimated ;
*** population mean and se for a stratified ;
*** random sample ;
*** m_data: strata sampling meta data ;
*** y: given analysis variable ;
*** *************************************************** ;
%macro STR_EST(m_data,y);
/* Estimate the population mean and its standard error from a
   stratified random sample.
   m_data: strata meta data set supplying popdsn, smpdsn, strvar,
           pop_n per stratum, and a variable named after the
           stratification variable that is used as the join key.
   y:      analysis variable in the sample data set.
   Effects: creates work data sets stats (one row per stratum with
   my, vy, ny) and calc (estm, estv, estse), switches off SYMBOLGEN
   and MLOGIC, and writes the results to the log. */

/* [Sec 1] Extract pop meta data info. Names come from the first
   observation. N is the running total of pop_n, stored at the end. */
data _null_;
  set &m_data end=last;
  nn + pop_n;
  if _n_=1 then do;
    call symput ('strvar',strvar);
    call symput ('smpdsn',smpdsn);
    call symput ('popdsn',popdsn);
  end;
  if last=1 then call symput ('popn',left(nn));
run;

/* [Sec 2] Calculate strata statistics. */
proc means data=&smpdsn mean var n nway noprint;
  class &strvar;
  var &y;
  output out=stats(drop=_type_ _freq_)
         mean=my var=vy n=ny;
run;
%put ... note: a printout of work.stats will display;
%put ... individual strata sample statistics;

/* [Sec 3] Merge statistics with meta data and calculate estimates. */
proc sql noprint;
  create table calc as
    select sum(pop_n*my)/&popn as estm,
           sum((pop_n)*(pop_n-ny)*(vy/ny))/&popn**2 as estv,
           sqrt(calculated estv) as estse
      from stats as s
           inner join &m_data as m
           on s.&strvar = m.&strvar
  ;
  select estm, estse
    into :mean, :se
    from calc
  ;
quit;

/* [Sec 4] Display estimation results. */
options nosymbolgen nomlogic;
%put ...;
%put ... population: &popdsn;
%put ... sample dataset: &smpdsn;
%put ...;
%put ... analysis variable: &y;
%put ... stratified by: &strvar;
%put ...;
%put ... estimated mean = &mean;
%put ... std error = &se;
%put ...;
%mend;
*** The macro CLS_SMP selects a random ;
*** sample of clusters from a population ;
*** m_data: cluster meta data ;
*** *************************************************** ;
%macro CLS_SMP(m_data);
/* Select a random sample of whole clusters from a population.
   m_data: cluster meta data set supplying popdsn, smpdsn, clsvar
           (cluster variable name) and clsn (number of clusters to
           sample).
   Effects: creates work data set unique (distinct cluster values),
   adds popn (number of clusters in the population) to m_data and,
   when 1 < clsn < popn, calls the SRS macro (not defined in this
   section) to pick clusters into work data set selected, writes all
   population rows of those clusters to the sample data set, switches
   off SYMBOLGEN and MLOGIC and logs a summary. Otherwise only a
   message is written to the log. */
%local whr popn;
/* [Sec 1] Read the meta data. */
data _null_;
  set &m_data;
  call symput ('popdsn',popdsn);
  call symput ('smpdsn',smpdsn);
  call symput ('clsvar',clsvar);
  call symput ('clsn' ,clsn );
run;
/* [Sec 2] Get unique list of possible clusters. */
proc freq data=&popdsn(keep=&clsvar) noprint;
  table &clsvar / out=unique(keep=&clsvar);
run;
/* [Sec 3] Count the clusters. NOBS= is available before the SET
   statement executes, so no observation is actually read. */
data _null_;
  call symput('popn',popn);
  stop;
  set unique nobs=popn;
run;
%let popn = &popn;
/* [Sec 4] Save popn in the meta data. */
data &m_data;
  set &m_data;
  popn = &popn;
run;
/* The macro processor evaluates a chained comparison left to right,
   so 1 < clsn < popn would compare the 0/1 result of (1 < clsn)
   with popn and accept almost any clsn. Each bound is therefore
   tested separately. */
%if (&clsn > 1) and (&clsn < &popn) %then %do;
  /* [Sec 5] Sample the clusters. */
  %SRS(unique, selected, &clsn);
  /* [Sec 6] Build a where to select clusters. For every selected
     cluster the quoted value is stored in part and then appended
     to whr by the %let submitted through call execute. */
  data _null_;
    set selected;
    qt="'";
    call symput('part',qt!!trim(&clsvar)!!qt);
    call execute('%let whr=&whr &part ;');
  run;
  /* [Sec 7] Create the sample dataset. */
  data &smpdsn;
    set &popdsn;
    where &clsvar in (&whr);
  run;
  /* [Sec 8] Display cluster info in log. The %let statements strip
     leading and trailing blanks before display. */
  options nosymbolgen nomlogic;
  %let popn = &popn;
  %let clsn = &clsn;
  %put ...;
  %put ... population: &popdsn;
  %put ... sample dataset: &smpdsn;
  %put ...;
  %put ... # clusters in population: &popn;
  %put ... # clusters in sample: &clsn;
  %put ...;
  %put ... cluster variable: &clsvar;
  %put ... sampled clusters: &whr;
%end;
%else %do;
  %put ... &clsn clusters cannot be selected;
  %put ... from a population of &popn clusters!;
%end;
%mend;
*** The macro CLS_EST calculates the estimated ;
*** population mean and se for a cluster sample ;
*** m_data: cluster sample meta data set ;
*** y: given analysis variable ;
*** *************************************************** ;
%macro CLS_EST(m_data,y);
/* Estimate the population mean and its standard error from a
   cluster sample.
   m_data: cluster meta data set supplying popdsn, smpdsn, clsvar,
           clsn (clusters sampled) and popn (clusters in population).
   y:      analysis variable in the sample data set.
   Effects: creates work data sets stats (per cluster total yi and
   count mi) and cal (mean, am2, vp1, vp2, v, se), switches off
   SYMBOLGEN and MLOGIC, and writes the results to the log. */

/* [Sec 1] Meta data info. */
data _null_;
  set &m_data;
  call symput ('popn',popn);
  call symput ('clsn',clsn);
  call symput ('clsvar',clsvar);
  call symput ('smpdsn',smpdsn);
  call symput ('popdsn',popdsn);
run;

/* [Sec 2] Calculate cluster statistics. */
proc means data=&smpdsn sum n nway noprint;
  class &clsvar;
  var &y;
  output out=stats sum=yi n=mi;
run;
%put ... note: a printout of work.stats will display;
%put ... individual cluster statistics;

/* [Sec 3] Calculate estimates. The inner query attaches the ratio
   mean ym = sum(yi)/sum(mi) to every cluster row. The outer query
   builds the variance from the squared deviations yi - ym*mi. */
proc sql noprint;
  create table cal as
    select ym as mean,
           avg(mi)**2 as am2,
           (&popn-&clsn)/(&popn*&clsn*calculated am2) as vp1,
           sum((yi-ym*mi)**2)/(&clsn-1) as vp2,
           calculated vp1*calculated vp2 as v,
           sqrt(calculated v) as se
      from (select yi, mi, (sum(yi)/sum(mi)) as ym
              from stats)
  ;
  select mean, se
    into :mean, :se
    from cal
  ;
quit;
/* Re-assigning with %let strips leading and trailing blanks. */
%let mean=&mean;
%let se=&se;

/* [Sec 4] Display estimation results. */
options nosymbolgen nomlogic;
%let popn = &popn;
%let clsn = &clsn;
%put ;
%put ...;
%put ... population: &popdsn;
%put ... sample dataset: &smpdsn;
%put ...;
%put ... analysis variable: &y;
%put ... cluster variable: &clsvar;
%put ...;
%put ... # clusters in population: &popn;
%put ... # clusters in sample: &clsn;
%put ...;
%put ... estimated mean = &mean;
%put ... std error = &se;
%put ...;
%mend;