*** The macro SRS_SMP selects a simple random           ;
   *** sample from a given population                      ;
   ***     m_data: sample meta data                        ;
   *** *************************************************** ;
%macro SRS_SMP(m_data);
      * Draws a simple random sample as described by the   ;
      * one-row meta data set m_data. Columns read are     ;
      * popdsn, smpdsn and smpn. Column popn is added to   ;
      * m_data. The draw itself is delegated to the SRS    ;
      * macro, which is defined outside this macro.        ;
   %local popn;

      * [Sec 1] Read the meta data. CALL SYMPUTX strips    ;
      *         blanks, so no later trimming is needed.    ;
   data _null_;
      set &m_data;
      call symputx('popdsn',popdsn);
      call symputx('smpdsn',smpdsn);
      call symputx('smpn'  ,smpn  );
   run;

      * [Sec 2] Get the population N size. NOBS= is        ;
      *         assigned before CALL SYMPUTX executes and  ;
      *         STOP ends the step before any row is read. ;
   data _null_;
      call symputx('popn',popn);
      stop;
      set &popdsn nobs=popn;
   run;

      * [Sec 3] Add N to meta data.                        ;
   data &m_data;
      set &m_data;
      popn = &popn;
   run;

      * The macro facility has no chained comparison. An   ;
      * expression of the form 1 < n < N is evaluated left ;
      * to right as (1 < n) < N, which is true for every N ;
      * above 1. Both bounds are therefore tested          ;
      * explicitly.                                        ;
   %if &smpn > 1 and &smpn < &popn %then %do;

         * [Sec 4] Get the simple random sample.           ;
      %SRS(&popdsn, &smpdsn, &smpn);

         * [Sec 5] Sampling info in log.                   ;
      options nosymbolgen nomlogic;
      %put ...;
      %put ...        population: &popdsn;
      %put ...    sample dataset: &smpdsn;
      %put ...;
      %put ...    population size N: &popn;
      %put ...        sample size n: &smpn;
      %put ...;

   %end;
   %else %do;
      %put ...  n=&smpn obs cannot be selected;
      %put ...  from a population of N=&popn !;
   %end;
%mend;


   *** The macro SRS_EST calculates the estimated          ;
   *** population mean and se for a simple random          ;
   *** sample                                              ;
   ***     m_data: simple random sample meta data set      ;
   ***     y: given analysis variable                      ;
   *** *************************************************** ;
%macro SRS_EST(m_data,y);
      * Estimates the population mean of y and its         ;
      * standard error from a simple random sample.        ;
      *    m_data: one-row meta data set holding popdsn,   ;
      *            smpdsn and popn                         ;
      *    y:      analysis variable in the sample         ;
      * Results are written to the log. The sample         ;
      * statistics are also left in work.stats.            ;

      * [Sec 1] Extract meta data. CALL SYMPUTX trims the  ;
      *         values and converts numerics quietly.      ;
   data _null_;
      set &m_data;
      call symputx('popdsn',popdsn);
      call symputx('smpdsn',smpdsn);
      call symputx('popn',popn);
   run;

      * [Sec 2] Calculate sample statistics.               ;
   proc means data=&smpdsn mean var n noprint;
      var &y;
      output out=stats mean=my var=vy n=ny;
   run;

      * [Sec 3] Calculate estimates. The variance of the   ;
      *         mean carries the finite population         ;
      *         correction (N-n)/N. An empty sample or a   ;
      *         negative variance leaves the result        ;
      *         missing instead of raising an invalid      ;
      *         operation note.                            ;
   data _null_;
      set stats;
      mean = my;
      if ny > 0 then var = ((&popn-ny)/&popn)*(vy/ny);
      if var >= 0 then se = sqrt(var);
      call symputx('mean',mean);
      call symputx('se',se);
      call symputx('my',my);
      call symputx('vy',vy);
      call symputx('ny',ny);
   run;

      * [Sec 4] Display estimation results.                ;
   options nosymbolgen nomlogic;
   %put ...;
   %put ...        population: &popdsn;
   %put ...    sample dataset: &smpdsn;
   %put ...;
   %put ...       sample mean = &my;
   %put ...       sample var  = &vy;
   %put ...       sample size = &ny;
   %put ...;
   %put ...      est pop mean = &mean;
   %put ...         std error = &se;
   %put ...;
%mend;


   *** The macro STR_SMP selects a stratified              ;
   *** random sample from a given population               ;
   ***     m_data: stratification meta data                ;
   ***     srs_list: (local) list of the strata samples    ;
   ***     k: (local) the number of strata                 ;
   *** *************************************************** ;
%macro STR_SMP(m_data);
      * Draws a stratified random sample as described by   ;
      * the meta data set m_data, one row per stratum.     ;
      * Columns read are popdsn, smpdsn, strvar, str_x     ;
      * and str_n. Rows whose str_n is not above zero are  ;
      * ignored. On success m_data is rewritten with the   ;
      * stratum sizes actually used. Sampling within a     ;
      * stratum is delegated to the SRS macro, which is    ;
      * defined outside this macro.                        ;
   %local srs_list k i;

      * [Sec 1] Read the meta data. CATS lets str_n be     ;
      *         either character or numeric, which matters ;
      *         because Sec 6 writes it back as numeric.   ;
      *         Character values go through CALL SYMPUT    ;
      *         untrimmed so that Sec 6 rebuilds them at   ;
      *         their stored width and no stratum value is ;
      *         truncated to the width of the first one.   ;
   data _null_;
      set &m_data;
      if input(cats(str_n),32.) > 0 then do;
         count+1;
         if count=1 then do;
            call symput ('popdsn',popdsn);
            call symput ('smpdsn',smpdsn);
            call symput ('strvar',strvar);
         end;
         call symput (cats('str_x',count), str_x);
         call symputx(cats('str_n',count), str_n);
         call symputx('k', count);
      end;
   run;

   %if &k ne %then %do;   * if k is not null               ;

      %do i=1 %to &k;

            * [Sec 2] Subset pop dataset and count Ni.     ;
            *         The count starts at zero because the ;
            *         step below never executes when the   ;
            *         stratum has no population rows.      ;
         %let pop_n&i = 0;
         data subset;
            set &popdsn end=last;
            where &strvar = "&&str_x&i";
            if last=1 then
               call symputx("pop_n&i", _n_);
         run;

            * [Sec 3] If sample strata size >= pop strata  ;
            *         size, then use whole pop strata.     ;
            *         Otherwise, sample it.                ;
         %if %eval(&&str_n&i ge &&pop_n&i) %then %do;
            %let str_n&i = &&pop_n&i;
            data srs&i;
               set subset;
            run;
         %end;
         %else %SRS(subset, srs&i, &&str_n&i);

            * [Sec 4] Add strata sample name to list       ;
            *         and delete population subset         ;
         %let srs_list = &srs_list srs&i;
         proc datasets library=work nolist;
            delete subset;
         run;
         quit;

      %end;

         * [Sec 5] Combine strata samples and              ;
         *         delete individual samples               ;
      data &smpdsn;
         set &srs_list;
      run;
      proc datasets library=work nolist;
         delete &srs_list;
      run;
      quit;

         * [Sec 6] Rewrite meta data. One row per stratum  ;
         *         is written. The stratum value is also   ;
         *         stored under the stratification         ;
         *         variable name.                          ;
      data &m_data;
         popdsn="&popdsn";
         smpdsn="&smpdsn";
         strvar="&strvar";
         %do i=1 %to &k;
            str_x ="&&str_x&i";
            str_n =&&str_n&i;
            pop_n =&&pop_n&i;
            &strvar="&&str_x&i";
            output;
         %end;
      run;

         * [Sec 7] Display meta data in log.               ;
      options nosymbolgen nomlogic;
      %put ...;
      %put ...        population: &popdsn;
      %put ...    sample dataset: &smpdsn;
      %put ...     stratified by: &strvar;
      %put ...;
      %do i=1 %to &k;
         %put ...   strata: &&str_x&i;
         %put ...          pop size  N&i = &&pop_n&i;
         %put ...       sample size  n&i = &&str_n&i;
         %put ...;
      %end;
   %end;
   %else %put ... no strata defined;
%mend;


   *** The macro STR_EST calculates the estimated          ;
   *** population mean and se for a stratified             ;
   *** random sample                                       ;
   ***     m_data:  strata sampling meta data              ;
   ***     y:  given analysis variable                     ; 
   *** *************************************************** ;
%macro STR_EST(m_data,y);
      * Estimates the population mean of y and its         ;
      * standard error from a stratified random sample.    ;
      *    m_data: meta data set, one row per stratum,     ;
      *            holding popdsn, smpdsn, strvar, pop_n   ;
      *            and a column named after the            ;
      *            stratification variable                 ;
      *    y:      analysis variable in the sample         ;
      * Results are written to the log. Strata statistics  ;
      * stay in work.stats and the estimates in work.calc. ;

      * [Sec 1] Extract pop meta data info. The population ;
      *         size is the sum of the strata sizes.       ;
   data _null_;
      set &m_data end=last;
      if _n_=1 then do;
         call symputx('popdsn',popdsn);
         call symputx('smpdsn',smpdsn);
         call symputx('strvar',strvar);
      end;
      nn + pop_n;
      if last=1 then call symputx('popn',nn);
   run;

      * [Sec 2] Calculate strata statistics.               ;
   proc means data=&smpdsn mean var n nway noprint;
      class &strvar;
      var &y;
      output out=stats(drop=_type_ _freq_)
                 mean=my var=vy n=ny;
   run;
   %put ...  note:      a printout of work.stats will display;
   %put ...  individual strata sample statistics;

      * [Sec 3] Merge statistics with meta data            ;
      *         and calculate estimates.                   ;
   proc sql noprint;
      create table calc as
      select sum(pop_n*my)/&popn as estm,
         sum((pop_n)*(pop_n-ny)*(vy/ny))/&popn**2 as estv,
         sqrt(calculated estv) as estse
         from stats as s, &m_data as m
         where s.&strvar = m.&strvar
      ;
      select estm, estse
         into :mean, :se
         from calc
      ;
   quit;
      * INTO stores numbers right aligned, so the padding  ;
      * is trimmed before display.                         ;
   %let mean=&mean;
   %let se=&se;

      * [Sec 4] Display estimation results.                ;
   options nosymbolgen nomlogic;
   %put ...;
   %put ...        population: &popdsn;
   %put ...    sample dataset: &smpdsn;
   %put ...;
   %put ...     analysis variable: &y;
   %put ...             stratified by: &strvar;
   %put ...;
   %put ...    estimated mean = &mean;
   %put ...               std error = &se;
   %put ...;
%mend;


   *** The macro CLS_SMP selects a random                  ;
   *** sample of clusters from a population                ;
   ***     m_data: cluster meta data                       ;
   *** *************************************************** ;
%macro CLS_SMP(m_data);
      * Draws a random sample of whole clusters as         ;
      * described by the one-row meta data set m_data.     ;
      * Columns read are popdsn, smpdsn, clsvar and clsn.  ;
      * Column popn, the number of distinct clusters, is   ;
      * added to m_data. The cluster draw is delegated to  ;
      * the SRS macro, which is defined outside this       ;
      * macro. The WHERE list is built with quoted         ;
      * literals, so the cluster variable is expected to   ;
      * be character.                                      ;
   %local whr popn;

      * [Sec 1] Read the meta data. CALL SYMPUTX strips    ;
      *         blanks, so no later trimming is needed.    ;
   data _null_;
      set &m_data;
      call symputx('popdsn',popdsn);
      call symputx('smpdsn',smpdsn);
      call symputx('clsvar',clsvar);
      call symputx('clsn'  ,clsn  );
   run;

      * [Sec 2] Get unique list of possible clusters.      ;
   proc freq data=&popdsn(keep=&clsvar) noprint;
      table &clsvar / out=unique(keep=&clsvar);
   run;

      * [Sec 3] Count the clusters. NOBS= is assigned      ;
      *         before CALL SYMPUTX executes and STOP ends ;
      *         the step before any row is read.           ;
   data _null_;
      call symputx('popn',popn);
      stop;
      set unique nobs=popn;
   run;

      * [Sec 4] Save popn in the meta data.                ;
   data &m_data;
      set &m_data;
      popn = &popn;
   run;

      * The macro facility has no chained comparison. An   ;
      * expression of the form 1 < n < N is evaluated left ;
      * to right as (1 < n) < N, which is true for every N ;
      * above 1. Both bounds are therefore tested          ;
      * explicitly.                                        ;
   %if &clsn > 1 and &clsn < &popn %then %do;

         * [Sec 5] Sample the clusters.                    ;
      %SRS(unique, selected, &clsn);

         * [Sec 6] Build a where to select clusters. Each  ;
         *         selected value is wrapped in single     ;
         *         quotes and the values are joined with   ;
         *         blanks.                                 ;
      proc sql noprint;
         select "'" || trim(&clsvar) || "'"
            into :whr separated by ' '
            from selected;
      quit;

         * [Sec 7] Create the sample dataset.              ;
      data &smpdsn;
         set &popdsn;
         where &clsvar in (&whr);
      run;

         * [Sec 8] Display cluster info in log.            ;
      options nosymbolgen nomlogic;
      %put ...;
      %put ...        population: &popdsn;
      %put ...    sample dataset: &smpdsn;
      %put ...;
      %put ...  # clusters in population: &popn;
      %put ...  # clusters in     sample: &clsn;
      %put ...;
      %put ...  cluster variable: &clsvar;
      %put ...  sampled clusters: &whr;
   %end;
   %else %do;
      %put ...  &clsn clusters cannot be selected;
      %put ...  from a population of &popn clusters!;
   %end;
%mend;


   *** The macro CLS_EST calculates the estimated          ;
   *** population mean and se for a cluster sample         ;
   ***     m_data: cluster sample meta data set            ;
   ***     y: given analysis variable                      ;
   *** *************************************************** ;
%macro CLS_EST(m_data,y);
      * Estimates the population mean of y and its         ;
      * standard error from a cluster sample, using the    ;
      * ratio of the cluster totals to the cluster sizes.  ;
      *    m_data: one-row meta data set holding popdsn,   ;
      *            smpdsn, clsvar, clsn and popn           ;
      *    y:      analysis variable in the sample         ;
      * Results are written to the log. Cluster statistics ;
      * stay in work.stats and the estimates in work.cal.  ;

      * [Sec 1] Meta data info. CALL SYMPUTX strips        ;
      *         blanks, so no later trimming is needed.    ;
   data _null_;
      set &m_data;
      call symputx('popdsn',popdsn);
      call symputx('smpdsn',smpdsn);
      call symputx('clsvar',clsvar);
      call symputx('clsn',clsn);
      call symputx('popn',popn);
   run;

      * [Sec 2] Calculate cluster statistics. yi is the    ;
      *         cluster total and mi the cluster count.    ;
   proc means data=&smpdsn sum n nway noprint;
      class &clsvar;
      var &y;
      output out=stats sum=yi n=mi;
   run;
   %put ...  note:  a printout of work.stats will display;
   %put ...         individual cluster statistics;

      * [Sec 3] Calculate estimates. The overall ratio ym  ;
      *         comes from a one-row subquery joined to    ;
      *         every cluster row, and every output column ;
      *         is an aggregate. The query therefore needs ;
      *         no remerging of summary statistics and     ;
      *         work.cal holds exactly one row.            ;
   proc sql noprint;
      create table cal as
      select max(r.ym) as mean,
         avg(s.mi)**2 as am2,
         (&popn-&clsn)/(&popn*&clsn*calculated am2) as vp1,
         sum((s.yi-r.ym*s.mi)**2)/(&clsn-1) as vp2,
         calculated vp1*calculated vp2 as v,
         sqrt(calculated v) as se
         from stats as s,
            (select sum(yi)/sum(mi) as ym from stats) as r
      ;
      select mean, se into :mean, :se
         from cal
      ;
   quit;
      * INTO stores numbers right aligned, so the padding  ;
      * is trimmed before display.                         ;
   %let mean=&mean;
   %let se=&se;

      * [Sec 4] Display estimation results.                ;
   options nosymbolgen nomlogic;
   %put ;
   %put ...;
   %put ...        population: &popdsn;
   %put ...    sample dataset: &smpdsn;
   %put ...;
   %put ...         analysis variable: &y;
   %put ...          cluster variable: &clsvar;
   %put ...;
   %put ...  # clusters in population: &popn;
   %put ...  # clusters in sample: &clsn;
   %put ...;
   %put ...    estimated mean = &mean;
   %put ...         std error = &se;
   %put ...;
%mend;