proc iml; /*----------------------------------------------------*/ /* The SIMSIZE Module */ /* */ /* This module computes the simulation size necessary */ /* to estimate a binomial proportion p within a given */ /* bound of error 99% of the time. This module is */ /* useful when p is the significance level of a */ /* simulation-based test. */ /* */ /* Input: */ /* 1) SIGLEV, a number in [0,1] that you think is */ /* close to the true binomial proportion (or */ /* significance level). If you have no idea, */ /* then you should use 0.5. */ /* 2) BOUND, your desired bound of error -- the */ /* amount added & subtracted to obtain lower */ /* and upper 99% confidence limits. */ /* */ /* Returns: */ /* 1) NSS, a positive integer that is the */ /* necessary simulation size. */ /*----------------------------------------------------*/ start simsize(siglev,bound, nss); reset noname; z=probit(0.995); nss=ceil(siglev#(1-siglev)#(z/bound)##2); print nss; reset fw=2; print "is the simulation size necessary to obtain a value within"; reset fw=5; print bound "of" siglev "99% of the time"; finish; start samp(n,seed, rvec); rvec=j(1,n,.); u=-99; do j = 1 to n; call ranuni(seed,u); rvec[1,j]=u; end; rvec=rank(rvec); finish; start order(mat0,n,k, mat1); d=mat0[,1:k]; nvec=d[+,]; add=j(n,1,.); do i = 1 to n; ind=d[i,<:>]; add[i,1]=10#nvec[1,ind]+ind; end; mat1=mat0||add; mat2=mat1; mat2[rank(add),]=mat1; mat1=mat2[,1:k+1]; finish; start getstat(test,n,k,d,r,nvec,cont,stderr, w); if test='KW' then w=sum(((r`*d)##2)/nvec); if test='JO' then do; dr=hdir(d,r); w=0; do j = 1 to k-1; drj=dr[,j]; do h = j+1 to k; drh=dr[,h]; mannwitt=0; do i = 1 to n; mannwitt=mannwitt+ sum((drj[i,1]>1e-2 & drj[i,1]1e-2 & drj[i,1]=drh)); end; w=w+mannwitt; end; end; end; if test='MC' then do; mean=cont*((d`*r)/nvec`); w=abs(mean/stderr); end; finish; /*----------------------------------------------------*/ /* The ONEWAY Module */ /* */ /* This module obtains simulation-based significance */ /* levels with 99% confidence limits for either the */ /* Kruskal-Wallis test or the Jonckheere test in the */ /* one-way layout. */ /* */ /* Input: */ /* 1) TEST, a character string that denotes which */ /* test you want to perform. The choices are */ /* 'KW' (for Kruskal-Wallis) and 'JO' (for */ /* Jonckheere). */ /* 2) NN, a positive integer specifying desired */ /* simulation size. */ /* 3) MAT, the Nx2 data matrix -- the 1st column */ /* specifies numeric class variable levels, the */ /* 2nd specifies the dependent variable values. */ /* 4) SEED, a nonnegative ingeger used in */ /* generating the simulated data. If SEED is */ /* set to 0, then the clocktime is used. SEED */ /* should be non-zero when you want to be able */ /* to reproduce your results. */ /*----------------------------------------------------*/ start oneway(test,nn,mat,seed); reset noname fw=6; test=upcase(test); pval=0; eps=1e-12; n=nrow(mat); matu=design(mat[,1])||ranktie(mat[,2]); k=ncol(matu)-1; run order(matu,n,k, mat0); d=mat0[,1:k]; r=mat0[,k+1]; nvec=d[+,]; run getstat(test,n,k,d,r,nvec,.,., w0); do i = 1 to nn; run samp(n,seed, rvec); run getstat(test,n,k,d,rvec`,nvec,.,., w); if w+eps>=w0 then pval=pval+1; end; pval=pval/nn; z=probit(0.995); se=sqrt((pval#(1-pval))/nn); inc=z#se; l=max(pval-inc,0); u=min(pval+inc,1); table=nn||pval||l||u; chead={"Sim. Size" "P-Value" "99% LCL" "99% UCL"}; rhead=test; print table[rowname=rhead colname=chead]; finish; /*----------------------------------------------------*/ /* The MULTCOMP Module */ /* */ /* This module obtains simulation-based significance */ /* levels for multiple comparisions with an overall */ /* type I error rate in the K-sample one-way layout. */ /* */ /* Input: */ /* 1) NN, a positive integer specifying desired */ /* simulation size. */ /* 2) MAT, the Nx2 data matrix -- the 1st column */ /* specifies numeric class variable levels, the */ /* 2nd specifies the dependent variable values. */ /* 3) CONT, a QxK matrix defining the Q linear */ /* combinations of interest */ /* 4) SEED, a nonnegative integer used in */ /* generating the simulated data. If SEED is */ /* set to 0, then the clocktime is used. SEED */ /* should be non-zero when you want to be able */ /* to reproduce your results. */ /*----------------------------------------------------*/ start multcomp(nn,mat,cont,seed); reset noname fw=6; q=nrow(cont); pval=j(q,1,0); eps=1e-12; n=nrow(mat); matu=design(mat[,1])||ranktie(mat[,2]); k=ncol(matu)-1; run order(matu,n,k, mat0); d=mat0[,1:k]; r=mat0[,k+1]; nvec=d[+,]; stderr=sqrt(vecdiag(cont*diag(1/nvec)*cont`)); run getstat('MC',n,k,d,r,nvec,cont,stderr, w0); do i = 1 to nn; run samp(n,seed, rvec); run getstat('MC',n,k,d,rvec`,nvec,cont,stderr, w); crit=max(w); pval=pval+((crit+eps)>=w0); end; pval=pval/nn; ch1={"Contrast"}; ch2={"P-Value"}; print cont[colname=ch1] pval[colname=ch2]; finish;