data one; input group score; cards; 1 4 1 5 1 5 2 5 2 6 2 7 ; run; proc iml; start choose(vec,mvec0,m,n, mvec1); mvec1=mvec0; do place=m to 1 by -1; if mvec0[1,place]n then do; nck=0; return; end; if k=0 | k=n then do; nck=1; return; end; if k=1 | k=n-1 then do; nck=n; return; end; mi=min(k,n-k); ma=max(k,n-k); nck=((ma+1:n)[#])/((2:mi)[#]); finish; start wwprob(ivec,m,n,mn, w,prob); w=1; do i = 1 to mn-1; if ivec[i+1,1]^=ivec[i,1] then w=w+1; end; prob=0; run comb(mn,m, f5); do j = 2 to w; wl=floor((j-1)/2); wu=floor(j/2)-1; run comb(m-1,wl, f1); run comb(n-1,wu, f2); run comb(m-1,wu, f3); run comb(n-1,wl, f4); prob=(f1#f2+f3#f4)/f5+prob; end; finish; start getsvec(test,avec,rvec, svec); mn=nrow(rvec); if test='MEAN' | test='WILCOXON' then do; svec=rvec; return; end; if test='MEDIAN' then svec=(avec>(mn+1)/2); if test='VW' then svec=probit(avec/(mn+1)); if test='SAVAGE' then do; temp=cusum(1/(mn-(1:mn)`+1))-1; svec=temp[avec,]; end; if test='LTAIL' then svec= (avec<(mn+1)/4)#(avec-(mn+1)/4)+ (avec>3#(mn+1)/4)#(avec-3#(mn+1)/4); if test='HTAIL' then svec= (avec<(mn+1)/4)#(-(mn+1)/4)+ (avec>3#(mn+1)/4)#((mn+1)/4)+ (avec>=(mn+1)/4)#(avec<=3#(mn+1)/4)#(avec-(mn+1)/2); if test='RSKEW' then svec=(avec<(mn+1)/2)#(avec-(mn+1)/2); if test='LSKEW' then svec=(avec>(mn+1)/2)#(avec-(mn+1)/2); if test='KLOTZ' | test='KLOTZMOD' then svec=(probit(avec/(mn+1)))##2; if test='AB' | test='ABMOD' then svec=abs(avec-(mn+1)/2); if test='MOOD' | test='MOODMOD' then svec=(avec-(mn+1)/2)##2; if test='CONOVER' then svec=avec##2; if test='CS' then svec=(avec/(mn+1))##4; u=unique(rvec)`; nu=nrow(u); if nu1 then svec[ind,]=sum(svec[ind,])/ci; free ind; end; end; finish; start getstat(test,m,n,mvec,svec,help, w); mn=m+n; if test='VARIANCE' then do; if min(m,n)<2 then do; print "ERROR: Both Sample Sizes Must Be >2"; return; end; m1=sum(svec[mvec,]); m2=(sum(svec)-m1)/n; m1=m1/m; s1=0; s2=0; do i = 1 to mn; if any(i=mvec)=1 then s1=s1+(svec[i,1]-m1)##2; if any(i=mvec)=0 then s2=s2+(svec[i,1]-m2)##2; end; if s2<=0 then w=99999; else w=s1/s2#(n-1)/(m-1); return; end; if test='LEPAGE2' | test='LEPCON' | test='LEPDIS' then do; s=(svec[mvec,])[+,]; means=help[,1:2]; vars=help[,3:4]; z=(s-means)/sqrt(vars); if test='LEPAGE2' then w=ssq(z); if test='LEPCON' then do; p=help[,5]; w=p#z[,1]+(1-p)#z[,2]; end; if test='LEPDIS' then do; p=help[,5]; w=p#z[,1]-(1-p)#z[,2]; end; return; end; if test='KS' | test='CM' then do; f1=j(mn,1,0); f2=j(mn,1,0); ivec=j(mn,1,0); do i = 1 to mn; if any(i=mvec)=1 then ivec[i,1]=1; end; do i = 1 to mn; yi=svec[i,1]; f1[i,1]=sum((ivec=1)&(svec<=yi)); f2[i,1]=sum((ivec=0)&(svec<=yi)); end; f1=f1/m; f2=f2/n; if test='KS' then w=(max(f1-f2))||(max(f2-f1))||(max(abs(f2-f1))); if test='CM' then w=((m#n)/(mn#mn))#ssq(f1-f2); return; end; w=sum(svec[mvec,]); if test='MEAN' then w=w/m-(help-w)/n; return; finish; /*--------------------------------------------------------*/ /* The NPAR2SAM Module */ /* */ /* This module performs various nonparametric 2-sample */ /* tests. The tests can be grouped into 3 categories */ /* (references given): */ /* */ /* A) Tests For Difference in Location */ /* WILCOXON -- Wilcoxon Rank Sum (1) */ /* MEDIAN -- Median (1) */ /* VW -- Van der Waerden (1) */ /* SAVAGE -- Savage (4) */ /* HTAIL -- Heavy-Tailed Distributions (1) */ /* LTAIL -- Light-Tailed Distributions (1) */ /* LSKEW -- Left-Skewed Distributions (1) */ /* RSKEW -- Right-Skewed Distributions (1) */ /* MEAN -- Permutations of Mean Difference (5) */ /* */ /* B) Tests For Difference in Scale */ /* AB -- Ansari-Bradley (1) */ /* ABMOD -- AB Ajusted For Location Diff. (2) */ /* MOOD -- Mood (1) */ /* MOODMOD -- Mood Adjusted For Location Diff. (2) */ /* KLOTZ -- Klotz (2) */ /* KLOTZMOD -- Klotz Adjusted For Location Diff. (2) */ /* CONOVER -- Conover Squared Ranks (3) */ /* VARIANCE -- Permutation of Variance Ratio (5) */ /* */ /* C) Tests For Semi-General Alternatives */ /* CS -- Conover-Salsburg (6) */ /* LEPAGE2 -- Lepage's 2 DF (6) */ /* LEPCON -- Lepage's 1 DF Concordant (6) */ /* LEPDIS -- Lepage's 1 DF Discordant (6) */ /* */ /* D) Tests For Completely General Alternatives */ /* KS -- Kolmogorov Smirnov (4) */ /* CM -- Cramer-von Mises (1) */ /* WW -- Wald-Wolfowitz Runs Test (7) */ /* */ /* Ref. Key: */ /* 1) Randles & Wolfe (1979) */ /* 2) Conover, Johnson, & Johnson (1981) */ /* 3) Convoer & Iman (1978) */ /* 4) Lehmann (1975) */ /* 5) Good (1994) */ /* 6) Podger & Gastwirth (1994) */ /* 7) Gibbons (1992) */ /* */ /* Input: */ /* */ /* 1) TEST, a character string that specifies the */ /* scores used. The possible choices are */ /* (listed alphabetically): */ /* AB, ABMOD, CM, CONOVER, CS, HTAIL, KLOTZ, */ /* KLOTZMOD, KS, LEPAGE2, LEPCON, LEPDIS, LSKEW, */ /* LTAIL, MEAN, MEDIAN, MOOD, MOODMOD, RSKEW, */ /* SAVAGE, VARIANCE, VW, WILCOXON, WW */ /* */ /* 2) MAT, the (M+N)x2 data matrix -- the 1st column */ /* specifies numeric class variable levels, the */ /* 2nd specifies the dependent variable values. */ /* */ /* Returns PVAL, a 1x3 vector that gives the exact */ /* significance level for lower-tailed, upper-tailed, and */ /* two-tailed hypotheses. The CM, LEPAGE2, & WW tests */ /* are used only for a two-tailed hypothesis so the first */ /* 2 components of PVAL are set to missing if one of */ /* those 3 tests is specified */ /*--------------------------------------------------------*/ start npar2sam(test,mat, pval); test=upcase(test); reset noname; rhead={"T. S." "P-Value"}; chead={"Lower-Tail" "Upper-Tail" "Two-Tail"}; print test "Test Results:"; d=design(mat[,1]); m=d[+,1]; n=d[+,2]; mn=m+n; help=.; matn=mat; if test='WW' then do; y=mat[,2]; avec=rank(y); rvec=ranktie(y); if ssq(rvec-avec)>1e-6 then do; print "SORRY: Cannot Do WW Test -- Ties in Data"; print "Please try KS or CM Test"; return; end; else do; matn[avec,]=mat; ivec=matn[,1]; run wwprob(ivec,m,n,mn, w0,prob); pval=j(1,3,.); pval[1,3]=prob; print "Observed Test Statistic=" w0; print "Significance Levels:"; print pval[colname=chead]; return; end; end; if test='ABMOD' | test='KLOTZMOD' | test='MOODMOD' | test='CONOVER' then do; do i = 1 to mn; if d[i,1]=1 then temp1=temp1//mat[i,2]; if d[i,2]=1 then temp2=temp2//mat[i,2]; end; if test^='CONOVER' then do; run median(temp1, m1); run median(temp2, m2); center=m1||m2; matn[,2]=mat[,2]-d*center`; end; if test='CONOVER' then do; m1=temp1[:]; m2=temp2[:]; center=m1||m2; matn[,2]=abs(mat[,2]-d*center`); end; end; if test='MEAN' | test='VARIANCE' | test='KS' | test='CM' then do; rvec=matn[,2]; avec=.; if test='MEAN' then help=mat[+,2]; end; else do; avec=rank(matn[,2]); rvec=ranktie(matn[,2]); end; do i = 1 to mn; if d[i,1]=1 then do; mvec=mvec||i; end; end; vec=1:mn; mvec0=vec[1,1:m]; pval=j(1,3,0); size=((max(m,n)+1:mn)[#])/((1:min(m,n))[#]); size=round(size,1); eps=1e-6; svec=.; if test='VARIANCE' | test='KS' | test='CM' then svec=rvec; if test='LEPCON' | test='LEPDIS' then do; run getsvec('VW',avec,rvec, svec1); run getsvec('KLOTZ',avec,rvec, svec2); svec=svec1||svec2; do i = 1 to mn; if d[i,1]=1 then temp1=temp1//mat[i,2]; if d[i,2]=1 then temp2=temp2//mat[i,2]; end; ybar1=temp1[:]; ybar2=temp2[:]; ldiff=abs(ybar1-ybar2); s1=ssq(temp1-ybar1)/(m-1); s2=ssq(temp2-ybar2)/(n-1); srat=sqrt(max(s1,s2)/min(s1,s2)); p=ldiff/(ldiff+srat); abar1=svec1[:]; abar2=svec2[:]; ssa1=ssq(svec1-abar1); ssa2=ssq(svec2-abar2); cbar=m/mn; ssc=m#(n/mn); m1=mn#abar1#cbar; m2=mn#abar2#cbar; v1=(ssa1#ssc)/(mn-1); v2=(ssa2#ssc)/(mn-1); help=m1||m2||v1||v2||p; end; if test='LEPAGE2' then do; run getsvec('WILCOXON',avec,rvec, svec1); run getsvec('AB',avec,rvec, svec2); svec=svec1||svec2; abar1=svec1[:]; abar2=svec2[:]; ssa1=ssq(svec1-abar1); ssa2=ssq(svec2-abar2); cbar=m/mn; ssc=m#(n/mn); m1=mn#abar1#cbar; m2=mn#abar2#cbar; v1=(ssa1#ssc)/(mn-1); v2=(ssa2#ssc)/(mn-1); help=m1||m2||v1||v2; end; if test^='VARIANCE' & test^='KS' & test^='CM' & test^='LEPAGE2' & test^='LEPCON' & test^='LEPDIS' then run getsvec(test,avec,rvec, svec); run getstat(test,m,n,mvec,svec,help, w0); do i = 1 to size; if test='KS' then do; run getstat(test,m,n,mvec0,svec,help, wivec); ind=(wivec+eps>=w0); pval=pval+ind; end; if test='CM' | test='LEPAGE2' then do; run getstat(test,m,n,mvec0,svec,help, wi); if wi+eps>=w0 then pval[1,3]=pval[1,3]+1; end; if test^='KS' & test^='CM' & test^='LEPAGE2' then do; run getstat(test,m,n,mvec0,svec,help, wi); if wi-eps<=w0 then pval[1,1]=pval[1,1]+1; if wi+eps>=w0 then pval[1,2]=pval[1,2]+1; if test='VARIANCE' then do; max0=max(w0,1/w0); min0=min(w0,1/w0); if wi+eps>=max0 | wi-eps<=min0 then pval[1,3]=pval[1,3]+1; end; if test^='VARIANCE' then do; if test='LEPCON' | test='LEPDIS' | test='MEAN' then expec=0; else expec=m#svec[:,]; if abs(wi-expec)+eps>=abs(w0-expec) then pval[1,3]=pval[1,3]+1; end; end; if i