A New View of Statistics Go to: Previous · Contents · Search · Home
On-The-Fly Sampling for DIFFERENCES IN FREQUENCIES

This SAS program checks whether the confidence interval for a frequency difference predicted by the normal approximation to the binomial distribution agrees with the true confidence interval, as determined from the sampling distribution. I found that the predicted confidence interval starts to get substantially smaller than the actual confidence interval only when both frequencies are down to 1% and the sample size is less than 100. For these low frequencies, one strategy would be to simulate as I have done here, using the frequencies observed in the first round of sampling, until you find the sample size that gives a respectable confidence interval. Then go away and sample those extra subjects, recalculate the frequencies and the confidence interval, and do more rounds of sampling if necessary. For everything else, the "1 over root n" rule will work: the width of the confidence interval is inversely proportional to the square root of the sample size.

* Listing layout used by the report below: 85-column lines and 30-line pages;
options linesize=85 pagesize=30;

*freq diff only.  See lower down for RR and OR as well;
*fast method, not using proc freq;
%macro whatever;
/*
   Simulate the sampling distribution of a difference in frequencies
   between two equal-sized groups (fast method, no proc freq).

   Global macro variables read:
     trialn  number of simulated trials; one output row per trial
     startn  total sample size, split equally between the two groups
     startf  frequency (percent) of the outcome in group 2
     deltaf  added to startf to give the frequency (percent) in group 1

   Output: data set dat1 with one row per trial containing
     trial     trial number
     freqdiff  observed frequency difference, group 1 minus group 2 (percent)
     confint   full width (percent) of the 95% confidence interval for
               freqdiff from the normal approximation to the binomial

   ranuni is called with seed 0, so results differ from run to run.
*/
data dat1;
  /* Subjects per group.  floor() keeps n equal to the number of subjects
     actually drawn by the loops when startn is odd; without it the loops
     would draw floor(n) subjects while the formulas divided by a
     fractional n. */
  n=floor(&startn/2);
  do trial=1 to &trialn;
    grp1=0; grp2=0;
    /* group 1: count subjects showing the outcome at startf+deltaf percent */
    do id=1 to n;
      if 100*ranuni(0)<&startf+&deltaf then grp1=grp1+1;
    end;
    /* group 2: same number of subjects at startf percent */
    do id=1 to n;
      if 100*ranuni(0)<&startf then grp2=grp2+1;
    end;
    freqdiff=(grp1-grp2)/n*100;
    /* 2 x 1.96 x standard error of the difference of two proportions,
       written in terms of counts and scaled to percent */
    confint=2*1.96*100/n*sqrt(grp1*(n-grp1)/n+grp2*(n-grp2)/n);
    output;
  end;
  keep trial freqdiff confint;
run;
%mend;

/* Simulation settings read by the whatever macro, then run the simulation */
%let startf = 1;       /* frequency of something in one group */
%let deltaf = 1;       /* add this to get freq in other group */
%let startn = 100;     /* initial sample size */
%let trialn = 5000;    /* number of trials */
%whatever;

* Mean and 2.5th, 50th and 97.5th percentiles of the simulated differences;
proc univariate noprint data=dat1;
  var freqdiff;
  output out=dat2 mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
run;

* Same summary for the predicted confidence-interval widths;
proc univariate noprint data=dat1;
  var confint;
  output out=dat3 mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
run;

* Stack the two summaries, label each row and add the 2.5 to 97.5 percentile span;
data dat7;
  set dat2(in=fromdiff) dat3(in=fromci);
  int95pc=q97_5-q2_5;
  if fromdiff then variable="freqdiff";
  else if fromci then variable="confint";
run;

title "Sampling distn of freq stats, startf=&startf deltaf=&deltaf startn=&startn trialn=&trialn";
title2 "plus theoretical confint for freq diff based on binom-normal";
title3 "[Compare int95pc for freqdiff with mean for confint.]";

proc print data=dat7 noobs;
  var variable mean--q97_5 int95pc;
  format _numeric_  4.1;
run;

/*
*all outcome stats (frequency difference, relative risk and odds ratio);
*fast method;
*alternative analysis kept commented out, delete the two comment delimiter lines to run it;
*note that it redefines macro whatever and overwrites data sets dat1 and dat7;
%macro whatever;
data dat1;
*retain grp1 grp2;
do trial=1 to &trialn;
grp1=0; grp2=0;
do id=1 to &startn/2;
if 100*ranuni(0)<&startf+&deltaf then grp1=grp1+1;
end;
do id=1 to &startn/2;
if 100*ranuni(0)<&startf then grp2=grp2+1;
end;
freqdiff=(grp1-grp2)/&startn*200;
relrisk=grp1/grp2;
*a missing relrisk (grp2 is zero) is replaced by the sentinel 500;
if relrisk=. then relrisk=500;
oddratio=grp1/(&startn/2-grp1)/grp2*(&startn/2-grp2);
*a missing oddratio (a zero divisor) is replaced by the sentinel 1000;
if oddratio=. then oddratio=1000;
output;
end;
keep trial freqdiff relrisk oddratio;

%mend;

%let startf=15;
%let deltaf=32;
%let startn=250;
%let trialn=5000;
%whatever;

proc univariate noprint data=dat1;
var freqdiff;
output out=dat4 mean=mean pctlpre=Q pctlpts=2.5 50 97.5;

proc univariate noprint data=dat1;
var relrisk;
output out=dat5 mean=mean pctlpre=Q pctlpts=2.5 50 97.5;

proc univariate noprint data=dat1;
var oddratio;
output out=dat6 mean=mean pctlpre=Q pctlpts=2.5 50 97.5;

data dat7;
set dat4(in=a) dat5(in=b) dat6(in=c);
if a then statist="freqdiff";
if b then statist="relrisk";
if c then statist="oddratio";
int95pc=q97_5-q2_5;

proc print noobs;
var statist mean--q97_5 int95pc;
format _numeric_  4.1;
title "Sampling distn of freq stats, startf=&startf deltaf=&deltaf startn=&startn trialn=&trialn";

run;
*/

Go to: Previous · Contents · Search · Home