Simulation for means on the fly

A New View of Statistics

© 1997 Will G Hopkins

Go to: Previous · Contents · Search · Home

On The Fly for the Effect-Size Statistic in LONGITUDINAL STUDIES WITHOUT A CONTROL GROUP
See the previous simulation for a summary of the findings on bias.
 
Method here is for known population between-subject variance, 
i.e. no extra variance in the effect size from the denominator.  
My simulations all use variables with unit variance, so here I put 
denominator=1;
 
As I described on the main page, I used the fact that the width of 
confidence intervals is proportional to 1/root(n).  For this method,
you also need to know the acceptable widths of the confidence interval 
for a given ES. I got those from the boundaries of the steps 
in the magnitude scale.  Here are the confidence intervals that come 
to the boundaries of each step, and the corresponding effect sizes:
 
ES       CI
0       0.402
0.408   0.43
0.893   0.525
1.56    0.805
3.03    2.14
-0.408  0.43
-0.893  0.525
-1.56   0.805
-3.03   2.14
 
I fitted a 4th-order polynomial to these data with a graphing program
(Deltagraph). This is the curve in the figure:
CI = 3.575348E-3*ES**4 + 1.565327E-1*ES**2 + 4.015905E-1
 
[Note: When the sample SD is used to calculate effect size, 
the data and equation are slightly different.
ES      CI
0      0.402
0.405  0.43
0.88   0.525
1.54   0.805
2.9    2.14
-0.405 0.43
-0.88  0.525
-1.54  0.805
-2.9   2.14
 
CI = 6.163550E-3*ES**4 + 1.547127E-1*ES**2 + 4.029553E-1
The above equation was used for the simulations for effect size in 
cross-sectional studies and ditto longitudinal studies using the sample SD.]
 
I then used this equation to predict the target confidence interval (cipred)
for a given sample ES. I divided that into the actual confidence interval, 
squared the result, and multiplied by the current sample size to get the 
next sample size. 
 
I calculated the exact confidence interval for the given ES using 
confint=2*1.96*sdes, where sdes, the standard deviation of the effect size, 
is sqrt(2/n).
 
The simulation now requires an additional parameter, the reliability 
of the dependent variable.  I ran the simulations for reliabilities 
of 0.5, 0.8, 0.9, and 0.95.
 
Right, then, here is the simulation.
 
 
/* Listing layout: 85 characters per line, 30 lines per page. */
options linesize=85 pagesize=30;
 
%macro whatever;
/* Builds data set dat1 holding the starting samples: trialn simulated trials,  */
/* each with startn subjects (macro variables trialn, startn, r and es must be  */
/* set before the macro is called).                                             */
/* Each subject gets a standard-normal true score. Both measurements are        */
/* sqrt(r)*true score plus independent noise scaled by sqrt(1-r), so each has   */
/* unit variance; the second measurement is shifted by the effect size es.      */
/* Only trial, id and the difference score ydiff are kept.                      */
/* rannor is called with seed 0, which SAS takes from the clock, so repeated    */
/* runs give different samples.                                                 */
*effect size=&es, rely=&r;
data dat1;
  keep trial ydiff id;
  do trial = 1 to &trialn;
    do id = 1 to &startn;
      tscore = rannor(0);
      pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
      post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
      ydiff = post - pre;
      output;
    end;
  end;
run;

%mend;
 
*Parameters for the simulation that stops sampling on confidence-interval width;
*They are read by the macro whatever and by the iteration steps that follow;
%let trialn=1000; *no. of trials;
%let r=0.95; *reliability;
%let es=0.2; *effect size;
%let startn=10;  *initial sample size;
%let nmax1=20; *total size limit for 1st iteration;
%let nmax2=20; *total size limit for 2nd or more iterations (as written below, the cap is applied in iteration 2 only and is commented out in iterations 3 and 4);
*Generate the starting samples in data set dat1;
%whatever;
 
/* Keep a copy of the starting samples, tagged "initial", so they can be  */
/* compared with the topped-up samples once the iterations have finished. */
data dat0;
  set dat1;               /* dat1 is the data set just created by the macro whatever */
  dataset = "initial";
run;
 
/* Iteration 1: estimate the effect in every trial from the starting sample and, */
/* where the confidence interval is wider than the target width, simulate the    */
/* extra subjects needed.                                                         */
proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;    /* the mean keeps the name ydiff */
run;

data dat2;
  set dat;
  keep trial ydiff id nnew iter;
  iter = 1;
  samplees = ydiff;                         /* effect size = mean difference, denominator=1 */
  confint = 2*1.96*stdydiff/sqrt(n);        /* full width of the 95% confidence interval */
  /* target width for this effect size, from the fitted 4th-order polynomial */
  cipred = 3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if cipred < confint then do;
    /* the required total n scales with (actual width / target width) squared; */
    /* nnew is the number of subjects still to be added                        */
    nnew = round(n*(confint/cipred)**2) - n;
    if nnew+n > &nmax1 then nnew = &nmax1 - n;    /* hold the total at nmax1 */
    /* Only newly simulated subjects are written (explicit OUTPUT), so a trial */
    /* that needs nobody extra contributes no rows to dat2.                    */
    if nnew then do id = n+1 to nnew+n;
      tscore = rannor(0);
      pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
      post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
      ydiff = post - pre;
      output;
    end;
  end;
run;
 
*proc print;
*run;
 
 
/*
*This bit checks that I've got the formulae right, by comparing the
sampling distribution with the Becker-derived es and its conf int;
*It's slashed off when the main simulation is run;
 
proc univariate noprint;
var samplees;
output mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
title "Sampling distn for es in x-over, r=&r es=&es n=&startn";
title2 "for sample effect size";
 
proc print;
 
proc means mean std min max maxdec=2 data=dat2;
var samplees crrctes confint cipred;
title "Stats for es in x-over, r=&r es=&es n=&startn";
title2 "for Becker-derived es and its confidence interval";
 
run;
*/
 
 
/* Iteration 2: pool the subjects added in iteration 1 with the existing samples, */
/* re-estimate the effect per trial, and top up again where the interval is still */
/* wider than the target. The total per trial is capped at nmax2.                 */

data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

/* proc print; run;   (debug listing, disabled) */

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;    /* the mean keeps the name ydiff */
run;

data dat2;
  set dat;
  keep trial ydiff id nnew iter;
  iter = 2;
  samplees = ydiff;                         /* denom=1 */
  confint = 2*1.96*stdydiff/sqrt(n);        /* full width of the 95% confidence interval */
  /* target width for this effect size, from the fitted polynomial */
  cipred = 3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if cipred < confint then do;
    nnew = round(n*(confint/cipred)**2) - n;      /* extra subjects wanted */
    if nnew+n > &nmax2 then nnew = &nmax2 - n;    /* hold the total at nmax2 */
    /* only the newly simulated subjects are written out */
    if nnew then do id = n+1 to nnew+n;
      tscore = rannor(0);
      pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
      post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
      ydiff = post - pre;
      output;
    end;
  end;
run;
 
/* Iteration 3: same procedure as iteration 2, except that the nmax2 cap is */
/* disabled, so the total sample size is not limited in this iteration.     */

data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;    /* the mean keeps the name ydiff */
run;

data dat2;
  set dat;
  keep trial ydiff id nnew iter;
  iter = 3;
  samplees = ydiff;                         /* denom=1 */
  confint = 2*1.96*stdydiff/sqrt(n);        /* full width of the 95% confidence interval */
  /* target width for this effect size, from the fitted polynomial */
  cipred = 3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if cipred < confint then do;
    nnew = round(n*(confint/cipred)**2) - n;      /* extra subjects wanted */
    /* cap disabled:  if nnew+n>&nmax2 then nnew=&nmax2-n;  */
    /* only the newly simulated subjects are written out */
    if nnew then do id = n+1 to nnew+n;
      tscore = rannor(0);
      pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
      post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
      ydiff = post - pre;
      output;
    end;
  end;
run;
 
/* Iteration 4 (the last top-up): pool, re-estimate and extend once more. */
/* As in iteration 3, the nmax2 cap is disabled here.                     */

data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;    /* the mean keeps the name ydiff */
run;

data dat2;
  set dat;
  keep trial ydiff id nnew iter;
  iter = 4;
  samplees = ydiff;                         /* denom=1 */
  confint = 2*1.96*stdydiff/sqrt(n);        /* full width of the 95% confidence interval */
  /* target width for this effect size, from the fitted polynomial */
  cipred = 3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if cipred < confint then do;
    nnew = round(n*(confint/cipred)**2) - n;      /* extra subjects wanted */
    /* cap disabled:  if nnew+n>&nmax2 then nnew=&nmax2-n;  */
    /* only the newly simulated subjects are written out */
    if nnew then do id = n+1 to nnew+n;
      tscore = rannor(0);
      pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
      post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
      ydiff = post - pre;
      output;
    end;
  end;
run;
 
 
 
/* Output results: tag the fully topped-up samples as "final", stack them with */
/* the "initial" samples, and report the mean sample size, effect size,        */
/* confidence-interval width and confidence limits for each of the two sets.   */

data dat1;
  set dat1 dat2;          /* fold in the subjects added in the last iteration */
  dataset = "final";
run;

data datboth;
  set dat0 dat1;          /* dat0 comes first, so its 7-character "initial" sets the length of dataset */
run;

proc sort data=datboth;
  by dataset trial;
run;

/* per-trial n, mean difference and SD, separately for initial and final samples */
proc means data=datboth noprint;
  by dataset trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  samplees = ydiff;                         /* denom=1 */
  confint = 2*1.96*stdydiff/sqrt(n);        /* full width of the 95% confidence interval */
  confliml = samplees - confint/2;          /* lower confidence limit */
  conflimu = samplees + confint/2;          /* upper confidence limit */
run;

/* average the per-trial statistics over all trials; the output data set is */
/* left unnamed and is picked up by the PROC PRINT that follows             */
proc means data=dat2 noprint;
  by dataset;
  var n samplees confint confliml conflimu;
  output mean=;
run;

proc print noobs;
  var dataset n samplees confint confliml conflimu;
  format _numeric_ 5.2 n 4.;
  title "ES stats rely=&r es=&es startn=&startn trials=&trialn";
  title2 "nmax1=&nmax1 nmax2=&nmax2, longitudinal study, no control group";
run;
 
/* Sampling distribution of the final effect size, and summary statistics for */
/* the final sample size, using only the "final" rows of dat2.                */
data datfinal;
  set dat2;
  if dataset = "final";
run;

/* mean plus 2.5th, 50th and 97.5th percentiles of the final effect sizes; the */
/* output data set is left unnamed and is picked up by the PROC PRINT below    */
proc univariate data=datfinal noprint;
  var samplees;
  output mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
run;

proc print noobs;
  format _numeric_ 5.2;
  title "Sampling distn for es, r=&r es=&es n=&startn";
  title2 "for final sample effect size";
run;

/* The title used to reference the macro variable nmax, which is never defined */
/* (only nmax1 and nmax2 are), so it failed to resolve. It now reports the two */
/* limits that actually exist.                                                 */
proc means n mean std min max maxdec=0 data=datfinal;
  var n;
  title "Stats for final sample size  r=&r es=&es n=&startn nmax1=&nmax1 nmax2=&nmax2";
run;
 
/* Report how many extra observations were added at each iteration. */
proc sort data=dat1;
  by trial iter;
run;

/* one row per trial and iteration; every row of a trial/iteration carries the */
/* same nnew, so the mean is simply that value                                 */
proc means data=dat1 noprint;
  by trial iter;
  var nnew;
  output mean=;
run;

/* the intermediate data sets below are left unnamed, as in the original */
proc sort;
  by iter;
run;

/* per iteration: number of trials that were topped up (n), and the mean, SD, */
/* minimum and maximum number of subjects added                               */
proc means noprint;
  by iter;
  var nnew;
  output n=n mean= std=std min=min max=max;
run;

data;
  set;
  if iter;                /* drops the group with missing iter, i.e. the starting observations */
run;

proc print noobs;
  var iter n nnew std min max;
  format _numeric_ 5.0;
  title "Number of extra observations at each iteration";
  title2 "for rely=&r es=&es startn=&startn nmax1=&nmax1 nmax2=&nmax2";
run;
 
 
 
 
**************************;
 
*Here is a simulation using statistical significance to stop sampling;
*I set the sample-size limit to 80 for each step,  because you can
get huge predictions for the sample size for the next round;
 
 
%macro whatever;
/* Builds data set dat1 holding the starting samples: trialn simulated trials,  */
/* each with startn subjects (macro variables trialn, startn, r and es must be  */
/* set before the macro is called).                                             */
/* Each subject gets a standard-normal true score. Both measurements are        */
/* sqrt(r)*true score plus independent noise scaled by sqrt(1-r), so each has   */
/* unit variance; the second measurement is shifted by the effect size es.      */
/* Only trial, id and the difference score ydiff are kept.                      */
/* rannor is called with seed 0, which SAS takes from the clock, so repeated    */
/* runs give different samples.                                                 */
*effect size=&es, rely=&r;
data dat1;
  keep trial ydiff id;
  do trial = 1 to &trialn;
    do id = 1 to &startn;
      tscore = rannor(0);
      pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
      post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
      ydiff = post - pre;
      output;
    end;
  end;
run;

%mend;
 
*Parameters for the simulation that stops sampling on statistical significance;
/* NOTE(review): the two nmax lines below are meant to be disabled, but a %let  */
/* placed inside an asterisk-style comment may still be executed by the macro   */
/* processor, in which case nmax1 and nmax2 are set to 20 here anyway. The      */
/* iteration steps of this simulation do not read them (they use a hard-coded   */
/* limit of 80); only the report titles reference them. Confirm, and switch     */
/* these lines to block comments if they really should be inert.                */
%let trialn=1000; *no. of trials;
%let r=0.8; *reliability;
%let es=0.41; *effect size;
%let startn=10;  *initial sample size;
*%let nmax1=20; *not used;
*%let nmax2=20; *not used;
*Generate the starting samples in data set dat1;
%whatever;
 
/* Keep a copy of the starting samples, tagged "initial", so they can be  */
/* compared with the topped-up samples once the iterations have finished. */
data dat0;
  set dat1;               /* dat1 is the data set just created by the macro whatever */
  dataset = "initial";
run;
 
/* Iteration 1: estimate the effect in every trial from the starting sample and, */
/* where it is not statistically significant, simulate the extra subjects that   */
/* would make an effect of the observed size significant.                        */
proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;    /* the mean keeps the name ydiff */
run;

data dat2;
  set dat;
  keep trial ydiff id nnew iter;
  iter = 1;
  samplees = ydiff;                         /* denominator=1 */
  tail = 1.96*stdydiff/sqrt(n);             /* half-width of the 95% confidence interval */
  /* not significant when the interval reaches zero, i.e. |effect| < half-width */
  if abs(samplees) < tail then do;
    /* total n at which the half-width would shrink to the observed effect, minus current n */
    nnew = round(n*(tail/samplees)**2) - n;
    if nnew then do;
      if n+nnew > 80 then nnew = 80 - n;    /* hold the total sample size at 80 */
      /* only the newly simulated subjects are written out */
      do id = n+1 to nnew+n;
        tscore = rannor(0);
        pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
run;
 
 
/* Iteration 2: pool the subjects added in iteration 1 with the existing samples, */
/* re-test each trial, and top up again where the effect is still not             */
/* statistically significant.                                                     */

data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

/* proc print; run;   (debug listing, disabled) */

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;    /* the mean keeps the name ydiff */
run;

data dat2;
  set dat;
  keep trial ydiff id nnew iter;
  iter = 2;
  samplees = ydiff;                         /* denominator=1 */
  tail = 1.96*stdydiff/sqrt(n);             /* half-width of the 95% confidence interval */
  if abs(samplees) < tail then do;          /* not yet significant */
    nnew = round(n*(tail/samplees)**2) - n; /* extra subjects wanted */
    if nnew then do;
      if n+nnew > 80 then nnew = 80 - n;    /* hold the total sample size at 80 */
      /* only the newly simulated subjects are written out */
      do id = n+1 to nnew+n;
        tscore = rannor(0);
        pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
run;
 
/* Iteration 3: pool, re-test and top up again, with the same limit of 80 on */
/* the total sample size.                                                    */

data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;    /* the mean keeps the name ydiff */
run;

data dat2;
  set dat;
  keep trial ydiff id nnew iter;
  iter = 3;
  samplees = ydiff;                         /* denominator=1 */
  tail = 1.96*stdydiff/sqrt(n);             /* half-width of the 95% confidence interval */
  if abs(samplees) < tail then do;          /* not yet significant */
    nnew = round(n*(tail/samplees)**2) - n; /* extra subjects wanted */
    if nnew then do;
      if n+nnew > 80 then nnew = 80 - n;    /* hold the total sample size at 80 */
      /* only the newly simulated subjects are written out */
      do id = n+1 to nnew+n;
        tscore = rannor(0);
        pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
run;
 
/* Iteration 4 (the last top-up): pool, re-test and extend once more, with */
/* the same limit of 80 on the total sample size.                          */

data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;    /* the mean keeps the name ydiff */
run;

data dat2;
  set dat;
  keep trial ydiff id nnew iter;
  iter = 4;
  samplees = ydiff;                         /* denominator=1 */
  tail = 1.96*stdydiff/sqrt(n);             /* half-width of the 95% confidence interval */
  if abs(samplees) < tail then do;          /* not yet significant */
    nnew = round(n*(tail/samplees)**2) - n; /* extra subjects wanted */
    if nnew then do;
      if n+nnew > 80 then nnew = 80 - n;    /* hold the total sample size at 80 */
      /* only the newly simulated subjects are written out */
      do id = n+1 to nnew+n;
        tscore = rannor(0);
        pre  = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*tscore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
run;
 
 
 
/* Output results: tag the fully topped-up samples as "final", stack them with */
/* the "initial" samples, and report the mean sample size, effect size,        */
/* confidence-interval width and confidence limits for each of the two sets.   */

data dat1;
  set dat1 dat2;          /* fold in the subjects added in the last iteration */
  dataset = "final";
run;

data datboth;
  set dat0 dat1;          /* dat0 comes first, so its 7-character "initial" sets the length of dataset */
run;

proc sort data=datboth;
  by dataset trial;
run;

/* per-trial n, mean difference and SD, separately for initial and final samples */
proc means data=datboth noprint;
  by dataset trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  samplees = ydiff;                         /* denom=1 */
  confint = 2*1.96*stdydiff/sqrt(n);        /* full width of the 95% confidence interval */
  confliml = samplees - confint/2;          /* lower confidence limit */
  conflimu = samplees + confint/2;          /* upper confidence limit */
run;

/* average the per-trial statistics over all trials; the output data set is */
/* left unnamed and is picked up by the PROC PRINT that follows             */
proc means data=dat2 noprint;
  by dataset;
  var n samplees confint confliml conflimu;
  output mean=;
run;

/* title2 used to report nmax1 and nmax2, which this simulation flags as "not */
/* used"; the limit its iterations actually apply is the hard-coded total     */
/* sample size of 80, so that is what the title now states.                   */
proc print noobs;
  var dataset n samplees confint confliml conflimu;
  format _numeric_ 5.2 n 4.;
  title "ES stats rely=&r es=&es startn=&startn trials=&trialn";
  title2 "nmax=80, longitudinal study, no control group";
run;
 
/* Sampling distribution of the final effect size, and summary statistics for */
/* the final sample size, using only the "final" rows of dat2.                */
data datfinal;
  set dat2;
  if dataset = "final";
run;

/* mean plus 2.5th, 50th and 97.5th percentiles of the final effect sizes; the */
/* output data set is left unnamed and is picked up by the PROC PRINT below    */
proc univariate data=datfinal noprint;
  var samplees;
  output mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
run;

proc print noobs;
  format _numeric_ 5.2;
  title "Sampling distn for es, r=&r es=&es n=&startn";
  title2 "for final sample effect size";
run;

/* The title used to reference the macro variable nmax, which is never defined, */
/* so it failed to resolve. The limit this simulation actually applies is the   */
/* hard-coded total sample size of 80, so the title now states that value.      */
proc means n mean std min max maxdec=0 data=datfinal;
  var n;
  title "Stats for final sample size  r=&r es=&es n=&startn nmax=80";
run;
 
/* Report how many extra observations were added at each iteration. */
proc sort data=dat1;
  by trial iter;
run;

/* one row per trial and iteration; every row of a trial/iteration carries the */
/* same nnew, so the mean is simply that value                                 */
proc means data=dat1 noprint;
  by trial iter;
  var nnew;
  output mean=;
run;

/* the intermediate data sets below are left unnamed, as in the original */
proc sort;
  by iter;
run;

/* per iteration: number of trials that were topped up (n), and the mean, SD, */
/* minimum and maximum number of subjects added                               */
proc means noprint;
  by iter;
  var nnew;
  output n=n mean= std=std min=min max=max;
run;

data;
  set;
  if iter;                /* drops the group with missing iter, i.e. the starting observations */
run;

/* title2 used to report nmax1 and nmax2, which this simulation flags as "not */
/* used"; it now states the hard-coded limit of 80 that the iterations apply. */
proc print noobs;
  var iter n nnew std min max;
  format _numeric_ 5.0;
  title "Number of extra observations at each iteration";
  title2 "for rely=&r es=&es startn=&startn nmax=80";
run;
Go to: Previous · Contents · Search · Home
A New View of Statistics	© 1997 Will G Hopkins
Go to: Previous · Contents · Search · Home