fdvar = function( n, d, alph, side="c", odcorhist, odcors, nbrk=500, ydata, rootmcor=1000, iwarn=T ){ # # Variance of the number of false discoveries. # if( missing(n) )n=nrow(ydata) if( missing(d) )d=ncol(ydata) if( missing(odcorhist) & missing(odcors) & missing(ydata) ) stop("No correlation data were supplied to fdvar") if( missing(odcorhist) & missing(odcors) & !missing(ydata) ){ odcors = corsampler(x=ydata,d1=rootmcor) } if( missing(odcorhist) & !missing(odcors) ){ odcorhist = hist( odcors, seq( from=-1.0000001, to=1.0000001, length=nbrk ), plot=F ) } if( !missing(odcorhist) ){ cbar = 0 csqbar = 0 for( j in 1:length(odcorhist$mids)){ ps = pairsig( rho=odcorhist$mids[j], n=n, alph=alph, side=side, iwarn=iwarn ) cbar = cbar + (ps-alph^2) * odcorhist$counts[j] csqbar = csqbar + (ps-alph^2)^2 * odcorhist$counts[j] } nc = sum(odcorhist$counts) cbar = cbar/nc csqbar = csqbar/nc cvar = (csqbar-cbar^2)/nc ans = d*alph*(1-alph) + d*(d-1)*cbar if( ans < 0 && iwarn)warning("Negative FD variance: consider using more correlations.") if( d*(d-1)*sqrt(abs(cvar)) > 0.1*max(ans,0) )warning("FD variance poorly determined: consider using more correlations.") } if( ans<0)plot(odcorhist$mids,odcorhist$counts) ans } critcor = function( alph, n, side="c" ){ # # Critical threshold values for correlation coefficient # between a vector of n IID N(mu,sigma^2) variables and # another arbitrary non-constant n-vector. # # For side == "c" reject at level alph if |cor| > critcor # "r" cor > critcor # "l" cor < critcor # # Note especially that for side == "l" rejection is for small cor # # Fold cases side = tolower(side) if( any( !(side %in% c("l","r","c"))))stop("side must be \"l\" or \"r\" or \"c\"") if( any(alph>1) )stop("alph must not be > 1") if( any(alph<0) )stop("alph must not be < 0") if( any(n<2) )warning("There is no critical correlation for n<2.") # Make scalar and vector inputs compatible invals = data.frame(alph=alph,n=n,side=side) alph = invals$alph; n=invals$n;side=as.character(invals$side) # Translate p values above 0.5 flipped = (side=="l" | side=="r") & alph > 0.5 flside = side flside[flipped & side=="l"] = "r" flside[flipped & side=="r"] = "l" flalph = alph flalph[flipped] = 1-alph[flipped] ans = alph*0 ans[ flside=="c" ] = qbeta( 1- flalph[flside=="c"], 1/2, (n[flside=="c"]-2)/2 )^.5 ans[ flside=="r" ] = qbeta( 1-2*flalph[flside=="r"], 1/2, (n[flside=="r"]-2)/2 )^.5 ans[ flside=="l" ] = -qbeta( 1-2*flalph[flside=="l"], 1/2, (n[flside=="l"]-2)/2 )^.5 ans } # # Adaptive quadrature to find probability of two tests simultaneously significant # pairsig = function( rho, n, alph, side="c", iwarn=T ){ side = tolower(side) # Fold cases if( ! ( side %in% c("l","r","c") )) stop(paste("Invalide side: ",side)) if( side=="c" ) return( 2*(pairsig(rho,n,alph/2,"r")+pairsig(-rho,n,alph/2,"r"))) if( rho <= -1 ) return(0) thresh = critcor( alph = alph, n=n, side="r") r2 = thresh^2 loww = 2*r2/(rho+1) if( loww >= 1 )return(0) hi = qbeta(1-1e-13,1,(n-3)/2) igrand = function(x){dbeta(x,1,(n-3)/2)*(2*acos(thresh/sqrt(x))-acos(rho))/(2*pi)} igral = integrate(igrand,loww,hi,subdivisions=10000,rel.tol=1e-9) if( igral$message != "OK" && iwarn )warning(igral$message) if( is.na(igral$value))print(igral$message) igral$value } varhattaylor = function( d, n, alph , side="c", eta, tausq ){ del = 0.01 slope = (pairsig(del,n,alph,side)-pairsig(-del,n,alph,side) )/(2*del) d*alph*(1-alph) + d*(d-1)* ( slope*eta + (alph^2-pairsig(0,n,alph,side))*( (n-1)*tausq-1)) } directsimvar = function( ydata, m=1000, alph, side="c", qsrc, xvals, plot=F, paired=T, raw=F,...){ # Direct simulation, of number of x vectors significantly correlated # to columns in ydata. x may be specified as an n-vector xvals from which # bootstrap samples are taken or as a quantile function qsrc that defaults to qbinom. # # When paired is true, the simulation is coupled to a simulation with # Gaussian x's. # # When plot is true, a plot is made of the sampled numbers of false discoveries. # # When raw is true, the simulated numbers of false discoveries are returned. # Otherwise just summary statistics are returned. n = nrow(ydata) # Nominal sample size neff = apply( !is.na(ydata),2,sum ) # Effective sample size sidesign = rep(1,length("side")) sidesign[side=="l"] = -1 if( !missing(xvals) ){ qsrc = function( u, ... ){ quantile(xvals,u)} if( length(xvals)!=n )stop("Wrong length for xvals") if( any(is.na(xvals)))stop("Missing xvals not allowed. (Missing ydata are ok)") } simdatN = matrix(rnorm(n*m),nrow=n) simdat = matrix(qsrc( pnorm(simdatN), ... ),ncol=m) simcor = t( t(abs(cor(simdat,ydata,use="pair"))) > critcor(alph=alph,n=neff,side=side) * sidesign ) nsig = apply(simcor,1,sum) if(paired){ simcorN = t( t(abs(cor(simdatN,ydata,use="pair"))) > critcor(alph=alph,n=neff,side=side) * sidesign ) nsigN = apply(simcorN,1,sum) ans = c( mean(nsigN),mean(nsig),var(nsigN),var(nsig),cor(nsigN,nsig,use="pair")) names(ans) = c("MuNorm","Mu","VarNorm","Var","CorWithNorm") }else{ ans = c( mean(nsig),var(nsig) ) names(ans) = c("Mu","Var") } if( plot )if(paired){plot(nsigN,nsig);abline(0,1)}else hist(nsig,50) if( raw && paired )ans = cbind(nsigN,nsig) if( raw && !paired )ans = nsig ans } corsampler = function(x, d1=500,d2=d1, byblock= T, m = 10000, repl=T){ # # Sample off diagonal correlations of columns in x # pairsampler = function( d, m = 10000, iid=T ){ # # Random sample from pairs of unequal # integers in 1 to d. Default (IID) samples # m pairs with replacement. If IID=F then # much slower sampling is done without replacement. # if( iid ) N = ceiling( runif(m) * d*(d-1)/2) else{ # too slow and too memory intensive to be the default if( m >= d*(d-1)/2 ) m = d*(d-1)/2 N = sample( d*(d-1)/2, m ) } j1 = 1 + floor( sqrt(2*N) - 0.5) j2 = N-(j1-1)*j1/2 j1 = j1 + 1 cbind(j1,j2) } if( byblock ){ dmax = ncol(x) if(d1+d2>dmax){d1=round(dmax/2);d2=dmax-d1} keep = sample(dmax,d1+d2) ans = cor(use="pair", x[,keep[1:d1]],x[,keep[(d1+1):(d1+d2)]]) } else{ if( repl ) ps = pairsampler(ncol(x),m,iid=T) else ps = pairsampler(ncol(x),m,iid=F) x1 = scale( x[,ps[,1]] ) x2 = scale( x[,ps[,2]] ) n = nrow(x1) ans = apply( x1*x2,2,sum,na.rm=T)/(n-1) } ans }