#SAM R associated functions # individual functions for each response type ttest.func <- function(x,y,s0=0){ n1 <- sum(y==1) n2 <- sum(y==2) p <- nrow(x) m1 <- rowMeans(x[,y==1]) m2 <- rowMeans(x[,y==2]) sd <- sqrt( ((n2-1) * varr(x[, y==2], meanx=m1) + (n1-1) * varr(x[, y==1], meanx=m2) )* (1/n1+1/n2)/(n1+n2-2) ) numer <- m2 - m1 dif.obs <- (numer)/(sd + s0) return(list(tt=dif.obs,numer=numer, sd=sd)) } onesample.ttest.func <- function(x,y,s0=0){ n <- length(y) x <- x*matrix(y,nrow=nrow(x),ncol=ncol(x),byrow=T) m <- rowMeans(x) sd <- sqrt( varr(x, meanx=m)/n ) dif.obs <- m/(sd + s0) return(list(tt=dif.obs, numer=m,sd=sd)) } paired.ttest.func <- function(x,y,s0=0,useden=T){ nc <- ncol(x)/2 o <- 1:nc o1 <- rep(0,ncol(x)/2);o2 <- o1 for(j in 1:nc){o1[j] <- (1:ncol(x))[y==-o[j]]} for(j in 1:nc){o2[j] <- (1:ncol(x))[y==o[j]]} d <- x[,o2]-x[,o1] su <- x[,o2]+x[,o1] den <- 1 sd <- NULL if(is.matrix(d)){ m <- rowMeans(d) } if(!is.matrix(d)) {m <- mean(d)} if(useden){ if(is.matrix(d)){ sd <- sqrt(varr(d, meanx=m)/nc)} if(!is.matrix(d)){sd <- sqrt(var(d)/nc)} den <- sd+s0 } dif.obs <- m/den return(list(tt=dif.obs, numer=m, sd=sd)) } cox.func <- function(x,y,status,s0=0){ scor <- coxscor(x,y, status)$scor sd <- sqrt(coxvar(x,y, status)) tt <- scor/(sd+s0) return(list(tt=tt, numer=scor, sd=sd)) } fisher.func <- function(x,y,s0=0){ #assumes y is coded 1,2... nn <- table(y) m <- matrix(0,nrow=nrow(x),ncol=length(nn)) v <- m for(j in 1:length(nn)){ m[,j] <- rowMeans(x[,y==j]) v[,j] <- (nn[j]-1)*varr(x[,y==j], meanx=m[,j]) } mbar <- rowMeans(x) mm <- m-matrix(mbar,nrow=length(mbar),ncol=length(nn)) fac <- (sum(nn)/prod(nn)) scor <- sqrt(fac*(apply(matrix(nn,nrow=nrow(m),ncol=ncol(m),byrow=T)*mm*mm,1,sum))) sd <- sqrt(apply(v,1,sum)*(1/sum(nn-1))*sum(1/nn)) tt <- scor/(sd+s0) return(list(tt=tt, numer=scor, sd=sd)) } quantitative.func <- function(x,y,s0=0){ yy <- y-mean(y) temp <- x%*%yy xx <- t(scale(t(x),scale=F)) sxx <- apply(xx^2,1,sum) scor <- temp/sxx b0hat <- mean(y)-scor*apply(x,1,mean) yhat <- matrix(b0hat,nrow=nrow(x),ncol=ncol(x))+x*matrix(scor,nrow=nrow(x),ncol=ncol(x)) ty <- matrix(y,nrow=nrow(yhat),ncol=ncol(yhat),byrow=T) sigma <- sqrt(apply( (ty-yhat)^2,1,sum)/(ncol(yhat)-2)) sd <- sigma/sqrt(sxx) tt <- scor/(sd+s0) return(list(tt=tt, numer=scor, sd=sd)) } timearea.func<- function(x, y, s0 = 0) { n <- ncol(x) xx <- 0.5 * (x[, 2:n] + x[, 1:(n - 1)]) * matrix(diff(y), nrow = nrow(x), ncol = n - 1, byrow = T) numer <- rowMeans(xx) sd <- sqrt(varr(xx, meanx=numer)/n) tt <- numer/sqrt(sd + s0) return(list(tt = tt, numer = numer, sd = sd)) } ######################################### detec11 <- function(a, del, params) { # find genes above and below the slab of half-width del n <- length(a$tt) tt <- a$tt evo <- a$evo numer <- a$tt*(a$sd+a$s0) tag <- order(tt) pup <- NULL foldchange.cond.up=rep(T,length(evo)) foldchange.cond.lo=rep(T,length(evo)) if(!is.null(a$foldchange[1]) & (params$min.foldchange>0)){ foldchange.cond.up= a$foldchange >= params$min.foldchange foldchange.cond.lo= a$foldchange <= 1/params$min.foldchange } o1 <- (1:n)[(tt[tag] - evo > del) & evo > 0 & foldchange.cond.up ] if(length(o1) > 0) { o1 <- o1[1] o11 <- o1:n o111 <- rep(F, n) o111[tag][o11] <- T pup <- (1:n)[o111] } plow <- NULL o2 <- (1:n)[(evo - tt[tag] > del) & evo < 0 & foldchange.cond.lo ] if(length(o2) > 0) { o2 <- o2[length(o2)] o22 <- 1:o2 o222 <- rep(F, n) o222[tag][o22] <- T plow <- (1:n)[o222] } return(list(plow=plow, pup=pup)) } f55 <- function(aa) { length(aa$pl) + length(aa$pu) } samr.compute.delta.table <- function(a, params, dels=NULL, nvals=50) { # computes delta table, starting with samr object "a", for nvals values of delta dels=seq(0,max(abs(sort(a$tt)-a$evo)), length=nvals) ttstar0 <- a$ttstar0 tt <- a$tt n <- a$n evo <- a$evo nsim <- ncol(ttstar0) res1 <- NULL for(ii in 1:length(dels)) { ttt <- detec11(a,dels[ii], params) cutup <- 10e9 if(length(ttt$pup>0)){ cutup <- min(a$tt[ttt$pup])} cutlow <- -10e9 if(length(ttt$plow)>0){cutlow <- max(a$tt[ttt$plow])} resup <- matrix(F, n, ncol = nsim) reslow <- resup for(j in 1:nsim) { jun <- list(evo = evo, tt = ttstar0[, j], sd = a$sdstar[ , j], s0 = a$s0, foldchange=a$foldchange.star[,j]) a2 <- detec.horiz(jun,cutlow,cutup, params) resup[a2$pup, j] <- T reslow[a2$plow, j] <- T } errup <- apply(resup, 2, sum) errlow <- apply(reslow, 2, sum) s <- sqrt(var(errup)/nsim + var(errlow)/nsim) gmed <- median(errup + errlow) g90=quantile(errup + errlow, .90) cat(c(dels[ii], errlow + errup), fill = T) cat("", fill = T) g2 <- f55(ttt) res1 <- rbind(res1, c(a$pi0*gmed, a$pi0*g90, g2, a$pi0*gmed/g2, a$pi0*g90/g2, cutlow,cutup)) } res1 <- cbind(dels, res1) dimnames(res1) <- list(NULL, c("delta", "# med false pos", "90th perc false pos", "# called", "median FDR", "90th perc FDR","cutlo","cuthi")) return(res1) } detec.horiz <- function(a,cutlow,cutup, params){ # find genes above or below horizontal cutpoints dobs <- a$tt n <- length(dobs) foldchange.cond.up=rep(T,n) foldchange.cond.lo=rep(T,n) if(!is.null(a$foldchange[1]) & (params$min.foldchange>0)){ foldchange.cond.up= a$foldchange >= params$min.foldchange foldchange.cond.lo= a$foldchange <= 1/params$min.foldchange } pup <- (1:n)[dobs> cutup & foldchange.cond.up] plow <- (1:n)[dobs< cutlow & foldchange.cond.lo] return(list(plow=plow,pup=pup)) } samr.plot <- function(a, del, params, plot=TRUE) { # make observed-expected plot # # takes foldchange into account too # also returns info needed to make plot in Excel # xval, yval and col= color for each point (1=black, 2=red for top right, 3=green for bottom left) LARGE=10e9 b <- detec11(a, del, params) bb <- c(b$pup,b$plow) b1= LARGE b0=-LARGE if(!is.null(b$pup)){b1 <- min(a$tt[b$pup])} if(!is.null(b$plow)){b0 <- max(a$tt[b$plow])} c1 <- (1:a$n)[sort(a$tt)>=b1] c0 <- (1:a$n)[sort(a$tt)<=b0] c2 <- c(c0,c1) foldchange.cond.up=rep(T,length(a$evo)) foldchange.cond.lo=rep(T,length(a$evo)) if(!is.null(a$foldchange[1]) & (params$min.foldchange>0)){ foldchange.cond.up= a$foldchange >= params$min.foldchange foldchange.cond.lo= a$foldchange <= 1/params$min.foldchange } col=rep(1,length(a$evo)) col[b$plow]=3 col[b$pup]=2 if(!is.null(a$foldchange[1]) & (params$min.foldchange>0)){ col[!foldchange.cond.lo & !foldchange.cond.up]=1 } col.ordered=col[order(a$tt)] if(plot){ ylims <- range(a$tt) xlims <- range(a$evo) plot(a$evo,sort(a$tt),xlab="expected score", ylab="observed score",ylim=ylims, xlim=xlims, type="n") points(a$evo,sort(a$tt),col=col.ordered) abline(0,1) abline(del,1,lty=2) abline(-del,1,lty=2) } return(list(xval=a$evo,yval=sort(a$tt), col=col.ordered)) } stand <- function(x, use.for.stand = 1:ncol(x)) { n <- ncol(x) xx <- sign(x) * (abs(x))^{ 1/3 } stn <- apply(xx[, use.for.stand], 1, mean) xxx <- xx - xx for(j in 1:n) { a <- lsfit(stn, xx[, j]) xxx[, j] <- (xx[, j] - a$coef[1])/a$coef[2] } x3 <- xxx^3 return(x3) } localfdr <- function(a, params, perc=.01, df=10) { # estimates local fdr at score "d", using SAM object "a" # "d" can be a vector of d scores # returns estimate of symmetric fdr # to use: first run SAM in Splus, and then pass the resulting fit object to # localfdr d=seq(min(a$tt),max(a$tt),length=200) ndscore <- length(d) dvector <- rep(NA,ndscore) ind.foldchange=rep(T,length(a$tt)) if(!is.null(a$foldchange[1]) & params$min.foldchange>0){ ind.foldchange= (a$foldchange>= params$min.foldchange) | (a$foldchange<= params$min.foldchange) } for(i in 1:ndscore) { pi0<-a$pi0 r <- sum(a$tt0) { r2 <- min(r+length(a$tt)*perc, length(a$tt)) dup <- sort(a$tt)[r2] dlow <- d[i] } o <- a$tt>dlow & a$tt< dup & ind.foldchange oo <- a$tt>dlow.sym & a$tt< dup.sym & ind.foldchange nsim <- ncol(a$ttstar) fdr <- rep(NA,nsim) fdr2 <- fdr for(j in 1:nsim) { ind.foldchange=rep(T,length(a$tt)) if(!is.null(a$foldchange[1]) & params$min.foldchange>0){ ind.foldchange=(a$foldchange.star[,j]>= params$min.foldchange) | (a$foldchange.star[,j]<= params$min.foldchange) } fdr[j] <- pi0*sum(a$ttstar0[,j]>dlow & a$ttstar0[,j]dlow.sym & a$ttstar0[,j]0)){ res.up=cbind(sig$pup+1,data$genenames[sig$pup],data$geneid[sig$pup],a$tt[sig$pup],a$numer[sig$pup],a$sd[sig$pup], a$foldchange[sig$pup],qvalues$qvalue.up, fdr.up) dimnames(res.up)=list(NULL,c("Row","Gene Name","Gene ID", "Score(d)", "Numerator(r)","Denominator(s+s0)", "Fold Change", "q-value","localfdr")) res.lo=cbind(sig$plo+1,data$genenames[sig$plo],data$geneid[sig$plo],a$tt[sig$plo],a$numer[sig$plo],a$sd[sig$plo], a$foldchange[sig$plo],qvalues$qvalue.lo, fdr.lo) } if( (a$resp.type!="twoclass" & a$resp.type!="paired") | (params$min.foldchange==0)){ res.up=cbind(sig$pup+1,data$genenames[sig$pup],data$geneid[sig$pup],a$tt[sig$pup],a$numer[sig$pup],a$sd[sig$pup], a$foldchange[sig$pup],qvalues$qvalue.up, fdr.up) dimnames(res.up)=list(NULL,c("Row","Gene Name","Gene ID", "Score(d)", "Numerator(r)","Denominator(s+s0)","q-value","localfdr")) res.lo=cbind(sig$plo+1,data$genenames[sig$plo],data$geneid[sig$plo],a$tt[sig$plo],a$numer[sig$plo],a$sd[sig$plo], a$foldchange[sig$plo],qvalues$qvalue.lo, fdr.lo) } dimnames(res.lo)=dimnames(res.up) o1=order(-a$tt[sig$pup]) res.up=res.up[o1,] o2=order(a$tt[sig$plo]) res.lo=res.lo[o2,] return(list(genes.up=res.up,genes.lo=res.lo)) } qvalue.func=function(a,sig, delta.table){ qvalue.up=rep(NA,length(sig$pup)) o1=sig$pup cutup=delta.table[,8] FDR=delta.table[,5] ii=0 for(i in o1){ o= abs(cutup-a$tt[i]) o[is.na(o)]=LARGE oo=(1:length(o))[o==min(o)] oo=oo[length(oo)] ii=ii+1 qvalue.up[ii]= FDR[oo] } qvalue.lo=rep(NA,length(sig$pup)) o2=sig$plo cutlo=delta.table[,7] ii=0 for(i in o2){ o= abs(cutlo-a$tt[i]) o[is.na(o)]=LARGE oo=(1:length(o))[o==min(o)] oo=oo[length(oo)] ii=ii+1 qvalue.lo[ii]= FDR[oo] } return(list(qvalue.lo=qvalue.lo,qvalue.up=qvalue.up)) } foldchange.twoclass=function(x,y, logged2){ if(logged2){x=2^x} n1 <- sum(y==1) n2 <- sum(y==2) p <- nrow(x) m1 <- rowMeans(x[,y==1]) m2 <- rowMeans(x[,y==2]) return(m2/m1) } foldchange.paired=function(x,y,logged2){ if(logged2){x=2^x} nc <- ncl(x)/2 o <- 1:nc o1 <- rep(0,ncol(x)/2);o2 <- o1 for(j in 1:nc){o1[j] <- (1:ncol(x))[y==-o[j]]} for(j in 1:nc){o2[j] <- (1:ncol(x))[y==o[j]]} d <- x[,o2]/x[,o1] m <- rowMeans(d) return(m) } est.s0<-function(tt,sd,s0.perc=seq(0,.99,len=40)){ # estimate s0 (exchangeability) factor for denominator. # returns the actual estimate s0 (not a percentile) a<-cut(sd,breaks=quantile(sd,seq(0,1,len=101)),labels=F) a[is.na(a)]<-1 cv.sd<-rep(0,length(s0.perc)) for(j in 1:length(s0.perc)){ w<-quantile(sd,s0.perc[j]) w[j==1]<-0 tt2<-tt*sd/(sd+w) sds<-rep(0,100) for(i in 1:100){ sds[i]<-mad(tt2[a==i]) } cv.sd[j]<-sqrt(var(sds))/mean(sds) } o=(1:length(s0.perc))[cv.sd==min(cv.sd)] s0.hat=min(sd[o]) return(list(s0.perc=s0.perc,cv.sd=cv.sd, s0.hat= s0.hat)) } coxscor <- function(x, y, ic, offset = rep(0., length(y))) { # computes cox scor function for rows of nx by n matrix x # first put everything in time order n <- length(y) nx <- nrow(x) yy <- y + (ic == 0.) * (1e-05) otag <- order(yy) y <- y[otag] ic <- ic[otag] x <- x[, otag, drop = F] #compute unique failure times, d=# of deaths at each failure time, #dd= expanded version of d to length n, s=sum of covariates at each # failure time, nn=#obs in each risk set, nno=sum(exp(offset)) at each failure time offset <- offset[otag] a <- coxstuff(x, y, ic, offset = offset) nf <- a$nf fail.times <- a$fail.times s <- a$s d <- a$d dd <- a$dd nn <- a$nn nno <- a$nno w <- rep(0., nx) for(i in (1.:nf)) { w <- w + s[, i] oo<- (1.:n)[y >= fail.times[i]] r<-rowSums(x[, oo, drop = F] * exp(offset[oo])) w<- w - (d[i]/nno[i])*r } return(list(scor = w, coxstuff.obj = a)) } coxvar <- function(x, y, ic, offset = rep(0., length(y)), coxstuff.obj = NULL) { # computes information elements (var) for cox # x is nx by n matrix of expression values nx <- nrow(x) n <- length(y) yy <- y + (ic == 0.) * (1e-06) otag <- order(yy) y <- y[otag] ic <- ic[otag] x <- x[, otag, drop = F] offset <- offset[otag] if(is.null(coxstuff.obj)) { coxstuff.obj <- coxstuff(x, y, ic, offset = offset) } nf <- coxstuff.obj$nf fail.times <- coxstuff.obj$fail.times s <- coxstuff.obj$s d <- coxstuff.obj$d dd <- coxstuff.obj$dd nn <- coxstuff.obj$nn nno <- coxstuff.obj$nno x2<- x^2 oo <- (1.:n)[y >= fail.times[1] ] sx<-(1/nno[1])*rowSums(x[, oo] * exp(offset[oo])) s<-(1/nno[1])*rowSums(x2[, oo] * exp(offset[oo])) w <- d[1] * (s - sx * sx) for(i in 2.:nf) { oo <- (1.:n)[y >= fail.times[i-1] & y < fail.times[i] ] sx<-(1/nno[i])*(nno[i-1]*sx-rowSums(x[, oo,drop=F] * exp(offset[oo]))) s<-(1/nno[i])*(nno[i-1]*s-rowSums(x2[, oo,drop=F] * exp(offset[oo]))) w <- w + d[i] * (s - sx * sx) } return(w) } coxstuff<- function(x, y, ic, offset = rep(0., length(y))) { fail.times <- unique(y[ic == 1.]) nf <- length(fail.times) n <- length(y) nn <- rep(0., nf) nno <- rep(0., nf) for(i in 1.:nf) { nn[i] <- sum(y >= fail.times[i]) nno[i] <- sum(exp(offset)[y >= fail.times[i]]) } s <- matrix(0., ncol = nf, nrow = nrow(x)) d <- rep(0., nf) #expand d out to a vector of length n for(i in 1.:nf) { o <- (1.:n)[(y == fail.times[i]) & (ic == 1.)] d[i] <- length(o) } oo <- match(y, fail.times) oo[ic==0]<-NA oo[is.na(oo)]<- max(oo[!is.na(oo)])+1 s<-t(rowsum(t(x),oo)) if(ncol(s)> nf){s<-s[,-ncol(s)]} dd <- rep(0., n) for(j in 1.:nf) { dd[(y == fail.times[j]) & (ic == 1.)] <- d[j] } return(list(fail.times=fail.times, s=s, d=d, dd=dd, nf=nf, nn=nn, nno=nno)) } samr.missrate <- function(a,quant=c(.75, .80, .85,.90,.95) ){ # estimate miss rate from sam object "a" cuts=quantile(a$tt,quant) ncuts <- length(cuts) ngenes <- rep(NA,ncuts) ngenes0 <- rep(NA,ncuts) ngenes2 <- rep(NA,ncuts) missrate <- rep(NA,ncuts) nperm=ncol(a$ttstar) for(j in 1:(ncuts-1)){ ngenes2[j] <- sum(a$tt >cuts[j] & a$tt< cuts[j+1]) ngenes0[j] <- sum(a$ttstar >cuts[j] & a$ttstar< cuts[j+1])/nperm missrate[j] <- (ngenes2[j]- a$pi0*ngenes0[j])/ngenes2[j] missrate[j] <- max(missrate[j],0) } return(list(quant=quant,cuts=cuts, missrate=missrate)) } varr <- function(x, meanx=NULL){ n <- ncol(x) p <- nrow(x) Y <-matrix(1,nrow=n,ncol=1) if(is.null(meanx)){ meanx <- rowMeans(x)} ans<- rep(1, p) xdif <- x - meanx %*% t(Y) ans <- (xdif^2) %*% rep(1/(n - 1), n) ans <- drop(ans) return(ans) }