#### CORE ROUTINES: #### ebayes.l2e: 2 GROUP DIFFERENTIAL EXPRESSION ANALYSIS VIA PARTIAL MIXTURE ESTIMATION #### qqnorm.pme: MODEL ASSESSMENT VIA QQ-NORMAL PLOT ebayes.l2e <- function(x,group,l2e=FALSE,l2e.adjP=FALSE,wl2e=TRUE,wl2e.adjP=TRUE,ttest=FALSE,wilctest=FALSE,sd.tstat=FALSE,fdr=0.05,fmethod='kernel',maxrew=10,nufix) { # INPUT # x: normalized expression data (genes in rows, biological samples in columns). If a vector is supplied it is treated as the test statistic and only partial mixture methods (L2E, WL2E) are available # group: vector of 1's and 2's indicating the group to which the columns of x belong # l2e: use L2E to estimate null distrib # l2e.adjP: use L2E to estimate null distrib, Wald test stat and Benjamini&Hochberg's p-value adjustment # wl2e: use weighted L2E to estimate null distrib # wl2e.adjP: use weighted L2E to estimate null distrib, Wald test stat and Benjamini&Hochberg's p-value adjustment # ttest: use two-sample t-tests (assuming equal variances) with Benjamini&Hochberg's p-value adjustment # wilctest: use two-sample Wilcoxon tests (asymptotic p-vals computed, with continuity correction) with BH p-value adjustment # sd.tstat: if FALSE, the test stat for L2E and WL2E is the difference between the 2 group means. If TRUE, it is divided by its SD as in a regular t-test. # fdr: desired FDR level # fmethod: 'kernel' to use kernel estimate of the overall distribution (currently it's the only option available) # maxrew: maximum number of iterative re-weightings in the weighted L2E fitting # nufix: if specified, the partial mixture fits a Student's t component with nufix degrees of freedom if (l2e==F&l2e.adjP==F&wl2e==F&wl2e.adjP==F&ttest==F&wilctest==F) stop('At least one of l2e,l2e.adjP,wl2e,wl2e.adjP,ttest or wilctest has to be set to TRUE') if (fdr>=1 | fdr<=0) stop('fdr must be between 0 and 1') if (is.matrix(x)) { if (missing(group)) stop('If x is a matrix the argument group must be provided') if (length(table(group))>2) stop('groups can only indicate 2 groups') if (sum((group!=1)&&(group!=2))>0) { group <- as.numeric(factor(group)); warning('group labels converted to 1,2') } } #Load library & Define useful functions logit <- function(x) { log((x+.1)/(1-x+.1)) } pnorm.mix <- function(x,mu,sd,probs) { colSums(probs * pnorm(matrix(rep(x,length(mu)),nrow=length(mu),byrow=T),mu,sd)) } dnorm.mix <-function(x,mu,sd,probs,log) {colSums(probs*dnorm(matrix(rep(x,length(mu)),nrow=length(mu),byrow=T),mu,sd,log))} dnct <- function(x,v,mu,sig) { return( dt((x-mu)/sqrt(sig),df=v)/sqrt(sig) ) } twilc <- function(i,x,group) { return(wilcox.test(as.numeric(x[i,group==1]),as.numeric(x[i,group==2]))$statistic) } pwilc <- function(i,x,group) { return(wilcox.test(as.numeric(x[i,group==1]),as.numeric(x[i,group==2]))$p.value) } #Obtain test statistics if (!is.vector(x)) { n <- as.integer(table(group)); m1 <- rowMeans(x[,group==1],na.rm=TRUE); m2 <- rowMeans(x[,group==2],na.rm=TRUE) v1 <- (rowMeans(x[,group==1]^2,na.rm=TRUE)-m1^2) * n[1]/(n[1]-1); v2 <- (rowMeans(x[,group==2]^2,na.rm=TRUE)-m2^2) * n[2]/(n[2]-1) s <- sqrt(((n[1]-1)*v1+(n[2]-1)*v2)/(sum(n)-2))*sqrt(1/n[1]+1/n[2]) if (sd.tstat==TRUE) { fudge <- quantile(s,probs=.01) tstat <- (m1-m2)/s; tstat[s1,1,l2e.pde); l2e.pde <- ifelse(l2e.pde<0,0,l2e.pde) r.l2e <- find.threshold(l2e.pde,fdr)$threshold rej.l2e <- ((l2e.pde>r.l2e) & (tstat>mu0.l2e)) - ((l2e.pde>r.l2e) & (tstat1,1,wl2e.pde); wl2e.pde <- ifelse(wl2e.pde<0,0,wl2e.pde) r.wl2e <- find.threshold(wl2e.pde,fdr)$threshold rej.wl2e <- ((wl2e.pde>r.wl2e) & (tstat>mu0.wl2e)) - ((wl2e.pde>r.wl2e) & (tstatmu0.l2e) - (l2e.adjPmu0.wl2e) - (wl2e.adjP0) - (ttest.adjPmean(tstat.wilc)) - (wilctest.adjP0) v <- v[!is.na(v)] if (missing(threshold)) threshold <- seq(min(v),max(v)-diff(range(v))/100,diff(range(v))/100) fdr <- fnr <- rep(NA,length(threshold)) for (i in 1:length(threshold)) { fdr[i] <- sum((v>=threshold[i])*(1-v))/sum(v>=threshold[i]) fnr[i] <- sum((v1) & (abs(fdr-fdrest)>.001)) { if (fdrest>fdr) { imin <- i } else { imax <- i } i <- round(.5*(imin+imax)) fdrest <- find.fdr(pde,pde[i])$fdr } return(list(threshold=pde[i],fdrest=fdrest)) } ### ROUTINES TO FIT UNIVARIATE WEIGHTED & RE-WEIGHTED L2E ### rewupdc <- function(x, mu0, sig0, mu, sig, w, wmax=1, maxrew=10, tol=.01) { # rewupdc: Re-weighted L2E # mu0: mean of the weighting distribution. Defaults to the mean of an non-weighted L2E fit # sig0: variance of the weighting distribution. Defaults to the var of a non-weighted L2E fit # mu: initial guess (defaults to mu0) # sig: initial guess (defaults to sig0) # w: initial guess (defaults to .9*wmax) # wmax: upper bound for w (default wmax=1) # maxrew: iterative fitting stops when maxrew fits have been performed or when the change in the parameters is smaller than tol # tol: iterative fitting stops when maxrew fits have been performed or when the change in the parameters is smaller than tol if (!is.vector(x)) stop("x must be a vector") if (missing(mu0) | missing(sig0)) { z <- mpdc(x); mu0 <- z$m; sig0 <- z$sig } if (sig0<0) stop('sig0 must be positive') if (missing(mu)) { mu <- mu0 }; if (missing(sig)) { sig <- sig0 }; if (missing(w)) { w <- .9*wmax } logit <- function(x) { log((x+.1)/(1-x+.1)) } fit <- wupdc(x,mu0,sig0) eps <- (mu-fit$m)^2 + (log(sig/fit$sig))^2 + (logit(w)-logit(fit$w))^2 mu <- fit$m; sig <- fit$sig; w <- fit$w i <- 1 while (eps>tol & i<=maxrew) { fit <- wupdc(x,mu,sig) eps <- (mu-fit$m)^2 + (log(sig/fit$sig))^2 + (logit(w)-logit(fit$w))^2 mu <- fit$m; sig <- fit$sig; w <- fit$w i <- i+1 } if ((i==maxrew) && (eps>tol)) { warning('Iteration limit reached without achieving convergence') } return(list(m=mu,sig=sig,w=w,min=fit$min,wmax=fit$wmax,niter=i)) } wupdc <- function(x, mu0, sig0, mu, sig, w, wmax=1) { ## Weighted Univariate Partial Density Component Estimation (wupdc) ## Purpose: fit model w N(mu,sig) weighted according to N(mu0,sig0) ## Inputs: ## x - n x d input data matrix (d=1 vector OK) ## mu0 - mean of the weighting normal pdf ## sig0 - var of the weighting normal pdf ## mu - input guess for mean (default is mu0) ## sig - input guess for variance (default is sig0) ## w - input guess for weight (default w=.9*wmax) ## wmax - upper bound for w (default wmax=1) ## Output: ## list containing estimated (m=mean sig=sig w=w min=min) if (!is.vector(x)) stop("x must be a vector") if (missing(mu0)) { mu0 <- mean(x) } if (missing(sig0)) { sig0 <- var(x) } if (sig0<0) stop('sig0 must be positive') if (missing(mu)) mu <- mu0 if(missing(sig)) { sig <- sig0 } else { if (sig<=0) stop("sig must be positive!") } if (missing(w)) { w <- 0.9*wmax } else { if (w <= 0 | w>=wmax) stop("w must be between 0 and wmax") } logit <- function(x) { log((x+.1)/(1-x+.1)) } pdc <- function(th) { mu <- th[1] sig <- exp(th[2]) w <- wmax/(1+exp(-th[3])) t1 <- exp((mu0*sig+2*mu*sig0)^2 /(2*sig0*sig*(sig+2*sig0)) -mu^2/sig-.5*mu0^2/sig0)/(2*pi*sqrt(sig)*sqrt(sig+2*sig0)) t2 <- mean(dnorm(x,mu0,sqrt(sig0))*dnorm(x,mu,sqrt(sig))) w^2 * t1 - 2 * w * t2 } x0 <- c(mu, log(sig), log((w/wmax)/(1-w/wmax))) ans <- nlm(pdc, x0, print.level=0, iterlim=100) m.ans <- ans$estimate[1] sig.ans <- exp(ans$estimate[2]) w.ans <- wmax/(1+exp(-ans$estimate[3])) list(m=m.ans, sig=sig.ans, w=w.ans, min=ans$minimum, wmax=wmax) } wupdc.findw <- function(x, nu, weight=TRUE, f0='t') { ## Find weight for Weighted Univariate Partial Density Component Estimation ## Purpose: fit model w N(0,1) or w t_nu, where t_nu is Student's t distribution with nu degrees ## of freedom. ## Inputs: ## x - n x d input data matrix (d=1 vector OK) ## nu - degrees of freedom (only used if f0=='t') ## weight - if set to TRUE, minimization is weighted according to N(0,1) or t_nu ## f0 - f0=='t' uses t component; f0=='normal' uses normal component ## Output: ## list containing estimated (m=mean sig=sig w=w min=min) if (!is.vector(x)) stop("x must be a vector") if (f0=='t') { if (missing(nu)) stop('t-Student fit requires the degrees of freedom nu') } if ((f0!='normal') & (f0!='t')) stop('not valid value was specified for f0') mu <- 0; sig <- 1 if (f0=='normal') { if (weight==FALSE) { a <- 1/(2*sqrt(pi*sig)); b <- 2*mean(dnorm(x,mu,sqrt(sig))) } else { m <- 3*mu/sig; v <- 3/sig a <- exp(-1.5*mu^2/sig + .5*m^2/v)/(2*pi*sig^(3/2)*sqrt(v)) b <- 2*mean(dnorm(x,mu,sqrt(sig))^2) } w <- b/(2*a) } else { if (weight==FALSE) { k <- exp(lgamma(nu/2)-lgamma((nu+1)/2)+lgamma(nu+1)-lgamma(nu+1/2)) w <- k * mean((1+(x-mu)^2/(sig*nu))^(-(nu+1)/2)) } else { k <- exp(lgamma(nu/2)-lgamma((nu+1)/2)+lgamma((3*nu+3)/2)-lgamma((3*nu+2)/2)) w <- k * mean((1+(x-mu)^2/(sig*nu))^(-nu-1)) } } return(w) }