#### CORE ROUTINES:
####   ebayes.l2e: 2 GROUP DIFFERENTIAL EXPRESSION ANALYSIS VIA PARTIAL MIXTURE ESTIMATION
####   qqnorm.pme: MODEL ASSESSMENT VIA QQ-NORMAL PLOT

ebayes.l2e <- function(x,group,l2e=FALSE,l2e.adjP=FALSE,wl2e=TRUE,wl2e.adjP=TRUE,ttest=FALSE,wilctest=FALSE,sd.tstat=FALSE,fdr=0.05,fmethod='kernel',maxrew=10,nufix) {

# INPUT
# x: normalized expression data (genes in rows, biological samples in columns). If a vector is supplied it is treated as the test statistic and only partial mixture methods (L2E, WL2E) are available
# group: vector of 1's and 2's indicating the group to which the columns of x belong
# l2e: use L2E to estimate null distrib
# l2e.adjP: use L2E to estimate null distrib, Wald test stat and Benjamini&Hochberg's p-value adjustment
# wl2e: use weighted L2E to estimate null distrib
# wl2e.adjP: use weighted L2E to estimate null distrib, Wald test stat and Benjamini&Hochberg's p-value adjustment
# ttest: use two-sample t-tests (assuming equal variances) with Benjamini&Hochberg's p-value adjustment
# wilctest: use two-sample Wilcoxon tests (asymptotic p-vals computed, with continuity correction) with BH p-value adjustment
# sd.tstat: if FALSE, the test stat for L2E and WL2E is the difference between the 2 group means. If TRUE, it is divided by its SD as in a regular t-test.
# fdr: desired FDR level
# fmethod: 'kernel' to use kernel estimate of the overall distribution (currently it's the only option available)
# maxrew: maximum number of iterative re-weightings in the weighted L2E fitting
# nufix: if specified, the partial mixture fits a Student's t component with nufix degrees of freedom
  
if (l2e==F&l2e.adjP==F&wl2e==F&wl2e.adjP==F&ttest==F&wilctest==F) stop('At least one of l2e,l2e.adjP,wl2e,wl2e.adjP,ttest or wilctest has to be set to TRUE')
if (fdr>=1 | fdr<=0) stop('fdr must be between 0 and 1')
if (is.matrix(x)) {
  if (missing(group)) stop('If x is a matrix the argument group must be provided')
  if (length(table(group))>2) stop('groups can only indicate 2 groups') 
  if (sum((group!=1)&&(group!=2))>0) { group <- as.numeric(factor(group)); warning('group labels converted to 1,2') }
}

#Load library & Define useful functions
logit <- function(x) { log((x+.1)/(1-x+.1)) }
pnorm.mix <- function(x,mu,sd,probs) {  colSums(probs * pnorm(matrix(rep(x,length(mu)),nrow=length(mu),byrow=T),mu,sd)) }
dnorm.mix <-function(x,mu,sd,probs,log) {colSums(probs*dnorm(matrix(rep(x,length(mu)),nrow=length(mu),byrow=T),mu,sd,log))}
dnct <- function(x,v,mu,sig) { return( dt((x-mu)/sqrt(sig),df=v)/sqrt(sig) ) }

twilc <- function(i,x,group) { return(wilcox.test(as.numeric(x[i,group==1]),as.numeric(x[i,group==2]))$statistic) }
pwilc <- function(i,x,group) { return(wilcox.test(as.numeric(x[i,group==1]),as.numeric(x[i,group==2]))$p.value) }


#Obtain test statistics
if (!is.vector(x)) {
  n <- as.integer(table(group)); m1 <- rowMeans(x[,group==1],na.rm=TRUE); m2 <- rowMeans(x[,group==2],na.rm=TRUE)
  v1 <- (rowMeans(x[,group==1]^2,na.rm=TRUE)-m1^2) * n[1]/(n[1]-1); v2 <- (rowMeans(x[,group==2]^2,na.rm=TRUE)-m2^2) * n[2]/(n[2]-1)
  s <- sqrt(((n[1]-1)*v1+(n[2]-1)*v2)/(sum(n)-2))*sqrt(1/n[1]+1/n[2])
  if (sd.tstat==TRUE) {
    fudge <- quantile(s,probs=.01)
    tstat <- (m1-m2)/s; tstat[s<fudge] <- (m1[s<fudge]-m2[s<fudge])/fudge
  } else {
    tstat <- m1-m2
  }
  if (ttest==T) { tstat.ttest <- (m1-m2)/s } else { tstat.ttest <- NA }
} else { tstat <- x; tstat.ttest <- NA }

#Estimate overall distribution ('density' function of 'mclust' package)
if (l2e==T | wl2e==T) {
  if (missing(nufix)) {
    f <- density(tstat,na.rm=TRUE)     #use Normal kernel
    f <- approx(f$x,f$y,xout=tstat)$y
  } else {
    library(quantreg)
    f <- akj(x=tstat,z=tstat,iker1=1)$dens  #use Cauchy kernel
  }
} else { f <- NA }

#Estimate null distribution
if (missing(nufix)) {
  if (l2e==T | l2e.adjP==T) {
    t0.l2e <- mpdc(tstat); mu0.l2e <- t0.l2e$m; sd0.l2e <- sqrt(t0.l2e$sig); w0.l2e <- t0.l2e$w
  }
  if (wl2e==T | wl2e.adjP==T) {
    t0.wl2e <- rewupdc(tstat, maxrew=maxrew, tol=.01)
    mu0.wl2e <- t0.wl2e$m; sd0.wl2e <- sqrt(t0.wl2e$sig); w0.wl2e <- t0.wl2e$w
  }
} else {
  if (l2e==T | l2e.adjP==T) {
    mu0.l2e <- 0; sd0.l2e <- 1
    w0.l2e <- wupdc.findw(tstat, nu=nufix, weight=FALSE, f0='t')
  }
  if (wl2e==T | wl2e.adjP==T) {
    mu0.wl2e <- 0; sd0.wl2e <- 1
    w0.wl2e <- wupdc.findw(tstat, nu=nufix, weight=TRUE, f0='t')
  }
}

# Determine differentially expressed genes based on posterior prob of DE
if (l2e==T) {
  if (missing(nufix)) {
    l2e.pde <- 1 - w0.l2e*dnorm(tstat,mu0.l2e,sd0.l2e)/f
  } else {
    l2e.pde <- 1 - w0.l2e*dt(tstat,df=nufix)/f
  }
  l2e.pde <- ifelse(l2e.pde>1,1,l2e.pde); l2e.pde <- ifelse(l2e.pde<0,0,l2e.pde)
  r.l2e <- find.threshold(l2e.pde,fdr)$threshold
  rej.l2e <- ((l2e.pde>r.l2e) & (tstat>mu0.l2e)) - ((l2e.pde>r.l2e) & (tstat<mu0.l2e))
} else { l2e.pde <- rej.l2e <- NA }
if (wl2e==T) {
  if (missing(nufix)) {
    wl2e.pde <- 1 - w0.wl2e*dnorm(tstat,mu0.wl2e,sd0.wl2e)/f
  } else {
    wl2e.pde <- 1 - w0.wl2e*dt(tstat,df=nufix)/f
  }
  wl2e.pde <- ifelse(wl2e.pde>1,1,wl2e.pde); wl2e.pde <- ifelse(wl2e.pde<0,0,wl2e.pde)
  r.wl2e <- find.threshold(wl2e.pde,fdr)$threshold
  rej.wl2e <- ((wl2e.pde>r.wl2e) & (tstat>mu0.wl2e)) - ((wl2e.pde>r.wl2e) & (tstat<mu0.wl2e))
} else { wl2e.pde <- rej.wl2e <- NA }

# Determine differentially expressed genes based on P-value adjustment
if ((l2e.adjP==F) & (l2e==F)) { mu0.l2e <- sd0.l2e <- w0.l2e <- NA }
if ((wl2e.adjP==F) & (wl2e==F)) { mu0.wl2e <- sd0.wl2e <- w0.wl2e <- NA }
if(l2e.adjP==T){
  wald.l2e <- (tstat-mu0.l2e)/sd0.l2e
  if (missing(nufix)) {
    l2e.adjP <- p.adjust(2*(1-pnorm(abs(wald.l2e))),method='BH')
  } else {
    l2e.adjP <- p.adjust(w0.l2e*2*(1-pt(abs(wald.l2e),df=nufix)),method='BH')
  }
  rej.l2e.adjP <- (l2e.adjP<fdr & tstat>mu0.l2e) - (l2e.adjP<fdr & tstat<mu0.l2e)
} else {l2e.adjP <- rej.l2e.adjP <- NA}
if(wl2e.adjP==T){
  wald.wl2e <- (tstat-mu0.wl2e)/sd0.wl2e
  if (missing(nufix)) { 
    wl2e.adjP <- p.adjust(2*(1-pnorm(abs(wald.wl2e))),method='BH')
  } else {
    wl2e.adjP <- p.adjust(w0.wl2e*2*(1-pt(abs(wald.wl2e),df=nufix)),method='BH')    
  }
  rej.wl2e.adjP <- (wl2e.adjP<fdr & tstat>mu0.wl2e) - (wl2e.adjP<fdr & tstat<mu0.wl2e)
} else {wl2e.adjP <- rej.wl2e.adjP <- NA}
if (ttest==T) {
  ttest.adjP <- 2*(1-pt(abs(tstat.ttest),df=sum(n)-2));
  ttest.adjP <- p.adjust(ttest.adjP,method='BH')
  rej.ttest <- (ttest.adjP<fdr & tstat>0) - (ttest.adjP<fdr & tstat<0)
} else { ttest.adjP <- rej.ttest <- NA }
if (wilctest==T) {
  tstat.wilc <- sapply(1:nrow(x),'twilc',x=x,group=group)
  wilctest.adjP <- sapply(1:nrow(x),'pwilc',x=x,group=group)
  wilctest.adjP <- p.adjust(wilctest.adjP,method='BH')
  rej.wilctest <- (wilctest.adjP<fdr & tstat.wilc>mean(tstat.wilc)) - (wilctest.adjP<fdr & tstat.wilc<mean(tstat.wilc))
} else { tstat.wilc <- wilctest.adjP <- rej.wilctest <- NA }

return(list(tstat=tstat,l2e.pde=l2e.pde,rej.l2e=rej.l2e,wl2e.pde=wl2e.pde,rej.wl2e=rej.wl2e,l2e.adjP=l2e.adjP, rej.l2e.adjP=rej.l2e.adjP, wl2e.adjP=wl2e.adjP, rej.wl2e.adjP=rej.wl2e.adjP,tstat.ttest=tstat.ttest,ttest.adjP=ttest.adjP,rej.ttest=rej.ttest,tstat.wilc=tstat.wilc,wilctest.adjP=wilctest.adjP,rej.wilctest=rej.wilctest,mu0.l2e=mu0.l2e,sd0.l2e=sd0.l2e,w0.l2e=w0.l2e,mu0.wl2e=mu0.wl2e,sd0.wl2e=sd0.wl2e,w0.wl2e=w0.wl2e,f=f))

}


qqnorm.pme <- function(x,method='WL2E',mu,sig,w,...) {
#Creates qqnormal plot to assess the partial mixture normality assumption
# x: vector with data values
# method: for the default 'WL2E' the partial mixture is estimated via iterative re-weighted L2E. For 'L2E' un-weighted L2E is used. Ignored if mu,sig and w are all specified
# mu: partial mixture estimated mean. If not specified the model is fit according to 'method'.
# sig: partial mixture estimated variance. If not specified the model is fit according to 'method'.
# w: partial mixture weight. If not specified the model is fit according to 'method'.
# ...: optional parameters passed to qqnorm

if ((method!='WL2E') & (method!='L2E')) stop('Method must be WL2E or L2E')

if ((missing(mu)) | (missing(sig)) | (missing(w))) {
  if (method=='WL2E') { fit <- rewupdc(x) } else { fit <- mpdc(x) }
  mu <- fit$m; sig <- fit$sig; w <- fit$w
}

qqnorm(x,...)
sel <- (abs(x-mu)<=quantile(abs(x-mu),probs=w))
abline(h=(range(x[sel])-mu),lty=2)

#qqnorm((x-mu)/sqrt(sig),...)
#sel <- (abs(x-mu)<=quantile(abs(x-mu),probs=w))
#abline(h=(range(x[sel])-mu)/sqrt(sig),lty=2)
  
}

ma.boxplot <- function(x,groups,m,a,ncuts=50,centerx=FALSE,scalex=FALSE,plot=TRUE,...) {
  if (!missing(groups)) {
    groups <- as.numeric(as.factor(groups))-1
    if (!is.matrix(x)) x <- as.matrix(x)
    m <- rowMeans(x[,groups==0],na.rm=TRUE) - rowMeans(x[,groups==1],na.rm=TRUE)
    a <- rowMeans(x,na.rm=TRUE)
  } else {
    if (missing(m) | missing(a)) { stop('both m and a must be specified if groups is missing') }
  }
  a <- as.numeric(cut(a,breaks=quantile(a,probs=seq(0,1,length=ncuts)),include.lowest=TRUE))
  
  if (centerx==TRUE) {
    mu.cat <- tapply(m,a,'mean')
    for (i in 1:length(table(a))) { m[a==i] <- m[a==i]-mu.cat[i] }
  }

  if (scalex==TRUE) {
    sd.cat <- tapply(m,a,'sd')
    for (i in 1:length(table(a))) { m[a==i] <- m[a==i]/sd.cat[i] }
  }

  if (plot) boxplot(m~a,...)

  return(m)

}


loess.cv <- function(x,y,f,nlearn) {
# Fits LOESS with optimal smoothing parameter wrt mean absolute error
  if (missing(nlearn)) nlearn <- round(length(x)*2/3)
  
  id <- sample(1:length(x),nlearn); sel <- rep(FALSE,length(x)); sel[id] <- TRUE
  sel[x==min(x,na.rm=TRUE)] <- TRUE; sel[x==max(x,na.rm=TRUE)] <- TRUE
  y.test <- y[!sel]; x.test <- x[!sel]; y.learn <- y[sel]; x.learn <- x[sel]
  mae <- double(length(f))
  for (i in 1:length(f)) {
    fit <- lowess(y.learn~x.learn,f=f[i])
    mae[i] <- mean(abs(y.test-approx(fit,xout=x.test)$y))
  }
  plot(f,mae,xlab='Smoothing parameter',ylab='Mean Absolute Error',type='l')
  fopt <- f[mae==min(mae)]
  fit <- lowess(y~x,f=fopt)
  return(list(fit=fit,fopt=fopt))
}


#### FINDING BAYESIAN FDR & OPTIMAL THRESHOLD FOR POSTERIOR PROB OF DIFFERENTIAL EXPRESSION #####

find.fdr <- function(v,threshold) {
# Input
#   v: vector with posterior probabilities of differential expression (missing values are removed)
#   k: vector with thresholds to declare differential expression (defaults to a sequence between min(v),max(v))
# Output: a list containing the following elements
#   threshold: the k vector
#   fdr: estimated FDR
#   fnr: estimated FNR

if (missing(v)) stop('The vector of posterior probabilities v must be specified')
if (sum(is.na(v))>0) v <- v[!is.na(v)]
if (missing(threshold)) threshold <- seq(min(v),max(v)-diff(range(v))/100,diff(range(v))/100)

fdr <- fnr <- rep(NA,length(threshold))
for (i in 1:length(threshold)) {
  fdr[i] <- sum((v>=threshold[i])*(1-v))/sum(v>=threshold[i])
  fnr[i] <- sum((v<threshold[i])*v)/sum(v<threshold[i])
}

return(list(threshold=threshold,fdr=fdr,fnr=fnr))
}

find.threshold <- function(pde,fdr) {
# Input
#   pde: vector of probabilities of differential expression
#   fdr: desired FDR
# Output:
#   threshold: threshold for probability of differential expression with estimated FDR closest to 'fdr'
#   fdrest: estimated FDR

pde <- pde[order(pde)]
imin <- 1; imax <- length(pde); i <- round(imax/2)
fdrest <- find.fdr(pde,pde[i])$fdr
while ((imax-imin>1) & (abs(fdr-fdrest)>.001)) {
  if (fdrest>fdr) { imin <- i } else { imax <- i }
  i <- round(.5*(imin+imax))
  fdrest <- find.fdr(pde,pde[i])$fdr
}
return(list(threshold=pde[i],fdrest=fdrest))

}


### ROUTINES TO FIT UNIVARIATE WEIGHTED & RE-WEIGHTED L2E ###

rewupdc <- function(x, mu0, sig0, mu, sig, w, wmax=1, maxrew=10, tol=.01) {
# rewupdc: Re-weighted L2E
# mu0: mean of the weighting distribution. Defaults to the mean of an non-weighted L2E fit
# sig0: variance of the weighting distribution. Defaults to the var of a non-weighted L2E fit
# mu: initial guess (defaults to mu0)
# sig: initial guess (defaults to sig0)
# w: initial guess (defaults to .9*wmax)
# wmax: upper bound for w (default wmax=1)
# maxrew: iterative fitting stops when maxrew fits have been performed or when the change in the parameters is smaller than tol
# tol: iterative fitting stops when maxrew fits have been performed or when the change in the parameters is smaller than tol

if (!is.vector(x)) stop("x must be a vector")
if (missing(mu0) | missing(sig0)) { z <- mpdc(x); mu0 <- z$m; sig0 <- z$sig }
if (sig0<0) stop('sig0 must be positive')
if (missing(mu)) { mu <- mu0 }; if (missing(sig)) { sig <- sig0 }; if (missing(w)) { w <- .9*wmax }

logit <- function(x) { log((x+.1)/(1-x+.1)) }

fit <- wupdc(x,mu0,sig0)
eps <- (mu-fit$m)^2 + (log(sig/fit$sig))^2 + (logit(w)-logit(fit$w))^2
mu <- fit$m; sig <- fit$sig; w <- fit$w
i <- 1
while (eps>tol & i<=maxrew) {
  fit <- wupdc(x,mu,sig)
  eps <- (mu-fit$m)^2 + (log(sig/fit$sig))^2 + (logit(w)-logit(fit$w))^2
  mu <- fit$m; sig <- fit$sig; w <- fit$w
  i <- i+1
}

if ((i==maxrew) && (eps>tol)) { warning('Iteration limit reached without achieving convergence') }

return(list(m=mu,sig=sig,w=w,min=fit$min,wmax=fit$wmax,niter=i))

}


wupdc <- function(x, mu0, sig0, mu, sig, w, wmax=1) {
## Weighted Univariate Partial Density Component Estimation (wupdc)

## Purpose:  fit model   w N(mu,sig) weighted according to N(mu0,sig0)

## Inputs:
##          x - n x d input data matrix (d=1 vector OK)
##        mu0 - mean of the weighting normal pdf
##       sig0 - var of the weighting normal pdf
##         mu - input guess for mean (default is mu0)
##        sig - input guess for variance (default is sig0)
##          w - input guess for weight (default w=.9*wmax)
##       wmax - upper bound for w (default wmax=1)
## Output:
##          list containing estimated  (m=mean sig=sig w=w min=min)

    if (!is.vector(x)) stop("x must be a vector")

    if (missing(mu0)) { mu0 <- mean(x) }
    if (missing(sig0)) { sig0 <- var(x) }
    if (sig0<0) stop('sig0 must be positive')
    if (missing(mu)) mu <- mu0
    if(missing(sig)) {
        sig <- sig0
    } else {
        if (sig<=0) stop("sig must be positive!")
    } 

    if (missing(w)) {
        w <- 0.9*wmax
    } else {
        if (w <= 0 | w>=wmax) stop("w must be between 0 and wmax")
    }

    logit <- function(x) { log((x+.1)/(1-x+.1)) }
    
    pdc <- function(th) {
      mu <- th[1]
      sig <- exp(th[2])
      w <- wmax/(1+exp(-th[3]))

      t1 <- exp((mu0*sig+2*mu*sig0)^2 /(2*sig0*sig*(sig+2*sig0)) -mu^2/sig-.5*mu0^2/sig0)/(2*pi*sqrt(sig)*sqrt(sig+2*sig0))
      t2 <- mean(dnorm(x,mu0,sqrt(sig0))*dnorm(x,mu,sqrt(sig)))

      w^2 * t1 - 2 * w * t2
    }
    
    x0 <- c(mu, log(sig), log((w/wmax)/(1-w/wmax)))
    ans <- nlm(pdc, x0, print.level=0, iterlim=100)
    m.ans <- ans$estimate[1]
    sig.ans <- exp(ans$estimate[2])
    w.ans <- wmax/(1+exp(-ans$estimate[3]))
    
    list(m=m.ans, sig=sig.ans, w=w.ans, min=ans$minimum, wmax=wmax)
}


wupdc.findw <- function(x, nu, weight=TRUE, f0='t') {
## Find weight for Weighted Univariate Partial Density Component Estimation

## Purpose:  fit model   w N(0,1) or w t_nu, where t_nu is Student's t distribution with nu degrees
##           of freedom. 

## Inputs:
##          x - n x d input data matrix (d=1 vector OK)
##         nu - degrees of freedom (only used if f0=='t')
##     weight - if set to TRUE, minimization is weighted according to N(0,1) or t_nu
##         f0 - f0=='t' uses t component; f0=='normal' uses normal component
## Output:
##          list containing estimated  (m=mean sig=sig w=w min=min)

    if (!is.vector(x)) stop("x must be a vector")
    if (f0=='t') { if (missing(nu)) stop('t-Student fit requires the degrees of freedom nu') }
    if ((f0!='normal') & (f0!='t')) stop('not valid value was specified for f0')
    mu <- 0; sig <- 1
    
    if (f0=='normal') {
      if (weight==FALSE) {
        a <- 1/(2*sqrt(pi*sig)); b <- 2*mean(dnorm(x,mu,sqrt(sig)))
      } else {
        m <- 3*mu/sig; v <- 3/sig
        a <- exp(-1.5*mu^2/sig + .5*m^2/v)/(2*pi*sig^(3/2)*sqrt(v))
        b <- 2*mean(dnorm(x,mu,sqrt(sig))^2)
      }
      w <- b/(2*a)
    } else {
      if (weight==FALSE) {
        k <- exp(lgamma(nu/2)-lgamma((nu+1)/2)+lgamma(nu+1)-lgamma(nu+1/2))
        w <- k * mean((1+(x-mu)^2/(sig*nu))^(-(nu+1)/2))
      } else {
        k <- exp(lgamma(nu/2)-lgamma((nu+1)/2)+lgamma((3*nu+3)/2)-lgamma((3*nu+2)/2))
        w <- k * mean((1+(x-mu)^2/(sig*nu))^(-nu-1))
      }
    }
    
    return(w)
}