

#' Compare multiple alpha diversity indexes between groups
#'
#' This function calculates average of alpha diversity indexes for a specific rarefaction depth, standardize diversity indexes and compare between groups
#' using linear/linear mixed effect model and adjust for covariates.
#' @param datlist the list of your dataframe.
#' @param depth the rarefaction depth of choice. Depth can be "max" (highest depth) or an order (number) of the depth in the list generated by alpha_rarefaction.py
#' @param mapfile mapping file
#' @param mapsampleid sample id in the mapping file
#' @param comvar variable for comparison
#' @param adjustvar variables that need to be adjusted in the model
#' @param personid name of variable for person id. Default is "personid" (applicable when longitudinal="yes").
#' @param longitudinal longitudinal data or one time data. Options are c("yes","no"). Default is "yes".
#' @param age.limit age upper limit for included samples. Default is 1000000 months (~no upper limit).
#' @param standardize standardization of diversity indexes before comparison or not. Default is FALSE.
#' @return list of comparison result matrices and mean diversity of all indexes for each samples (with or without standardization)
#' @keywords alpha diversity comparison
#' @export
#' @examples
#' data(alphadat)
#' data(covar.rm)
#' covar.rm$sampleid<-tolower(covar.rm$sampleid)
#' #comparison of standardized alpha diversity indexes between genders adjusting for
#' #breastfeeding and infant age at sample collection in infants <=6 months of age
#' alphacom<-alpha.compare(datlist=alphadat,depth=3,mapfile=covar.rm,
#' mapsampleid="sampleid", comvar="gender",adjustvar=c("age.sample","bf"),
#' longitudinal="yes", age.limit=6,standardize=TRUE)
#' alphacom$alphasum[,1:5]


alpha.compare<-function(datlist,depth,mapfile,mapsampleid,comvar,adjustvar,
                        personid="personid", longitudinal="yes",age.limit=1000000,standardize=FALSE){
  alphamean<-matrix(NA, nrow=length(names(datlist)),ncol=(ncol(datlist[[1]])-3))
  rownames(alphamean)<-names(datlist)
  colnames(alphamean)<-colnames(datlist[[1]])[-c(1:3)]
  for (i in 1: length(names(datlist))){
    dat.a<-datlist[[i]]
    raredepth<-unique(dat.a$sequences.per.sample)[as.numeric(as.character(depth))]
    if (depth=="max"){
      raredepth<-max(unique(dat.a$sequences.per.sample))
    }
    dat.a[,-c(1:3)]<-lapply(dat.a[,-c(1:3)],as.character)
    dat.a[,-c(1:3)]<-lapply(dat.a[,-c(1:3)],as.numeric)
    dat.mean<-plyr::ddply(dat.a, plyr::.(sequences.per.sample), plyr::colwise(mean))
    alphamean[i,]<-matrix(unlist(dat.mean[dat.mean$sequences.per.sample==raredepth,-c(1:3)]),nrow=1)
  }
  alphamean<-as.data.frame(t(alphamean))
  alphamean$sampleid<-sub('.*x', '', rownames(alphamean)) #dirty fix to remove added x to samplenames started with number e.g. Haiti data
  #standardize alpha
  alphameans<-dplyr::mutate_at(alphamean,.vars=names(datlist),.funs=function(x){(x-mean(x,na.r=T))/sd(x,na.rm=T)})
  mapfile[,mapsampleid]<-tolower(mapfile[,mapsampleid])
  #get sample only in mapfile (all age)
  alphamean<-alphamean[alphamean$sampleid %in% mapfile[,mapsampleid],]
  alphameans<-alphameans[alphameans$sampleid %in% mapfile[,mapsampleid],]
  #merge with mapfile
  #apply age limit for comparison
  mapfile<-mapfile[mapfile$age.sample<=age.limit,]
  if (standardize==FALSE){
    alphamap<-merge(mapfile,alphamean,by.x=mapsampleid,by.y="sampleid")
  }
  if (standardize==TRUE){
    alphamap<-merge(mapfile,alphameans,by.x=mapsampleid,by.y="sampleid")
  }
  if (longitudinal=="yes"){
    alphamap$personid<-as.factor(alphamap[,personid])
  }
  alphasum<-NULL
  for (j in 1: length(names(datlist))){
    #mixed model
    if (longitudinal=="yes"){
      fitsum<-try(summary(lme4::lmer(stats::as.formula(paste(names(datlist)[j],paste(c(comvar,adjustvar,"(1|personid)"),collapse="+"),sep="~")), data=alphamap)))
      #fitsum<-try(summary(lme4::glmer(stats::as.formula(paste(names(datlist)[j],paste(c(comvar,adjustvar,"(1|personid)"),collapse="+"),sep="~")), data=alphamap,family="gaussian")))
    }
    #linear model
    if (longitudinal=="no"){
      fitsum<-try(summary(stats::glm(stats::as.formula(paste(names(datlist)[j],paste(c(comvar,adjustvar),collapse="+"),sep="~")), data=alphamap,family="gaussian")))
    }
    if (class(fitsum) == "try-error") {
      warning("Error in model fit, NA introduced.\n")
      fitcoefw<-NULL
      alphasum<-plyr::rbind.fill(alphasum,fitcoefw)
    }
    if (class(fitsum) != "try-error") {
      fitcoef<-as.data.frame(fitsum$coefficients[rownames(fitsum$coefficients)!="(Intercept)",]) #remove intercept
      if (longitudinal=="yes"){
        fitcoef[,"Pr(>|t|)"]<-2*stats::pnorm(-abs(fitcoef[,"Estimate"]/fitcoef[,"Std. Error"]))
      }
      fitcoef[,"varname"]<-rownames(fitcoef)
      fitcoef[,"id"]<-names(datlist)[j]
      fitcoefw<-stats::reshape(fitcoef, idvar="id", timevar="varname", direction="wide")
      alphasum<-plyr::rbind.fill(alphasum,fitcoefw)
    }
  }
  if (standardize==TRUE){
    return(list(alphamean=alphamean,alphamean.standardized=alphameans,alphasum=alphasum))
  }
  if (standardize==FALSE){
    return(list(alphamean=alphamean,alphasum=alphasum))
  }
}

