#' Collapse a distance matrix by amalgamating populations
#'
#' This script takes a the file generated by gl.fixed.diff and generates a population recode
#' table to amalgamate populations with distance less than or equal to a specified threshold
#' The distance matrix is generated by gl.fixed.diff()
#' 
#' The script then applies the recode to the genlight object and recalculates the distance and associated matricies.
#'
#' @param fd -- name of the list of matricies produced by gl.fixed.diff() [required]
#' @param recode.table -- name of the new recode.table to receive the new population reassignments 
#' arising from the amalgamation of populations [default tmp.csv]
#' @param tloc -- threshold defining a fixed difference (e.g. 0.05 implies 95:5 vs 5:95 is fixed) [default 0]
#' @param tpop -- max number of fixed differences used amalgamating populations [default 0]
#' @param v -- verbosity: 0, silent or fatal errors; 1, begin and end; 2, progress log ; 3, progress and results summary; 5, full report [default 2]
#' @return A list containing the gl object x and the following square matricies
#'         [[1]] $gl -- the new genlight object with populations collapsed;
#'         [[2]] $fd -- raw fixed differences;
#'         [[3]] $pcfd -- percent fixed differences;
#'         [[4]] $nobs -- mean no. of individuals used in each comparison;
#'         [[5]] $nloc -- total number of loci used in each comparison;
#'         [[6]] $expobs -- if test=TRUE, the expected count of false positives for each comparison [by simulation];
#'         [[7]] $prob -- if test=TRUE, the significance of the count of fixed differences [by simulation])
#' @import adegenet
#' @export
#' @author Arthur Georges (Post to \url{https://groups.google.com/d/forum/dartr})
#' @examples
#' \donttest{
#' fd <- gl.fixed.diff(testset.gl, tloc=0.05)
#' gl <- gl.collapse(fd, recode.table="testset_recode.csv",tpop=1)
#' }

gl.collapse <- function(fd, recode.table="tmp.csv", tpop=0, tloc=0, v=2) {
  
  if (v > 0) {
    cat("Starting gl.collapse: Amalgamating populations with",tpop,"or less fixed differences\n")
  }
  
# Store the number of populations in the matrix
  npops <- dim(fd$fd)[1]
  
# Extract the column names
  pops <- variable.names(fd$fd)
  
# Initialize a list to hold the populations that differ by <= tpop
  zero.list <- list()

  # For each pair of populations
  for(i in 1:npops){
    zero.list[[i]] <- c(rownames(fd$fd)[i])
    for (j in 1:npops) {
      if (fd$fd[i,j] <= tpop) {
        zero.list[[i]] <- c(zero.list[[i]],rownames(fd$fd)[i],rownames(fd$fd)[j])
        zero.list[[i]] <- unique(zero.list[[i]])
      }
    }
    zero.list[[i]] <- sort(zero.list[[i]])
  }
  
# Pull out the unique aggregations  
  zero.list <- unique(zero.list)
  
# Amalgamate populations
  if (length(zero.list) > 1) {
  for (i in 1:(length(zero.list)-1)) {
    for (j in 2:length(zero.list)) {
      if (length(intersect(zero.list[[i]],zero.list[[j]])) > 0 ) {
        zero.list[[i]] <- union(zero.list[[i]],zero.list[[j]])
        zero.list[[j]] <- union(zero.list[[i]],zero.list[[j]])
      }
    }
  }
  for (i in 1:length(zero.list)) {
    zero.list <- unique(zero.list)
  }

  }  


# Print out the results of the aggregations 
  if(v > 1) {cat("\n\nPOPULATION GROUPINGS\n")}
  
  for (i in 1:length(zero.list)) {
    # Create a group label
      if (length(zero.list[[i]])==1) {
        replacement <- zero.list[[i]][1]
      } else {
        replacement <- paste0(zero.list[[i]][1],"+")
      }
      if(v > 1) {
        cat(paste0("Group:",replacement,"\n"))
        print(as.character(zero.list[[i]]))
        cat("\n")
      }
    # Create a dataframe with the pop names and their new group names  
      if (i==1) {
        df <- rbind(data.frame(zero.list[[i]],replacement, stringsAsFactors = FALSE))
      } else {
        df <- rbind(df,data.frame(zero.list[[i]],replacement, stringsAsFactors = FALSE))
      }
  }

# Create a recode table corresponding to the aggregations
    write.table(df, file=recode.table, sep=",", row.names=FALSE, col.names=FALSE)
  
# Recode the data file (genlight object)
  x2 <- gl.recode.pop(fd$gl, pop.recode=recode.table, v=v)
  fd2 <- gl.fixed.diff(x2,tloc=tloc,test=FALSE, v=v)
  
  # Return the matricies
  if (v > 2) {
    cat("Returning a list containing the following square matricies:\n",
        "         [[1]] $gl -- input genlight object;\n",
        "         [[2]] $fd -- raw fixed differences;\n",
        "         [[3]] $pcfd -- percent fixed differences;\n",
        "         [[4]] $nobs -- mean no. of individuals used in each comparison;\n",
        "         [[5]] $nloc -- total number of loci used in each comparison;\n",
        "         [[6]] $expobs -- if test=TRUE, the expected count of false positives for each comparison [by simulation]\n",
        "         [[7]] $prob -- if test=TRUE, the significance of the count of fixed differences [by simulation]\n")
  }
  
  if(setequal(levels(pop(x2)),levels(pop(fd$gl)))) { 
    if (v > 1) {cat(paste("\nPOPULATION GROUPINGS\n     No populations collapsed at fd <=", tpop,"\n"))}
    if (v > 0) {
      cat("Completed gl.collapse\n")
    }
    l <- list(gl=fd$gl,fd=fd2$fd,pcfd=fd2$pcfd,nobs=fd2$nobs,nloc=fd2$nloc,expobs=fd2$expobs,pval=fd2$pval)
    return(l)
  } else {
    if (v > 1) {
      cat("\nPOPULATION GROUPINGS")
      print(table(pop(x2)))
    }
    if (v > 0) {
      cat("Completed gl.collapse\n\n")
    }
    l <- list(gl=x2,fd=fd2$fd,pcfd=fd2$pcfd,nobs=fd2$nobs,nloc=fd2$nloc,expobs=fd2$expobs,pval=fd2$pval)
    return(l)
  }

}
