# gammaAccum.R

# Provide functions for calculating gamma accumulation across sites, and
# plotting the results. Used during data analysis for Scofield et al 2012
# American Naturalist 180(6) 719-732,
# http://www.jstor.org/stable/10.1086/668202. Requires as input one or more
# tables of counts in sites (rows) X sources (columns) format.

# A typical workflow might look like
#
# rga.result = runGammaAccum(tab) # where tab is site-by-source
# plotGammaAccum(rga.result)

.gammaAccum.Version = "0.1.2"

# Copyright (c) 2012 Douglas G. Scofield, Umeå Plant Science Centre, Umeå, Sweden
#
# douglas.scofield@plantphys.umu.se
# douglasgscofield@gmail.com
#
# These statistical tools were developed in collaboration with Peter Smouse
# (Rutgers University) and Victoria Sork (UCLA) and were funded by U.S. National
# Science Foundation awards NSF-DEB-0514956 and NSF-DEB-0516529.
#
# Use as you see fit. No warranty regarding this code is implied nor should be
# assumed. Send bug reports etc. to one of the above email addresses.
#
#
# FUNCTIONS PROVIDED
#
# See Scofield et al. Am. Nat, Figure 4D-F to see figures derived from using
# these functions.
#
#
# runGammaAccum(tab) : Perform a gamma diversity accumulation on the
# site-by-source data in tab. Several arguments
# control the method of accumulation and value of
# gamma calculated. Only the defaults have been
# tested; the others were developed while exploring
# the data and must be considered experimental.
# The result is returned in a list, which may be
# passed to plotGammaAccum() to plot the result.
#
# Arguments:
# tab
# Site-by-source table, as passed to pmiDiversity()
# accum.method
# "random" (default) or "proximity". If "proximity" is used,
# then distance.file must be supplied (see below)
# resample.method
# "permute" (default) or "bootstrap"; whether to resample sites
# without ("permute") or with ("bootstrap") replacement
# distance.file
# A file or data.frame containing three columns of data, with
# the header/column names being "pool", "X", and "Y",
# containing the spatial locations of the seed pools named in
# the row names of tab. Only used if the
# accum.method=="proximity"
# gamma.method
# Calculate gamma using "r" (default), "q" or "q.nielsen" method
# (see paper).
#
# plotGammaAccum(rga.result)
# : plot the gamma accumulation result from runGammaAccum()
#
# gammaAccum() : workhorse function for gamma accumulation
# gammaAccumStats() : extracts stats from result of gammaAccum()
# runGammaAccumSimple() : wrapper that runs and then return stats from gammaAccum()
#
#
#
# CHANGELOG
#
# 0.1.2: Add calculation of gamma via Nielsen et al. transform of q_gg
# 0.1.1: Minor bugfix for runGammaAccum arguments
# 0.1: First release
#
#
# TODO
#
# * Turn this into an actual R package


source("pmiDiversity.R")


#---------------------------------------------


plotGammaAccum <- function(gamma.accum,
                           plot.xmax=NA,
                           plot.ymax=NA,
                           add.regression=FALSE,
                           obs.omega=NA,
                           ...)
{
  gtr = gamma.accum$simple.results
  obs.gamma = gamma.accum$obs.gamma
  xmax <- length(gtr$mns)
  x <- 1:xmax
  lim <- c(1, max(xmax + 3, gtr$mns + gtr$SE))
  xlim <- ylim <- lim
  xlim <- c(1, ifelse(is.na(plot.xmax), xmax, plot.xmax))
  ylim <- c(1, ifelse(is.na(plot.ymax), max(gtr$mns + gtr$SE), plot.ymax))

  #par(mar=c(3.5, 3.5, 0.5, 0.5), mgp=c(2.0, 0.5, 0), las=1, xpd=NA)
  par(mar=c(2.7, 2.7, 0.5, 0.5), mgp=c(1.7, 0.4, 0), las=1, ps=10, tcl=-0.4, xpd=NA)

  pch.gamma <- 24
  pch.means <- 19
  plot.separate.gamma <- FALSE
  if (abs(gtr$mns[xmax] - obs.gamma) < 0.1) { # gamma in means sequence
    pch <- c(rep(pch.means, xmax - 1), pch.gamma)
  } else {
    pch <- pch.means
    plot.separate.gamma <- TRUE
  }
  plot(x, gtr$mns,
       pch=pch, col="black", bty="L",
       xlim=xlim,
       ylim=ylim,
       #asp=1,
       lwd=1.5,
       tcl=-0.3,
       xlab="Number of Seed Pools",
       ylab=expression("Accumulated "*gamma*" diversity"),
       bg="white",
       ...
       )
  lines(x, gtr$mns + gtr$SE, lty=2, ...)
  lines(x, gtr$mns - gtr$SE, lty=2, ...)
  if (plot.separate.gamma) {
    points(xmax, obs.gamma, pch=pch.gamma, lwd=1.5, bg="white", cex=1.2)
  }
  if (! is.na(obs.omega)) {
    text(x=(0.15*xlim[2])+1, y=(0.97*ylim[2])+1,
         substitute(bar(omega)==OMEGA, list(OMEGA=obs.omega)),
         cex=1.2)
  }
}


#---------------------------------------------


runGammaAccum <- function(tab,
                          accum.method=c("random", "proximity"),
                          resample.method=c("permute", "bootstrap"),
                          gamma.method=c("r", "q.nielsen", "q"),
                          distance.file=NA,
                          ...)
{
  accum.method = match.arg(accum.method)
  resample.method = match.arg(resample.method)
  gamma.method = match.arg(gamma.method)
  pmiD = pmiDiversity(tab)
  ans = list()
  ans$obs.gamma <- pmiD[[paste(sep="", "d.gamma.", gamma.method)]]
  ans$obs.omega.mean <- pmiD[[paste(sep="", gamma.method, ".overlap")]]
  ans$obs.delta.mean <- pmiD[[paste(sep="", gamma.method, ".divergence")]]
  ans$simple.results <- runGammaAccumSimple(tab,
                                            accum.method=accum.method,
                                            resample.method=resample.method,
                                            gamma.method=gamma.method,
                                            distance.file=distance.file,
                                            ...)
  ####
  ans
}


#---------------------------------------------


runGammaAccumSimple <- function(tab, ...)
{
  return(gammaAccumStats(gammaAccum(tab, ...)))
}


#---------------------------------------------


gammaAccumStats <- function(gammaAccum.results)
{
  mns <- unlist(lapply(gammaAccum.results,
                       function(x) { recip = 1/x; rmax = max(recip[recip != Inf]); recip[recip == Inf] = rmax; ; mean(recip) } ))
  vars <- unlist(lapply(gammaAccum.results,
                        function(x) { recip = 1/x; rmax = max(recip[recip != Inf]); recip[recip == Inf] = rmax; ; var(recip) } ))
  SE <- sqrt(vars)
  quants <- lapply(gammaAccum.results,
                   function(x) quantile(1/x, c(0.05, 0.5, 0.95)))
  ####
  return(list(mns=mns, vars=vars, SE=SE, quants=quants))
}


#---------------------------------------------


gammaAccum <- function(tab,
                       n.sites=dim(tab)[1],
                       n.resample=1000,
                       accum.method=c("random", "proximity"),
                       resample.method=c("permute", "bootstrap"),
                       distance.file=NA,
                       gamma.method=c("r", "q.nielsen", "q"))
{
  # If used, the distance.file has three columns, with the header line pool, X, Y
  accum.method <- match.arg(accum.method)
  resample.method <- match.arg(resample.method)
  gamma.method <- match.arg(gamma.method)
  pool.names <- dimnames(tab)[[1]]
  G <- dim(tab)[1]
  K <- dim(tab)[2]
  N <- sum(tab)
  if (accum.method == "proximity") {
    if (is.character(distance.file))
      gxy <- read.delim(file=distance.file)
    else if ("data.frame" %in% class(distance.file))
      gxy <- distance.file
    else
      stop("unknown type of distance information for",
          deparse(substitute(distance.file)), " with class ",
          paste(sep=", ", class(distance.file)))
    gxy <- subset(gxy, granary %in% pool.names)
  }

  ans <- list()
  for (g in 1:n.sites)
    ans[[g]] <- numeric(0)
  for (i in 1:n.resample) {
    row.order <- sample(1:G,
                        size=n.sites,
                        replace=ifelse(resample.method=="bootstrap", TRUE, FALSE))
    if (accum.method == "proximity") {
      new.row.order <- c()
      pools <- gxy[pool.names[row.order], ]
      dm <- CreateSpatialDistmat(pools$X, pools$Y, names.1=pools$pool)
      diag(dm) <- +Inf # distance to same pool is always farthest
      next.row.i <- 1
      new.row.order <- c(new.row.order, row.order[next.row.i])
      dm[, next.row.i] <- Inf # distance to same pool is always farthest
      for (r in 2:n.sites) {
        next.row.i <- which(dm[next.row.i, ] == min(dm[next.row.i, ]))[1]
        dm[, next.row.i] <- Inf
        new.row.order <- c(new.row.order, row.order[next.row.i])
      }
      row.order <- new.row.order
    }
    for (g in 1:n.sites) {
      this.accum <- apply(tab[row.order[1:g], , drop=FALSE], 2, sum)
      sum.accum <- sum(this.accum)
      this.prop <- this.accum / sum.accum
      sum.prop <- sum(this.prop * this.prop)
      ans[[g]] <- c(ans[[g]], switch(gamma.method,
        q = sum.prop,
        q.nielsen = nielsenTransform(sum.prop, sum.accum),
        r = (((sum.accum * sum.prop) - 1) / (sum.accum - 1))))
    }
  }
  return(ans)
}


#---------------------------------------------