Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 225 lines (188 sloc) 7.814 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# pmiDiversity.R

# Calculate PMI statistics (Grivet et al. 2005, Scofield et al. 2010, 2011) as
# well as alpha, beta, gamma based on both q_qq and r_gg (Scofield et al 2012
# American Naturalist 180(6) 719-732,
# http://www.jstor.org/stable/10.1086/668202), as well as q_gg adjusted
# following Nielsen et al 2003. Used during data analysis for Scofield et al
# Am Nat; earlier versions (pre-github) were used for Scofield et al 2010 J
# Ecol and for Scofield et al 2011 Oecologia. The single argument is a table
# of counts in sites (rows) X sources (columns) format. If the argument is not
# a matrix, it is converted to one, and rows are ordered numerically by rowname
# if the rownames are numeric.

.pmiDiversityVersion = "0.3"

# Copyright (c) 2012 Douglas G. Scofield, Umeå Plant Science Centre, Umeå, Sweden
#
# douglas.scofield@plantphys.umu.se
# douglasgscofield@gmail.com
#
# These statistical tools were developed in collaboration with Peter Smouse
# (Rutgers University) and Victoria Sork (UCLA) and were funded by U.S. National
# Science Foundation awards NSF-DEB-0514956 and NSF-DEB-0516529.
#
# Use as you see fit. No warranty regarding this code is implied nor should be
# assumed. Send bug reports etc. to one of the above email addresses.
#
# CHANGELOG
#
# 0.3: Pull Nielsen et al. out into separate function for use in diversity tests,
# add collaborators/funding statement
# 0.2: Finalize Nielsen et al. calculations of diversity measures
# 0.1: First release


nielsenTransform = function(q.gg, n.g)
{
    # better bias correction, Nielsen et al 2003 Mol Ecol
    (((q.gg*(n.g+1)*(n.g-2))+(3-n.g))/((n.g-1)^2))
}


pmiDiversity <- function(tab)
{
  .thisis = paste("pmiDiversity v", .pmiDiversityVersion, "Douglas G. Scofield, douglasgscofield@gmail.com")
  tab = as.matrix(tab)
  #if (! any(is.na(suppressWarnings(as.numeric(rownames(tab))))))
  # tab <- tab[order(as.numeric(rownames(tab))), ]
  G <- dim(tab)[1]
  K <- dim(tab)[2]
  N <- sum(tab)
  n.g <- apply(tab, 1, sum)
  # PMI statistics (Grivet et al 2005 Mol Ecol, Scofield et al 2010 J Ecol)

  r.gg <- numeric(G)
  for (g in 1:G) {
    tmp.numer <- (tab[g,]*tab[g,]) - tab[g,]
    tmp.denom <- (n.g[g]*n.g[g]) - n.g[g]
    r.gg[g] <- sum(tmp.numer / tmp.denom)
  }
  # for q.gh (same as r.gh), create table of relative genotype frequencies
  reltab <- sweep(tab, 1, n.g, FUN="/")
  Q.mat <- reltab %*% t(reltab)
  q.gg <- diag(Q.mat) # note biased estimator is diagonal of this matrix
  names(r.gg) <- names(q.gg)
  q.nielsen.gg <- nielsenTransform(q.gg, n.g)
  q.bar.0 <- sum(n.g * n.g * q.gg) / sum(n.g * n.g)
  q.nielsen.bar.0 <- sum(n.g * n.g * q.nielsen.gg) / sum(n.g * n.g)
  r.bar.0 <- sum((n.g*n.g*r.gg) - (n.g*r.gg)) / sum((n.g*n.g) - n.g)
  # more pairwise PMI stuff
  q.gh <- lower.tri(Q.mat)
  # pooled PMI (Scofield et al 2010 J Ecol)
  q.0.gh <- prop.y.0.gh <- matrix(0,G,G)
  dimnames(q.0.gh) <- dimnames(prop.y.0.gh) <- dimnames(Q.mat)
  for (g in 1:(G-1)) {
    for (h in (g+1):G) {
      shared.k <- tab[g,] & tab[h,]
      y.gh <- prop.y.0.gh[g,h] <- sum(tab[g,shared.k])
      y.hg <- prop.y.0.gh[h,g] <- sum(tab[h,shared.k])
      prop.y.0.gh[g,h] <- y.gh / n.g[g]
      prop.y.0.gh[h,g] <- y.hg / n.g[h]
      q.0.gh[g,h] <- (y.gh * y.hg)/(n.g[g]*n.g[h])
    }
  }

  # Diversity measures (Scofield et al Am Nat http://www.jstor.org/stable/10.1086/668202)
  # alpha
  alpha.q <- 1/q.gg
  alpha.q.unweighted.mean <- (1/G) * sum(alpha.q)
  q.gg.unweighted.mean <- mean(q.gg)
  d.alpha.q <- 1/q.gg.unweighted.mean

  alpha.q.nielsen <- 1/q.nielsen.gg
  alpha.q.nielsen.unweighted.mean <- (1/G) * sum(alpha.q.nielsen)
  q.nielsen.gg.unweighted.mean <- mean(q.nielsen.gg)
  d.alpha.q.nielsen <- 1/q.nielsen.gg.unweighted.mean

  alpha.r <- 1/r.gg
  alpha.r.unweighted.mean <- (1/G) * sum(alpha.r)
  r.gg.unweighted.mean <- mean(r.gg)
  d.alpha.r <- 1/r.gg.unweighted.mean

  # gamma
  n.k <- apply(tab, 2, sum)
  Q.k <- n.k / N
  d.gamma.q <- 1/sum(Q.k * Q.k)
  d.gamma.q.nielsen <- 1/nielsenTransform(sum(Q.k * Q.k), N)
  d.gamma.r <- 1/sum((n.k*(n.k - 1)) / (N*(N-1)))

  # form pairwise omega matrix, replace its diagonal with alpha
  q.diversity.mat <- (2 * Q.mat) / outer(q.gg, q.gg, FUN="+")
  diag(q.diversity.mat) <- alpha.q
  q.divergence.mat <- 1 - (2 * Q.mat) / outer(q.gg, q.gg, FUN="+")
  diag(q.divergence.mat) <- 0
  q.overlap.mat <- 1 - q.divergence.mat

  q.nielsen.diversity.mat <- (2 * Q.mat) / outer(q.nielsen.gg, q.nielsen.gg, FUN="+")
  diag(q.nielsen.diversity.mat) <- alpha.q.nielsen
  q.nielsen.divergence.mat <- 1 - (2 * Q.mat) / outer(q.nielsen.gg, q.nielsen.gg, FUN="+")
  diag(q.nielsen.divergence.mat) <- 0
  q.nielsen.overlap.mat <- 1 - q.nielsen.divergence.mat

  r.diversity.mat <- (2 * Q.mat) / outer(r.gg, r.gg, FUN="+")
  diag(r.diversity.mat) <- alpha.r
  r.divergence.mat <- 1 - (2 * Q.mat) / outer(r.gg, r.gg, FUN="+")
  diag(r.divergence.mat) <- 0
  r.overlap.mat <- 1 - r.divergence.mat

  ## mean overlap and divergence
  C <- Q.mat
  diag(C) <- 0

  q.overlap <- sum(C)/((G - 1)*sum(q.gg))
  q.divergence <- 1 - q.overlap

  q.nielsen.overlap <- sum(C)/((G - 1)*sum(q.nielsen.gg))
  q.nielsen.divergence <- 1 - q.nielsen.overlap

  r.overlap <- sum(C)/((G - 1)*sum(r.gg))
  r.divergence <- 1 - r.overlap

  # return value
  list(produced.by=.thisis,
     table=tab, # table passed in, as a matrix
     num.groups=G, # number of rows (sites)
     num.sources=K, # number of columns (sources)
     num.samples=N,
     num.samples.group=n.g,
     num.sources.group=apply(tab, 1, function(x)sum(x > 0)),
     num.samples.source=n.k,
     num.groups.source=apply(tab, 2, function(x)sum(x > 0)),

     # PMI measures by group/site
     q.gg=q.gg,
     q.nielsen.gg=q.nielsen.gg,
     r.gg=r.gg,

     # Matrix of q_gg and q_gh values
     Q.mat=Q.mat, # this is q.gh

     q.0.gh=q.0.gh,

     # Pooled values (Scofield et al 2010 J Ecol)
     y.gh=y.gh,
     prop.y.0.gh=prop.y.0.gh, #

     # Weighted grand means
     q.bar.0=q.bar.0,
     q.nielsen.bar.0=q.nielsen.bar.0,
     r.bar.0=r.bar.0,

     # Unweighted means
     q.gg.unweighted.mean=q.gg.unweighted.mean,
     q.nielsen.gg.unweighted.mean=q.nielsen.gg.unweighted.mean,
     r.gg.unweighted.mean=r.gg.unweighted.mean,

     # Alpha diversities at each site
     alpha.q=alpha.q,
     alpha.q.nielsen=alpha.q.nielsen,
     alpha.r=alpha.r,

     # Mean alpha diversity as reciprocal of unweighted mean site PMI
     d.alpha.q=d.alpha.q,
     d.alpha.q.nielsen=d.alpha.q.nielsen,
     d.alpha.r=d.alpha.r,

     # Mean alpha diversity as mean of individual site alpha diversities
     alpha.q.unweighted.mean=alpha.q.unweighted.mean,
     alpha.q.nielsen.unweighted.mean=alpha.q.nielsen.unweighted.mean,
     alpha.r.unweighted.mean=alpha.r.unweighted.mean,

     # Gamma diversity
     d.gamma.q=d.gamma.q,
     d.gamma.q.nielsen=d.gamma.q.nielsen,
     d.gamma.r=d.gamma.r,

     # Beta diversity
     d.beta.q=d.gamma.q/d.alpha.q,
     d.beta.q.nielsen=d.gamma.q.nielsen/d.alpha.q.nielsen,
     d.beta.r=d.gamma.r/d.alpha.r,

     # Diversity matrix: q_gg on diagonal, q_gh off-diagonal
     # Divergence matrix: 0 diagonal, delta_gh off-diagonal
     # You can construct the overlap matrix (1 diagonal,
     # omega_gh off-diagonal) via 1 - Divergence matrix
     q.diversity.mat=q.diversity.mat,
     q.divergence.mat=q.divergence.mat,
     # Mean divergence and overlap
     q.divergence=q.divergence,
     q.overlap=q.overlap,

     q.nielsen.diversity.mat=q.nielsen.diversity.mat,
     q.nielsen.divergence.mat=q.nielsen.divergence.mat,
     q.nielsen.divergence=q.nielsen.divergence,
     q.nielsen.overlap=q.nielsen.overlap,

     r.diversity.mat=r.diversity.mat,
     r.divergence.mat=r.divergence.mat,
     r.divergence=r.divergence,
     r.overlap=r.overlap
    )
}


Markdown Cheat Sheet

Format Text

Headers

# This is an <h1> tag
## This is an <h2> tag
###### This is an <h6> tag

Text styles

*This text will be italic*
_This will also be italic_
**This text will be bold**
__This will also be bold__

*You **can** combine them*

Lists

Unordered

* Item 1
* Item 2
  * Item 2a
  * Item 2b

Ordered

1. Item 1
2. Item 2
3. Item 3
   * Item 3a
   * Item 3b

Miscellaneous

Images

![GitHub Logo](/images/logo.png)
Format: ![Alt Text](url)

Links

http://github.com - automatic!
[GitHub](http://github.com)

Blockquotes

As Kanye West said:

> We're living the future so
> the present is our past.

Code Examples in Markdown

Syntax highlighting with GFM

```javascript
function fancyAlert(arg) {
  if(arg) {
    $.facebox({div:'#foo'})
  }
}
```

Or, indent your code 4 spaces

Here is a Python code example
without syntax highlighting:

    def foo:
      if not bar:
        return true

Inline code for comments

I think you should use an
`<addr>` element here instead.
Something went wrong with that request. Please try again.