Filippo Botta, Copenhagen University - filippo.botta@gmail.com

Gilles Guillot, Technical Univesity of Denmark - gilles.guillot@gmail.com

Overview

Sunder is a computer program that estimates parameters in a statistical model that quantifies the relative effect of geographic versus ecological isolation on genetic differentiation. It is a model-based alternative to the Mantel tests. The package consists mostly of five functions:

References

Installation

Open R and in the shell type:

install.packages('Sunder')

Input and output

To use Sunder, you need

Example with Bayesian computations

## loading package
## library(Sunder) # for most people on Earth
library(Sunder,lib.loc='/home/gilles/R/x86_64-pc-linux-gnu-library/3.1/Sunder/0.0.3/')
## Loading required package: mnormt
## loading toy data from package Sunder
data(toydata,package='Sunder')

## plotting data
plot(as.vector(D_G),as.vector(cor(t(gen[,,1]))),
          bg=colorRampPalette(c("blue", "red"))(dim(D_E)[1]^2)[order(order(as.vector(D_E)))],
          pch=21,
          xlab='Geographic distance',
          ylab='Empirical covariance genotypes')
## Warning: the standard deviation is zero

plot of chunk unnamed-chunk-1

#### Computing options
nit <- 10^2 ## just for the example, should be much larger, e.g. 50000
run  <- c(1,1,1)
thinning  <- 1 #  just for the example, should be larger, e.g. max(nit/10^3,1)
ud   <- c(0,1,1,0,0) 
theta.init <- c(1,2,1,1,0.01)
n.validation.set  <- dim(gen)[1]*dim(gen)[2]/10 
theta.max  <- c(10,10*max(D_G),10*max(D_E),1,0.01)
     
plot  <- TRUE
trace <- FALSE
     
#### Call main Sunder function####
output <- MCMCCV(gen,D_G,D_E,
                          nit,thinning,
                          theta.max,
                          theta.init,
                          run,ud,n.validation.set,print.pct=FALSE)
## Computations for model 'geog+envi'
## Computations for model 'geog'
## Computations for model 'envi'
mod.lik <- output$mod.lik
tvt <- output$theta
     
     
     ## plotting outputs
upd=matrix(nrow=sum(run), ncol=length(theta.init), data=1)
upd[2,3]=upd[3,2]=0
     
     
kol=c(4,2,3) 
xseq=seq(thinning,nit,thinning)
ylab=c(expression(paste(alpha)),
            expression(paste(beta[D])),
            expression(paste(beta[E])),
            expression(paste(gamma)),
            expression(paste(delta)))
     
par(mfrow=c(sum(run),length(theta.init)))
     for (j in 1:sum(run))
     {
       for(k in 1:length(theta.init))
       {
         if (sum(upd[,k]==1)>0)
         {
           if(upd[j, k]==1)
           {
             if(exists("theta"))
               ylim=c(min(tvt[k,,j],theta[k]),max(tvt[k,,j],theta[k])) else
                 ylim=c(min(tvt[k,,j]),max(tvt[k,,j]))
             plot(0, type="n",xlab="",ylab="", xlim=c(0,nit), ylim=ylim)
             lines(xseq,tvt[k,,j],col=kol[j],xlab="",ylab="")
             if(exists("theta")) abline(h=theta[k],lty=2)
             title(xlab="iterations")        
             mtext(ylab[k], side=2, line=2.3,las=1)} else plot.new()
         }
       }
     }

plot of chunk unnamed-chunk-2

## printing likelihood for various models
print(mod.lik)
##   G+E     G     E 
## -3615 -3830 -3543
print(paste('The model achieving the highest likelihood on the validation set is:',
            names(mod.lik)[order(mod.lik,decreasing=TRUE)[1]]))
## [1] "The model achieving the highest likelihood on the validation set is: E"

Example with Maximum Likelihood computations

nsite <- 200
nloc <- 1000
hap.pop.size <- 100
theta <- c(runif(n=1,.5,10),
           runif(n=1,.01,10),
           runif(n=1,.01,10),
           runif(n=1,.5,1),
           runif(n=1,.01,.1)
           )
mod <- 'G+E' 
dat <- SimSunderData(mod=mod,
                     theta=theta,
                     nsite=nsite,
                     nloc=nloc,
                     hap.pop.size=hap.pop.size,
                     nalM=2,nalm=2, #bi-allelic loci
                     var.par=1,
                     scale.par=3)
gen <- dat$gen[,,1]
D_G <- dat$D_G
D_E <- dat$D_E

res <- MLCVGauss(gen,D_G,D_E,
                 ntrain=nrow(gen)/2,
                 nresamp=3)

which.max(res$mod.lik)
## G+E 
##   1