R code for paper “Validity of covariance models for the analysis of geographical variation” by Guillot, Schilling, Porcu, Bevillacqua, Methods in Ecology and Evolution

Tree abundance data example

we show here how the use of a function that is not positive-definite as a model of covariance for spatial interpolation can lead to disastrous results. We use a subset of the abundance data of the SCGLR package.

Loading packages

if (!("SCGLR" %in% installed.packages())) install.packages("SCGLR")
if (!("PBSmapping" %in% installed.packages())) install.packages("PBSmapping")
if (!("maps" %in% installed.packages())) install.packages("maps")
if (!("oce" %in% installed.packages())) install.packages("oce")
if (!("RandomFields" %in% installed.packages())) install.packages("RandomFields")
if (!("fields" %in% installed.packages())) install.packages("fields")

Loading data

library(SCGLR)
## Loading required package: Formula Loading required package: expm Loading
## required package: Matrix Loading required package: lattice
## 
## Attaching package: 'expm'
## 
## The following object is masked from 'package:Matrix':
## 
## expm
## 
## Loading required package: ROCR Loading required package: gplots KernSmooth
## 2.23 loaded Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
## lowess
## 
## Loading required package: ggplot2
library(PBSmapping)
## ----------------------------------------------------------- PBS Mapping
## 2.66.53 -- Copyright (C) 2003-2013 Fisheries and Oceans Canada
## 
## PBS Mapping comes with ABSOLUTELY NO WARRANTY; for details see the file
## COPYING. This is free software, and you are welcome to redistribute it
## under certain conditions, as outlined in the above file.
## 
## A complete user guide 'PBSmapping-UG.pdf' is located at
## /home/gilles/R/x86_64-pc-linux-gnu-library/3.0/PBSmapping/doc/PBSmapping-UG.pdf
## 
## Packaged on 2013-05-03 Pacific Biological Station, Nanaimo
## 
## All available PBS packages can be found at
## http://code.google.com/p/pbs-software/
## 
## To see demos, type '.PBSfigs()'.
## -----------------------------------------------------------
library(maps)
library(oce)
## Loading required package: mapproj
library(RandomFields)
## ## Note that several updates of RandomFields are expected during 2011. ##
## ## Please see help("changings") for important changes.  ##
library(fields)
## Loading required package: spam Spam version 0.29-3 (2013-04-23) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction and overview
## of this package. Help for individual functions is also obtained by adding
## the suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## 
## The following objects are masked from 'package:base':
## 
## backsolve, forwardsolve
data(genus)
dim(genus)
## [1] 1000   69
coord = cbind(genus$lon, genus$lat)
colnames(coord) <- c("X", "Y")
attr(coord, "projection") <- "LL"
attr(coord, "zone") <- NA
coord.utm <- convUL(coord)
## convUL: For the UTM conversion, automatically detected zone 33. convUL:
## Converting coordinates within the northern hemisphere.
x = coord[, 1]
y = coord[, 2]

Displaying location of sampling sites

par(mai=c(2,.8,.8,.1))
data(coastlineWorld)
plot(coastlineWorld,#xlim=c(10,25),ylim=c(-10,15),cex=.3,col=2,asp=1,type="n",
     clatitude=16,clongitude=5,span=5000,
     xlab="Longitude",ylab="Latitude",cex.lab=1.3)
polygon(x=c(min(x),max(x),max(x),min(x)),y=c(min(y),min(y),max(y),max(y)),
        border=2,lwd=3,lty=1)

plot of chunk unnamed-chunk-3


plot(coastlineWorld,
     clatitude=2,clongitude=12,span=1500,
     xlab="Longitude",ylab="Latitude"
     )
points(coord,asp=1,col=2,xlim=c(10,20),ylim=c(-1,6),cex=.3)

plot of chunk unnamed-chunk-3

Computing empirical spatial auto-covariance function for a tree a genus for which the covariance seems to have a linear decay

igenus = 27
ddist = as.vector(dist(coord.utm))
bin=seq(0,max(ddist),by=max(ddist)/20)
ccov = rep(0,length(bin))
bin.count = rep(0,length(bin))
mu = mean(genus[,1+igenus])
z = genus[,1+igenus]

for(i in 1:nrow(coord)) # takes around 5 minutes on standard computer, could be improved,  be patient or have a coffee
{
  # print(i)
  for(j in i:nrow(coord))
  {
    dd = dist(coord.utm[c(i,j),])
    if(i==j)
    {
      ibin = 1
      ccov[ibin] =  ccov[ibin] + (z[i]-mu)^2
      bin.count[ibin] = bin.count[ibin]+1
    }else
    {
      ibin = min((1:length(bin))[dd<=bin])
      ccov[ibin] =  ccov[ibin] + (z[i]-mu)*(z[j]-mu)
      bin.count[ibin] = bin.count[ibin]+1
    }    
  }
}
ccov = ccov/bin.count
bin2 = c(0,(bin[-length(bin)]+bin[-1])/2)

Computing kriging variance with triangle covariance

Function required below

cov.mat = function(d, variance, scale.par, model) {
    d = as.matrix(d)
    c = matrix(nr = nrow(d), nc = ncol(d))
    if (model == "triangle") {
        sub = (d > -variance/scale.par)
        c[sub] = 0
        c[!sub] = scale.par * d[!sub] + variance
    }
    if (model == "expo") {
        c = variance * exp(-d/scale.par)
    }
    c
}

Building a grid

xmin = min(coord.utm[, 1])
xmax = max(coord.utm[, 1])
ymin = min(coord.utm[, 2])
ymax = max(coord.utm[, 2])

npix = c(50, 50)
coord.pred <- matrix(ncol = 2, nrow = npix[1] * npix[2], NA)
coord.pred[, 1] <- rep(seq(from = xmin, to = xmax, length = npix[1]), npix[2])
coord.pred[, 2] <- as.vector(matrix(nrow = npix[1], ncol = npix[2], byrow = TRUE, 
    rep(seq(from = ymin, to = ymax, length = npix[2]), npix[1])))

Interpolation with exponential covariance

sub = (bin2 < 250)
Z = genus[, 1 + igenus]
c00 = lm(ccov[sub] ~ bin2[sub])$coef[1]

ddist = as.matrix(dist(coord.utm))
D = rdist(coord.utm, coord.pred)
C0 = cov.mat(D, variance = 9820, scale.par = 100, model = "expo")
C = cov.mat(ddist, variance = 9820, scale.par = 100, model = "expo")
P = solve(C)  ## precision matrix

Z0 = P %*% C0
Z0 = t(Z) %*% Z0
S0 = P %*% C0
C0t = t(C0)
S0 = C0t %*% S0
S0 = c00 - diag(S0)

Interpolation with triangular covariance

Z = genus[, 1 + igenus]
c00 = lm(ccov[sub] ~ bin2[sub])$coef[1]

ddist = as.matrix(dist(coord.utm))
D = rdist(coord.utm, coord.pred)
C0 = cov.mat(D, variance = 9820, scale.par = lm(ccov[sub] ~ bin2[sub])$coef[2], 
    model = "triangle")
C = cov.mat(ddist, variance = 9820, scale.par = lm(ccov[sub] ~ bin2[sub])$coef[2], 
    model = "triangle")
P = solve(C)  ## precision matrix

Z0.tri = P %*% C0
Z0.tri = t(Z) %*% Z0.tri
S0.tri = P %*% C0
C0t = t(C0)
S0.tri = C0t %*% S0.tri
S0.tri = c00 - diag(S0.tri)

Plotting raw abundance data

par(mai = c(0.8, 0.8, 1, 0.1))
image.plot(as.image(x = coord.utm, Z = Z, nrow = 150, ncol = 150), col = (terrain.colors(24))[24:1], 
    main = "\n\n\n Raw abundance data \n ", xlab = "Eastings (km)", ylab = "Northings (km)", 
    cex.lab = 1.3, asp = 1)

plot of chunk unnamed-chunk-9

Plotting various covariances

par(cex = 1.3)
sub = (bin2 < 250)
plot(bin2[sub], ccov[sub], type = "b", xlab = "Geographical distance", ylab = "Empirical covariance", 
    lwd = 1)
lines(bin2[sub], ccov[sub], type = "b", lwd = 2)
h = seq(0, 250, 0.1)
abline(h = 0)
abline(v = 0)
lines(bin2[sub], cov.mat(d = bin2[sub], variance = 9820, scale.par = lm(ccov[sub] ~ 
    bin2[sub])$coef[2], model = "triangle"), col = 2, lty = 2, lwd = 2)
variance = lm(ccov[sub] ~ bin2[sub])$coef[1]
lines(h, ccov[1] * exp(-h/100), col = 3, lwd = 2.5, lty = 5)
legend(x = "topright", legend = c("Empirical covariance", "Triangle model", 
    "Exponential model"), col = c(1, 2, 3), lty = c(1, 2, 5), lwd = 2)

plot of chunk unnamed-chunk-10

Plotting prediected maps of abundance and associated prediction variance

par(mfrow=c(2,2),mai=c(.5,.5,.3,.2))

## prediction expo
image(x=seq(from=xmin,to=xmax,length=npix[1]),
      y=seq(from=ymin,to=ymax,length=npix[2]),
      z=matrix(nrow=npix[1],ncol=npix[2],byrow=FALSE,Z0),
      xlab="",ylab="",
      #main="Predicted abundance \n with exponential covariance",
      col=(terrain.colors(24))[24:1]
      )
contour(x=seq(from=xmin,to=xmax,length=npix[1]),
         y=seq(from=ymin,to=ymax,length=npix[2]),
         z=matrix(nrow=npix[1],ncol=npix[2],byrow=FALSE,Z0),
         add=TRUE,lwd=.1)
points(coord.utm,pch=16,cex=.3)

# variance expo
image(x=seq(from=xmin,to=xmax,length=npix[1]),
      y=seq(from=ymin,to=ymax,length=npix[2]),
      z=matrix(nrow=npix[1],ncol=npix[2],byrow=FALSE,S0),
      xlab="",ylab="",
      col=(heat.colors(12))[1:12])
contour(x=seq(from=xmin,to=xmax,length=npix[1]),
      y=seq(from=ymin,to=ymax,length=npix[2]),
      z=matrix(nrow=npix[1],ncol=npix[2],byrow=FALSE,S0),add=TRUE,lwd=.1)
points(coord.utm,pch=16,cex=.3)


## prediction tri
image(x=seq(from=xmin,to=xmax,length=npix[1]),
      y=seq(from=ymin,to=ymax,length=npix[2]),
      z=matrix(nrow=npix[1],ncol=npix[2],byrow=FALSE,Z0.tri),
      xlab="",ylab="",
      #main="Predicted abundance \n with exponential covariance",
      col=(terrain.colors(24))[24:1]
      )
contour(x=seq(from=xmin,to=xmax,length=npix[1]),
         y=seq(from=ymin,to=ymax,length=npix[2]),
         z=matrix(nrow=npix[1],ncol=npix[2],byrow=FALSE,Z0.tri),
         add=TRUE,lwd=.1)
points(coord.utm,pch=16,cex=.3)

# variance tri
S0.tri.alt = S0.tri
S0.tri.alt[S0.tri<0] = NA
image(x=seq(from=xmin,to=xmax,length=npix[1]),
      y=seq(from=ymin,to=ymax,length=npix[2]),
      z=matrix(nrow=npix[1],ncol=npix[2],byrow=FALSE,S0.tri.alt),
      xlab="",ylab="",
      col=(heat.colors(12))[1:12])
contour(x=seq(from=xmin,to=xmax,length=npix[1]),
      y=seq(from=ymin,to=ymax,length=npix[2]),
      z=matrix(nrow=npix[1],ncol=npix[2],byrow=FALSE,S0.tri.alt),add=TRUE,lwd=.1)
points(coord.utm,pch=16,cex=.3)

plot of chunk unnamed-chunk-11

Example of functions in the BRC family with positive-definiteness issues

BRC model on the sphere x R

Here we look for an n-points counter-example for the BRC model. The geograpical sites are located on the sphere. The environmental variable takes values in R.

if (!("matrixcalc" %in% installed.packages())) install.packages("matrixcalc")
if (!("scatterplot3d" %in% installed.packages())) install.packages("scatterplot3d")
library(scatterplot3d)
library(matrixcalc)
a0 <- 1
aD <- 1
aE <- 1
a2 <- 1.8
n = 10
bingo = FALSE
counter = 1

while (!bingo) {
    # print(counter)
    x = cbind(runif(n = n, min = -pi, max = pi), runif(n = n, min = -pi/2, max = pi/2))
    e <- rnorm(n = n)
    colnames(x) = c("Lon", "Lat")
    y = cbind(cos(x[, "Lat"]) * cos(x[, "Lon"]), cos(x[, "Lat"]) * sin(x[, "Lon"]), 
        sin(x[, "Lat"]))
    cosine = y %*% t(y)
    cosine[cosine > 1] = 1
    cosine[cosine < (-1)] = -1
    D = acos(cosine)
    diag(D) = 0
    E = as.matrix(dist(e))

    C = a0 * exp(-(aD * D + aE * E)^a2)

    bingo = !is.positive.definite(C)
    if (bingo) {
        print("BINGO!")
        # print(counter)
        print("spatial coordinates:")
        print(x)
        print("Environmental variable")
        print(e)
        print(paste("min eigen value=", min(eigen(C)$values)))
        scatterplot3d(y, highlight.3d = TRUE, main = "Location on geographical sites on the sphere")
    }
    counter = counter + 1
}
## [1] "BINGO!"
## [1] "spatial coordinates:"
##           Lon     Lat
##  [1,] -0.7975 -1.5705
##  [2,] -1.9219  0.3347
##  [3,] -0.5640 -0.4984
##  [4,]  1.0205 -1.5474
##  [5,]  2.4742 -1.4162
##  [6,] -2.8992  0.3294
##  [7,] -0.7547 -0.5210
##  [8,]  1.8417 -1.3342
##  [9,]  2.9373 -0.9172
## [10,] -2.1132 -0.8446
## [1] "Environmental variable"
##  [1] -0.620924  0.503239  2.372888 -0.038528 -0.751916  0.345032 -0.636616
##  [8] -0.029910 -0.006528  1.994396
## [1] "min eigen value= -0.0214412436862775"

plot of chunk unnamed-chunk-12

BRC model on the plan x R

Here we look for an 4-points counter-example for the BRC model (4 can be changed). The geograpical sites are located on the 2-D plan. The environmental variable takes values in R.

set.seed(1)  ## should find a counter-example at iteration 25413 with others parameters set as below
require(scatterplot3d)
require(matrixcalc)
a0 <- 1
aD <- 1
aE <- 1
a2 <- 1.8
n = 4  ## increasing this e.g. to 100 makes it easier to find a counter-example
bingo = FALSE
counter = 1
while (!bingo) {
    # print(counter)
    x = cbind(runif(n = n, min = 0, max = 1), runif(n = n, min = 0, max = 1))
    e <- rnorm(n = n)

    D = as.matrix(dist(x))
    E = as.matrix(dist(e))

    C = a0 * exp(-(aD * D + aE * E)^a2)

    bingo = !is.positive.definite(C)
    if (bingo) {
        print("BINGO!")
        print(counter)
        print("spatial coordinates:")
        print(x)
        print("Environmental variable")
        print(e)
        print(paste("min eigen value=", min(eigen(C)$values)))
        scatterplot3d(cbind(x, e), color = 2)
    }
    counter = counter + 1
}
## [1] "BINGO!"
## [1] 25413
## [1] "spatial coordinates:"
##        [,1]   [,2]
## [1,] 0.6390 0.7328
## [2,] 0.9061 0.8317
## [3,] 0.7457 0.7304
## [4,] 0.9634 0.7368
## [1] "Environmental variable"
## [1]  0.4992 -0.1113 -0.1549  0.5273
## [1] "min eigen value= -0.0078499071605211"

plot of chunk unnamed-chunk-13