SpatialPointProcessLab.knit

Bayesian Models for Ecologists

Spatial Point Process Lab

June 14, 2023

In this lab, we are going to 1.) read in telemetry data, 2.) estimate the utilization distribution (UD) using KDE for visualization of space use, 3.) prepare the data for RSF analysis, 4.) fit Beroulli and Poisson GLM approximations, 5.) fit a Bayesian SPP model to data using JAGS and Bernoulli approximation, and 6.) obtain posterior inference.

Objectives

The objectives of this lab are to:

Gain familiarity with spatial point process data and models.
Obtain experience preparing data for SPP analysis.
See how to fit Bayesian SPP models and make inference.
Compare inference with non-Bayesian implementation.

Approximating Spatial Point Process Models

In what follows, the Bayesian SPP model can be implemented as either a Bernoulli GLM:

\[\begin{align} y_i &\sim \text{Bern}(p_i) \text{ for } i=1,\ldots,m \notag \\ \text{logit}(p_i)&=\mathbf{x}'_i\boldsymbol\beta \notag \\ \boldsymbol\beta &\sim \text{N}(\boldsymbol\mu_\beta,\boldsymbol\Sigma_\beta) \notag \end{align}\]

or as a Poisson GLM: \[\begin{align} y_i &\sim \text{Pois}(\lambda_i) \text{ for } i=1,\ldots,m \notag \\ \text{log}(\lambda_i)&=\mathbf{x}'_i\boldsymbol\beta \notag \\ \boldsymbol\beta &\sim \text{N}(\boldsymbol\mu_\beta,\boldsymbol\Sigma_\beta) \notag \end{align}\]

Recall that \(m\) is the total sample size including both the \(n\) telemetry observation and the background sample of size \(m-n\) (chosen by the user to be large). Thus, \(m>>n\) provides a good approximation to the original spatial point process model and avoids calculating the integral in the likelihood directly.

Load the data and following packages after you have installed them: raster, MASS, spatstat, and vioplot.

####
####  Load Libraries
####

library(raster)
library(MASS)
library(spatstat)
library(vioplot)
load("mtnlion.RData")
names(mtnlion.df)=c("x","y","time")
n=dim(mtnlion.df)[1]

Set spatial support and obtain background sample of size \(m-n=1000\). This is for the Bernoulli regression approximation of the spatial point process model.

hr.tmp=draw.contour(mtnlion.df[,1:2],bw=NULL,alpha=.99)
n.contour=length(hr.tmp)
mat=matrix(0, nrow = n.contour, ncol = 1)
for(i in 1:n.contour) {
  mat[i]=length(hr.tmp[[i]]$x)
}
hr=owin(poly=list(list(x=rev(hr.tmp[[which.max(mat)]]$x[-1]),y=rev(hr.tmp[[which.max(mat)]]$y[-1]))))
pts=inside.owin(mtnlion.df$x,mtnlion.df$y,hr)
pts=data.frame(x=mtnlion.df$x,y=mtnlion.df$y,inout=pts)
pts=subset(pts,pts$inout=="TRUE")
pts.ppp=ppp(x=pts$x,y=pts$y,window=hr)
set.seed(101)
n.bg=1000
bg.ppp=rpoint(n.bg,win=hr)

layout(matrix(1:2,1,2,byrow=TRUE))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=rgb(0,0,0,alpha=.4),cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="telemetry data")
points(pts.ppp,pch=19,col=rgb(0,0,0,alpha=.6))
plot(bg.ppp,type="n",cex=1.5,pch=19,col=rgb(0,0,0,alpha=.4),cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="background sample")
points(bg.ppp,pch=19,col=rgb(0,0,0,alpha=.6))

We set the homerange polygon as the 99% KDE isopleth of the KDE for the telemetry data (notice that we use the `draw.contour’ function that is read in with the data set).

Get gridded count data. This is for the Poisson regression approximation of the spatial point process model.

grid.hr.TF=inside.owin(grid.centers[,1],grid.centers[,2],hr)
elev.NA.rast=elevation.rast
values(elev.NA.rast)[!grid.hr.TF]=NA

mtnlion.count.rast=elev.NA.rast
values(mtnlion.count.rast)[grid.hr.TF]=0
mtnlion.cells=cellFromXY(elev.NA.rast,mtnlion.df[,1:2])
mtnlion.table=table(mtnlion.cells)
mtnlion.cell.idx=as.numeric(attr(table(mtnlion.cells),"dimnames")$mtnlion.cells)
values(mtnlion.count.rast)[mtnlion.cell.idx]=mtnlion.table

layout(matrix(1:2,1,2))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=rgb(0,0,0,alpha=.4),cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="telemetry data")
points(pts.ppp,pch=19,col=rgb(0,0,0,alpha=.6))
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=0,cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="gridded counts")
image(mtnlion.count.rast,col=gray.colors.rev(100),add=TRUE)
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))

Plot the standardized spatial covariates:

elev.NA.rast=elevation.rast
slope.NA.rast=slope.rast
exposure.NA.rast=exposure.rast
values(elev.NA.rast)[!grid.hr.TF]=NA
values(slope.NA.rast)[!grid.hr.TF]=NA
values(exposure.NA.rast)[!grid.hr.TF]=NA
values(elev.NA.rast)[grid.hr.TF]=scale(values(elev.NA.rast)[grid.hr.TF])
values(slope.NA.rast)[grid.hr.TF]=scale(values(slope.NA.rast)[grid.hr.TF])
values(exposure.NA.rast)[grid.hr.TF]=scale(values(exposure.NA.rast)[grid.hr.TF])

layout(matrix(1:3,1,3))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=0,cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="elevation")
image(elev.NA.rast,col=gray.colors.rev(100),add=TRUE)
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=0,cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="slope")
image(slope.NA.rast,col=gray.colors.rev(100),add=TRUE)
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=0,cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="exposure")
image(exposure.NA.rast,col=gray.colors.rev(100),add=TRUE)
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))

Fit both Bernoulli and Poisson regression models using MLE and compare non-Bayesian inference.

mtnlion.count.rast=elev.NA.rast
values(mtnlion.count.rast)[grid.hr.TF]=0
mtnlion.cells=cellFromXY(elev.NA.rast,mtnlion.df[,1:2])
mtnlion.table=table(mtnlion.cells)
mtnlion.cell.idx=as.numeric(attr(table(mtnlion.cells),"dimnames")$mtnlion.cells)
values(mtnlion.count.rast)[mtnlion.cell.idx]=mtnlion.table
rsf.1.df=data.frame(y=values(mtnlion.count.rast),elev=values(elev.NA.rast),slope=values(slope.NA.rast),exposure=values(exposure.NA.rast))
cor(rsf.1.df[!is.na(rsf.1.df$y),]) # check for colinearity

##                    y       elev       slope   exposure
## y         1.00000000 -0.0970971  0.07448481 -0.0761522
## elev     -0.09709710  1.0000000  0.12040234  0.2131342
## slope     0.07448481  0.1204023  1.00000000 -0.1903686
## exposure -0.07615220  0.2131342 -0.19036861  1.0000000

bg.cells=cellFromXY(elev.NA.rast,cbind(bg.ppp$x,bg.ppp$y))
y.binary=rep(0,n+n.bg)
elev.binary=rep(0,n+n.bg)
slope.binary=rep(0,n+n.bg)
exposure.binary=rep(0,n+n.bg)
y.binary[1:n]=1
elev.binary[1:n]=values(elev.NA.rast)[mtnlion.cells]
slope.binary[1:n]=values(slope.NA.rast)[mtnlion.cells]
exposure.binary[1:n]=values(exposure.NA.rast)[mtnlion.cells]
elev.binary[-(1:n)]=values(elev.NA.rast)[bg.cells]
slope.binary[-(1:n)]=values(slope.NA.rast)[bg.cells]
exposure.binary[-(1:n)]=values(exposure.NA.rast)[bg.cells]
rsf.2.df=data.frame(y=y.binary,elev=elev.binary,slope=slope.binary,exposure=exposure.binary)

summary(glm(y~elev+slope+exposure,family=poisson(link="log"),data=rsf.1.df))

## 
## Call:
## glm(formula = y ~ elev + slope + exposure, family = poisson(link = "log"), 
##     data = rsf.1.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8486  -0.5696  -0.4847  -0.3989   3.5346  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.0524     0.1131 -18.152  < 2e-16 ***
## elev         -0.2946     0.1116  -2.639  0.00832 ** 
## slope         0.2148     0.1020   2.106  0.03519 *  
## exposure     -0.1306     0.1223  -1.068  0.28546    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 397.09  on 653  degrees of freedom
## Residual deviance: 382.66  on 650  degrees of freedom
##   (781 observations deleted due to missingness)
## AIC: 554.17
## 
## Number of Fisher Scoring iterations: 6

summary(glm(y~elev+slope+exposure,family=binomial(link="logit"),data=rsf.2.df))

## 
## Call:
## glm(formula = y ~ elev + slope + exposure, family = binomial(link = "logit"), 
##     data = rsf.2.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6941  -0.4604  -0.3975  -0.3239   2.4566  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.4506     0.1175 -20.861  < 2e-16 ***
## elev         -0.3111     0.1180  -2.637  0.00836 ** 
## slope         0.2437     0.1142   2.134  0.03287 *  
## exposure     -0.1396     0.1251  -1.116  0.26446    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 622.05  on 1066  degrees of freedom
## Residual deviance: 606.42  on 1063  degrees of freedom
##   (24 observations deleted due to missingness)
## AIC: 614.42
## 
## Number of Fisher Scoring iterations: 5

In these approximations, we ignore the intercept because it scales with either the background sample size or grid cell size. However, notice how similar the resulting point estimates for the regression slope coefficients are. Note: There are a few NA values in the data set because of missing covariates, but `glm’ omits those records automatically.

Specify the Bernoulli regression model in JAGS to approximate the spatial point process model.

rsf.noNA.df=rsf.2.df[!apply(is.na(rsf.2.df),1,any),]  # omit points w/ NA values for covariates

y=rsf.noNA.df$y
X=model.matrix(~elev+slope+exposure,data=rsf.noNA.df)
m=length(y)

library(rjags)

## Loading required package: coda

## Linked to JAGS 4.3.1

## Loaded modules: basemod,bugs

m.jags <-"
  model{
    for(i in 1:m){
      y[i] ~ dbern(p[i])
      logit(p[i]) <- b0+b1*X[i,2]+b2*X[i,3]+b3*X[i,4]
    }
    b0 ~ dnorm(mu,tau)
    b1 ~ dnorm(mu,tau)
    b2 ~ dnorm(mu,tau)
    b3 ~ dnorm(mu,tau)
}
"

mod<-textConnection(m.jags)

Fit the Bernoulli regression model to augmented data set with observed telemetry data and background sample. Check trace plots for convergence.

mu=0
tau=1/2.25
m.out<-jags.model(mod,data=list('y'=y,'m'=m,'X'=X,'mu'=mu,'tau'=tau),n.chains=1,n.adapt=0) # build model and algorithm

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1067
##    Unobserved stochastic nodes: 4
##    Total graph size: 7977
## 
## Initializing model

n.mcmc=50000
n.burn=round(.2*n.mcmc)
update(m.out,n.burn) # perform burn-in
m.samples=jags.samples(m.out,c('b0','b1','b2','b3'),n.mcmc) # fit model post-burn-in

## NOTE: Stopping adaptation

beta.post.mat=cbind(m.samples$b0[1,,1],m.samples$b1[1,,1],m.samples$b2[1,,1],m.samples$b3[1,,1])
matplot(beta.post.mat,type="l",lty=1,ylab=bquote(beta))  # trace plots

Make Bayesian inference for spatial point process model and compare to non-Bayesian inference. Try a violin plot to look at marginal posterior distributions for parameters.

vioplot(data.frame(beta.post.mat),names=expression(beta[0],beta[1],beta[2],beta[3]))
abline(h=0,col=8)

apply(beta.post.mat,2,mean) # marginal posterior means for beta

## [1] -2.4555148 -0.3084521  0.2394828 -0.1429866

apply(beta.post.mat,2,sd) # marginal posterior std devs for beta

## [1] 0.1174979 0.1168483 0.1139947 0.1253690

apply(beta.post.mat,2,quantile,c(0.025,.975)) # marginal posterior 95% CI for beta

##            [,1]        [,2]       [,3]       [,4]
## 2.5%  -2.693008 -0.54049594 0.01530283 -0.3937890
## 97.5% -2.231599 -0.08284324 0.46167554  0.0980695

Problem

Develop the Poisson Bayesian GLM to approximate the point process model. Fit it to the gridded count data above and compare the inference to the Bernoulli approximation (and the MLEs).

Prepare data for Poisson Bayesian GLM.

rsf.noNA.df=rsf.1.df[!apply(is.na(rsf.1.df),1,any),]  # omit points w/ NA values for covariates

y=rsf.noNA.df$y
X=model.matrix(~elev+slope+exposure,data=rsf.noNA.df)
m=length(y)

Set up Bayesian model in JAGS to implement Poisson regression approximation to the spatial point process model.

m.jags <-"
  model{
    for(i in 1:m){
      y[i] ~ dpois(lam[i])
      log(lam[i]) <- b0+b1*X[i,2]+b2*X[i,3]+b3*X[i,4]
    }
    b0 ~ dnorm(mu,tau)
    b1 ~ dnorm(mu,tau)
    b2 ~ dnorm(mu,tau)
    b3 ~ dnorm(mu,tau)
}
"

mod<-textConnection(m.jags)

Fit the Poisson Bayesian regression model to the gridded count data and check trace plots.

mu=0
tau=1/100
m.out<-jags.model(mod,data=list('y'=y,'m'=m,'X'=X,'mu'=mu,'tau'=tau),n.chains=1,n.adapt=0) # build model and algorithm

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 654
##    Unobserved stochastic nodes: 4
##    Total graph size: 6547
## 
## Initializing model

n.mcmc=50000
n.burn=round(.2*n.mcmc)
update(m.out,n.burn) # perform burn-in
m.samples=jags.samples(m.out,c('b0','b1','b2','b3'),n.mcmc) # fit model post-burn-in

## NOTE: Stopping adaptation

beta.post.mat=cbind(m.samples$b0[1,,1],m.samples$b1[1,,1],m.samples$b2[1,,1],m.samples$b3[1,,1])
matplot(beta.post.mat,type="l",lty=1,ylab=bquote(beta))  # trace plots

Obtain inference for the Poisson regression model and compare with Bernoulli model and with the MLE.

library(vioplot)
vioplot(data.frame(beta.post.mat),names=expression(beta[0],beta[1],beta[2],beta[3]))
abline(h=0,col=8)

apply(beta.post.mat,2,mean) # marginal posterior means for beta

## [1] -2.0759639 -0.2958426  0.2129974 -0.1355266

apply(beta.post.mat,2,sd) # marginal posterior std devs for beta

## [1] 0.1143488 0.1114723 0.1016564 0.1228646

apply(beta.post.mat,2,quantile,c(0.025,.975)) # marginal posterior 95% CI for beta

##            [,1]        [,2]       [,3]       [,4]
## 2.5%  -2.307466 -0.51709649 0.01166307 -0.3829873
## 97.5% -1.859671 -0.08049771 0.40986871  0.0990237