In this lab, we are going to (1) read in telemetry data, (2) estimate the utilization distribution (UD) using KDE to visualize space use, (3) prepare the data for RSF analysis, (4) fit Bernoulli and Poisson GLM approximations, (5) fit a Bayesian SPP model to the data using JAGS and the Bernoulli approximation, and (6) obtain posterior inference.
The objectives of this lab are to:
Gain familiarity with spatial point process data and models.
Obtain experience preparing data for SPP analysis.
See how to fit Bayesian SPP models and make inference.
Compare inference with a non-Bayesian implementation.
In what follows, the Bayesian SPP model can be implemented as either a Bernoulli GLM:
\[\begin{align} y_i &\sim \text{Bern}(p_i) \text{ for } i=1,\ldots,m \notag \\ \text{logit}(p_i)&=\mathbf{x}'_i\boldsymbol\beta \notag \\ \boldsymbol\beta &\sim \text{N}(\boldsymbol\mu_\beta,\boldsymbol\Sigma_\beta) \notag \end{align}\]
or as a Poisson GLM: \[\begin{align} y_i &\sim \text{Pois}(\lambda_i) \text{ for } i=1,\ldots,m \notag \\ \text{log}(\lambda_i)&=\mathbf{x}'_i\boldsymbol\beta \notag \\ \boldsymbol\beta &\sim \text{N}(\boldsymbol\mu_\beta,\boldsymbol\Sigma_\beta) \notag \end{align}\]
Recall that \(m\) is the total sample size, including both the \(n\) telemetry observations and the background sample of size \(m-n\) (chosen by the user to be large). Thus, \(m \gg n\) provides a good approximation to the original spatial point process model and avoids calculating the integral in the likelihood directly.
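To see what the background sample buys us, recall that the likelihood of the underlying point process involves an integral of the intensity over the spatial support \(\mathcal{S}\), which generally has no closed form. A standard sketch of the approximation (the notation for \(\mathcal{S}\) and the background locations \(\mathbf{s}_j\) is ours) is \[\begin{align} \prod_{i=1}^{n} \frac{\exp(\mathbf{x}'(\mathbf{s}_i)\boldsymbol\beta)}{\int_{\mathcal{S}}\exp(\mathbf{x}'(\mathbf{s})\boldsymbol\beta)\,d\mathbf{s}}\,, \qquad \int_{\mathcal{S}}\exp(\mathbf{x}'(\mathbf{s})\boldsymbol\beta)\,d\mathbf{s} \approx \frac{|\mathcal{S}|}{m-n}\sum_{j=1}^{m-n}\exp(\mathbf{x}'(\mathbf{s}_j)\boldsymbol\beta) \notag \end{align}\] where the \(\mathbf{s}_j\) are drawn uniformly over \(\mathcal{S}\). The Monte Carlo approximation of the integral improves as the background sample size \(m-n\) grows, which is why a large \(m\) lets the Bernoulli and Poisson GLMs recover the point process slope coefficients.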
####
#### Load Libraries
####
library(raster)
library(MASS)
library(spatstat)
library(vioplot)
load("mtnlion.RData")
names(mtnlion.df)=c("x","y","time")
n=dim(mtnlion.df)[1]
hr.tmp=draw.contour(mtnlion.df[,1:2],bw=NULL,alpha=.99)
n.contour=length(hr.tmp)
mat=matrix(0, nrow = n.contour, ncol = 1)
for(i in 1:n.contour) {
mat[i]=length(hr.tmp[[i]]$x)
}
hr=owin(poly=list(list(x=rev(hr.tmp[[which.max(mat)]]$x[-1]),y=rev(hr.tmp[[which.max(mat)]]$y[-1]))))
pts=inside.owin(mtnlion.df$x,mtnlion.df$y,hr) # flag locations inside the home range window
pts=data.frame(x=mtnlion.df$x,y=mtnlion.df$y,inout=pts)
pts=subset(pts,inout) # keep only locations inside the home range
pts.ppp=ppp(x=pts$x,y=pts$y,window=hr) # planar point pattern of telemetry locations
set.seed(101)
n.bg=1000 # background sample size (m-n)
bg.ppp=rpoint(n.bg,win=hr) # uniform background sample within the home range
layout(matrix(1:2,1,2,byrow=TRUE))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=rgb(0,0,0,alpha=.4),cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="telemetry data")
points(pts.ppp,pch=19,col=rgb(0,0,0,alpha=.6))
plot(bg.ppp,type="n",cex=1.5,pch=19,col=rgb(0,0,0,alpha=.4),cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="background sample")
points(bg.ppp,pch=19,col=rgb(0,0,0,alpha=.6))
We set the home range polygon as the 99% isopleth of the KDE for the telemetry data (notice that we use the `draw.contour` function that is read in with the data set).
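For reference, if `draw.contour` were not provided with the data set, a similar 99% isopleth could be obtained from a KDE computed with `kde2d` (from `MASS`) and `contourLines`. This is a minimal sketch under the default bandwidth; the object names (`kde.tmp`, `dens`, `level`, `contour.99`) are ours:
kde.tmp=kde2d(mtnlion.df$x,mtnlion.df$y,n=200) # KDE on a 200 x 200 grid
dens=kde.tmp$z/sum(kde.tmp$z) # normalize so grid values sum to one
dens.sort=sort(dens,decreasing=TRUE)
level=dens.sort[min(which(cumsum(dens.sort)>=.99))] # smallest height capturing ~99% of mass
contour.99=contourLines(kde.tmp$x,kde.tmp$y,dens,levels=level) # candidate isopleth polygon(s)
As with `draw.contour`, the result can contain multiple polygons, so we keep the one with the most vertices when constructing the `owin` home range window.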
grid.hr.TF=inside.owin(grid.centers[,1],grid.centers[,2],hr) # grid cells inside the home range
elev.NA.rast=elevation.rast
values(elev.NA.rast)[!grid.hr.TF]=NA # mask cells outside the home range
mtnlion.count.rast=elev.NA.rast
values(mtnlion.count.rast)[grid.hr.TF]=0
mtnlion.cells=cellFromXY(elev.NA.rast,mtnlion.df[,1:2]) # cell containing each telemetry location
mtnlion.table=table(mtnlion.cells)
mtnlion.cell.idx=as.numeric(names(mtnlion.table)) # indices of occupied cells
values(mtnlion.count.rast)[mtnlion.cell.idx]=mtnlion.table # gridded counts of telemetry locations
layout(matrix(1:2,1,2))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=rgb(0,0,0,alpha=.4),cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="telemetry data")
points(pts.ppp,pch=19,col=rgb(0,0,0,alpha=.6))
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=0,cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="gridded counts")
image(mtnlion.count.rast,col=gray.colors.rev(100),add=TRUE)
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))
elev.NA.rast=elevation.rast
slope.NA.rast=slope.rast
exposure.NA.rast=exposure.rast
values(elev.NA.rast)[!grid.hr.TF]=NA
values(slope.NA.rast)[!grid.hr.TF]=NA
values(exposure.NA.rast)[!grid.hr.TF]=NA
values(elev.NA.rast)[grid.hr.TF]=scale(values(elev.NA.rast)[grid.hr.TF]) # standardize covariates within the home range
values(slope.NA.rast)[grid.hr.TF]=scale(values(slope.NA.rast)[grid.hr.TF])
values(exposure.NA.rast)[grid.hr.TF]=scale(values(exposure.NA.rast)[grid.hr.TF])
layout(matrix(1:3,1,3))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=0,cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="elevation")
image(elev.NA.rast,col=gray.colors.rev(100),add=TRUE)
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=0,cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="slope")
image(slope.NA.rast,col=gray.colors.rev(100),add=TRUE)
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))
plot(pts.ppp,type="n",cex=1.5,pch=19,col=0,cex.axis=1.2,cex.lab=1.5,cex.main=1.5,xlab="",ylab="",asp=TRUE,main="exposure")
image(exposure.NA.rast,col=gray.colors.rev(100),add=TRUE)
lines(hr$bdry[[1]],lwd=3,col=rgb(0,0,0,alpha=.4))
mtnlion.count.rast=elev.NA.rast # rebuild the count raster on the masked grid
values(mtnlion.count.rast)[grid.hr.TF]=0
mtnlion.cells=cellFromXY(elev.NA.rast,mtnlion.df[,1:2])
mtnlion.table=table(mtnlion.cells)
mtnlion.cell.idx=as.numeric(names(mtnlion.table))
values(mtnlion.count.rast)[mtnlion.cell.idx]=mtnlion.table
rsf.1.df=data.frame(y=values(mtnlion.count.rast),elev=values(elev.NA.rast),slope=values(slope.NA.rast),exposure=values(exposure.NA.rast))
cor(rsf.1.df[!is.na(rsf.1.df$y),]) # check for collinearity
## y elev slope exposure
## y 1.00000000 -0.0970971 0.07448481 -0.0761522
## elev -0.09709710 1.0000000 0.12040234 0.2131342
## slope 0.07448481 0.1204023 1.00000000 -0.1903686
## exposure -0.07615220 0.2131342 -0.19036861 1.0000000
bg.cells=cellFromXY(elev.NA.rast,cbind(bg.ppp$x,bg.ppp$y)) # cell containing each background point
y.binary=rep(0,n+n.bg)
elev.binary=rep(0,n+n.bg)
slope.binary=rep(0,n+n.bg)
exposure.binary=rep(0,n+n.bg)
y.binary[1:n]=1 # telemetry locations coded 1, background sample coded 0
elev.binary[1:n]=values(elev.NA.rast)[mtnlion.cells]
slope.binary[1:n]=values(slope.NA.rast)[mtnlion.cells]
exposure.binary[1:n]=values(exposure.NA.rast)[mtnlion.cells]
elev.binary[-(1:n)]=values(elev.NA.rast)[bg.cells]
slope.binary[-(1:n)]=values(slope.NA.rast)[bg.cells]
exposure.binary[-(1:n)]=values(exposure.NA.rast)[bg.cells]
rsf.2.df=data.frame(y=y.binary,elev=elev.binary,slope=slope.binary,exposure=exposure.binary)
summary(glm(y~elev+slope+exposure,family=poisson(link="log"),data=rsf.1.df))
##
## Call:
## glm(formula = y ~ elev + slope + exposure, family = poisson(link = "log"),
## data = rsf.1.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8486 -0.5696 -0.4847 -0.3989 3.5346
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.0524 0.1131 -18.152 < 2e-16 ***
## elev -0.2946 0.1116 -2.639 0.00832 **
## slope 0.2148 0.1020 2.106 0.03519 *
## exposure -0.1306 0.1223 -1.068 0.28546
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 397.09 on 653 degrees of freedom
## Residual deviance: 382.66 on 650 degrees of freedom
## (781 observations deleted due to missingness)
## AIC: 554.17
##
## Number of Fisher Scoring iterations: 6
summary(glm(y~elev+slope+exposure,family=binomial(link="logit"),data=rsf.2.df))
##
## Call:
## glm(formula = y ~ elev + slope + exposure, family = binomial(link = "logit"),
## data = rsf.2.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6941 -0.4604 -0.3975 -0.3239 2.4566
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.4506 0.1175 -20.861 < 2e-16 ***
## elev -0.3111 0.1180 -2.637 0.00836 **
## slope 0.2437 0.1142 2.134 0.03287 *
## exposure -0.1396 0.1251 -1.116 0.26446
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 622.05 on 1066 degrees of freedom
## Residual deviance: 606.42 on 1063 degrees of freedom
## (24 observations deleted due to missingness)
## AIC: 614.42
##
## Number of Fisher Scoring iterations: 5
In these approximations, we ignore the intercept because it scales with either the background sample size or the grid cell size. However, notice how similar the resulting point estimates for the regression slope coefficients are. Note: there are a few NA values in the data set because of missing covariates, but `glm` omits those records automatically.
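To see the similarity directly, we could save the two fits and print the slope coefficients side by side (a minimal sketch that repeats the calls above; the object names `glm.pois` and `glm.bern` are ours):
glm.pois=glm(y~elev+slope+exposure,family=poisson(link="log"),data=rsf.1.df) # Poisson fit to gridded counts
glm.bern=glm(y~elev+slope+exposure,family=binomial(link="logit"),data=rsf.2.df) # Bernoulli fit to used/background data
cbind(pois=coef(glm.pois)[-1],bern=coef(glm.bern)[-1]) # slope coefficients only; intercepts differ by construction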
rsf.noNA.df=rsf.2.df[!apply(is.na(rsf.2.df),1,any),] # omit points w/ NA values for covariates
y=rsf.noNA.df$y
X=model.matrix(~elev+slope+exposure,data=rsf.noNA.df)
m=length(y)
library(rjags)
## Loading required package: coda
## Linked to JAGS 4.3.1
## Loaded modules: basemod,bugs
m.jags <-"
model{
for(i in 1:m){
y[i] ~ dbern(p[i])
logit(p[i]) <- b0+b1*X[i,2]+b2*X[i,3]+b3*X[i,4]
}
b0 ~ dnorm(mu,tau)
b1 ~ dnorm(mu,tau)
b2 ~ dnorm(mu,tau)
b3 ~ dnorm(mu,tau)
}
"
mod<-textConnection(m.jags)
mu=0 # prior mean for regression coefficients
tau=1/2.25 # prior precision (variance = 2.25)
m.out<-jags.model(mod,data=list('y'=y,'m'=m,'X'=X,'mu'=mu,'tau'=tau),n.chains=1,n.adapt=0) # build model and algorithm
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1067
## Unobserved stochastic nodes: 4
## Total graph size: 7977
##
## Initializing model
n.mcmc=50000
n.burn=round(.2*n.mcmc)
update(m.out,n.burn) # perform burn-in
m.samples=jags.samples(m.out,c('b0','b1','b2','b3'),n.mcmc) # fit model post-burn-in
## NOTE: Stopping adaptation
beta.post.mat=cbind(m.samples$b0[1,,1],m.samples$b1[1,,1],m.samples$b2[1,,1],m.samples$b3[1,,1])
matplot(beta.post.mat,type="l",lty=1,ylab=bquote(beta)) # trace plots
vioplot(data.frame(beta.post.mat),names=expression(beta[0],beta[1],beta[2],beta[3]))
abline(h=0,col=8)
apply(beta.post.mat,2,mean) # marginal posterior means for beta
## [1] -2.4555148 -0.3084521 0.2394828 -0.1429866
apply(beta.post.mat,2,sd) # marginal posterior std devs for beta
## [1] 0.1174979 0.1168483 0.1139947 0.1253690
apply(beta.post.mat,2,quantile,c(0.025,.975)) # marginal posterior 95% CI for beta
## [,1] [,2] [,3] [,4]
## 2.5% -2.693008 -0.54049594 0.01530283 -0.3937890
## 97.5% -2.231599 -0.08284324 0.46167554 0.0980695
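These marginal posterior summaries can be compared with the MLEs from the Bernoulli GLM fit earlier (a minimal sketch, assuming that fit was saved as `glm.bern` as in the comparison above):
cbind(mle=coef(glm.bern),post.mean=apply(beta.post.mat,2,mean)) # MLEs vs. marginal posterior means
Because the priors are not strongly informative relative to the likelihood here, the posterior means land close to the MLEs.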
Develop the Poisson Bayesian GLM to approximate the point process model. Fit it to the gridded count data above and compare the inference to the Bernoulli approximation (and the MLEs).
rsf.noNA.df=rsf.1.df[!apply(is.na(rsf.1.df),1,any),] # omit points w/ NA values for covariates
y=rsf.noNA.df$y
X=model.matrix(~elev+slope+exposure,data=rsf.noNA.df)
m=length(y)
m.jags <-"
model{
for(i in 1:m){
y[i] ~ dpois(lam[i])
log(lam[i]) <- b0+b1*X[i,2]+b2*X[i,3]+b3*X[i,4]
}
b0 ~ dnorm(mu,tau)
b1 ~ dnorm(mu,tau)
b2 ~ dnorm(mu,tau)
b3 ~ dnorm(mu,tau)
}
"
mod<-textConnection(m.jags)
mu=0 # prior mean for regression coefficients
tau=1/100 # prior precision (variance = 100)
m.out<-jags.model(mod,data=list('y'=y,'m'=m,'X'=X,'mu'=mu,'tau'=tau),n.chains=1,n.adapt=0) # build model and algorithm
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 654
## Unobserved stochastic nodes: 4
## Total graph size: 6547
##
## Initializing model
n.mcmc=50000
n.burn=round(.2*n.mcmc)
update(m.out,n.burn) # perform burn-in
m.samples=jags.samples(m.out,c('b0','b1','b2','b3'),n.mcmc) # fit model post-burn-in
## NOTE: Stopping adaptation
beta.post.mat=cbind(m.samples$b0[1,,1],m.samples$b1[1,,1],m.samples$b2[1,,1],m.samples$b3[1,,1])
matplot(beta.post.mat,type="l",lty=1,ylab=bquote(beta)) # trace plots
library(vioplot)
vioplot(data.frame(beta.post.mat),names=expression(beta[0],beta[1],beta[2],beta[3]))
abline(h=0,col=8)
apply(beta.post.mat,2,mean) # marginal posterior means for beta
## [1] -2.0759639 -0.2958426 0.2129974 -0.1355266
apply(beta.post.mat,2,sd) # marginal posterior std devs for beta
## [1] 0.1143488 0.1114723 0.1016564 0.1228646
apply(beta.post.mat,2,quantile,c(0.025,.975)) # marginal posterior 95% CI for beta
## [,1] [,2] [,3] [,4]
## 2.5% -2.307466 -0.51709649 0.01166307 -0.3829873
## 97.5% -1.859671 -0.08049771 0.40986871 0.0990237
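To complete the comparison requested above, the Bernoulli and Poisson posterior summaries can be placed side by side. This is a minimal sketch; it assumes the Bernoulli draws were saved under a different name (e.g., `beta.post.bern.mat`) before `beta.post.mat` was overwritten by the Poisson fit:
post.summ=function(mat) rbind(mean=apply(mat,2,mean),sd=apply(mat,2,sd)) # hypothetical helper for posterior summaries
round(post.summ(beta.post.bern.mat),3) # Bernoulli approximation
round(post.summ(beta.post.mat),3) # Poisson approximation
Apart from the intercept, which depends on the background sample size (Bernoulli) or the grid cell size (Poisson), the slope coefficients agree closely across the two approximations and with the corresponding MLEs.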