Incorporating Expert Range Maps in MaxEnt-style SDMs

Adam M. Wilson, Cory Merow, & Walter Jetz

November 2016

Load necessary libraries

library(ggplot2)
library(foreach)
registerDoSEQ()
library(dplyr)
library(tidyr)
library(bossMaps)
library(raster)

Load example data for the Solitary Tinamou

species=c("Tinamus_solitarius")


data("Tinamus_solitarius_points")
data("Tinamus_solitarius_range")
data("Tinamus_solitarius_env")

points=Tinamus_solitarius_points
range=Tinamus_solitarius_range
env=Tinamus_solitarius_env

Calculate range distances

Calculate distance-to-range: this is slow but only has to be done once per species. This can be sped up by increasing fact (at the expense of lower accuracy).

rdist = rangeDist(
  range = range,
  points = points,
  domain = env,
  domainkm = 1000,
  mask = F,
  fact = 2
  )
## Rasterizing range to ROI
## Calculating distances
## Resampling back to original resolution
## Crop environmental data to this new domain (based on domainkm)
env_domain = crop(env, rdist)

## Mask pixels with no environmenta data (over ocean, etc.)
rdist = mask(rdist, env_domain[[1]])
names(rdist) = "rangeDist"
  
plot(rdist)
plot(range, add = T)
plot(points, add = T)

Calculate Priors

Evaluate curves

Calculate which curve parameter combinations are feasible given the domain and range geometry.

rates=checkRates(rdist)
## Calculating the range distance frequency table, you can also provide this with the dists parameter.
## Fitting the expert range decay, this can take a while depending on how many values you selected...

Define curves

vars=expand.grid(
  prob=c(0.5, 0.7, 0.9),
  rate=c(0,0.01,0.1,10),
  skew=0.5,
  shift=0,
  stringsAsFactors=F)
x=seq(-150,500,len=1000)

Calculate all the curves

uvars=unique(vars[,c("rate","skew","shift")])
erd=do.call(rbind,
            lapply(1:nrow(uvars),function(i) {
              y=logistic(x,
                         parms=unlist(c(lower=0,upper=1,uvars[i,])))  
              return(cbind.data.frame(
                group=i,
                c(uvars[i,]),
                x=x,
                y=y))
            })
)  

Visualize potential decay parameters

ggplot(erd,
       aes(
       x = x,
       y = y,
       linetype = as.factor(skew),
       colour = as.factor(rate),
       group = group
       )) +
       geom_vline(aes(xintercept = 0), colour = "red") +
       geom_line() +
       xlab("Prior value (not normalized)") +
       xlab("Distance to range edge (km)")

Process priors

Calculate frequency table of distances

In order to speed up optimization of range offsets, we calculate the rangeOffset() on a frequency distribution instead of the full raster. Calculate that now with freq().

dists = freq(rdist, useNA = "no", digits = 2)
knitr::kable(head(dists))
value count
-289.97 1
-289.50 1
-288.72 1
-288.45 1
-280.91 1
-280.47 1

Calculate the priors

mcoptions <- list(preschedule = FALSE, set.seed = FALSE)
foreach(i = 1:nrow(vars), .options.multicore = mcoptions) %do% {
  ## calculate the expert range prior
  fo = paste0(species, "_prior_", paste(vars[i, ], collapse = "_"), ".grd")
  if (file.exists(fo))  return(NULL)
  expert = rangeOffset(
                      rdist,
                      parms = unlist(vars[i, ]),
                      dists = dists,
                      doNormalize = T,
                      verbose = T,
                      doWriteRaster = T,
                      filename = fo,
                      overwrite = T,
                      datatype = "FLT4S"
                      )
}

Stack the priors

fs = list.files(pattern = "prior.*grd$",
                full = T,
                recursive = F)
                
priors = stack(fs)
                
## build prior table from metadata
priorf = foreach(i = 1:nlayers(priors), 
                 .combine = bind_rows) %do% {
  t1 = metadata(priors[[i]])
  t2 = t1$parms
  names(t2) = t1$pnames
  return(data.frame(id = i, t(t2)))
  }

names(priors) = paste0("prior", priorf$id)#basename(fs[wp])

MaxEnt SDM with glm()

Assemble modeling dataset

## build single raster stack of all needed data (env and priors)
rdata = stack(env, priors)

## generate presence and non-detection datasets
pres = cbind.data.frame(
  presence = 1, 
  raster::extract(rdata, points, df =
  T, ID = F))
ns = 10000
abs = cbind.data.frame(
  presence = 0,
  ID = 1:ns,
  sampleRandom(rdata, ns))

data = rbind.data.frame(
  pres, 
  abs)

Fit models

data$weight = 1e-6
best.var = names(env)

# number of non-NA cells
nc = cellStats(!is.na(priors[[1]]), sum)

data$weight[data$presence == 0] = nc / sum(data$presence == 0)

## Set up models
formulas = paste(
  "presence/weight ~",
  " offset(log(",
  grep("prior", 
       colnames(data),
       value = T),'))+',
  paste0(sapply(
    best.var, function(ii) {
      sapply(best.var, function(jj) {
        paste0(ii, "*", jj)
    })
  }), collapse = '+'),
  '+',
  paste0('I(', best.var, '^2)', collapse = '+'),
  '+',
  paste0(best.var, collapse = ':')
)

formulas[1]
## [1] "presence/weight ~  offset(log( prior1 ))+ bio1*bio1+bio1*bio12+bio12*bio1+bio12*bio12 + I(bio1^2)+I(bio12^2) + bio1:bio12"
mods = foreach(f = formulas) %do% {
  glm(
    as.formula(f),
    family = poisson(link = log),
    data = data,
    weights = weight
  )
}

Calculate AIC

priorf$AIC=unlist(lapply(mods,function(x) AIC(x)))

Make spatial predictions

mcoptions <- list(preschedule = FALSE, set.seed = FALSE)

ptype = "response"
psi = 1:nrow(priorf)

ps = foreach(i = psi, 
  .options.multicore = mcoptions) %dopar% {
    fo = paste0(
      species,"_posterior_",
      priorf$prob[i],"_",
      priorf$rate[i],"_",
      priorf$skew[i],"_",
      priorf$shift[i],
      ".grd"
      )
  if (file.exists(fo))
    return(NULL)
  #if(!file.exists(fo)) return("NOT YET")
  normalize(
    raster::predict(
      rdata,
      mods[[i]],
      type = ptype),
    file = fo,
    overwrite = T)
}

psf = list.files(pattern = "posterior.*.grd", full = T)
ps = stack(psf)
names(ps) = sub("prior", "posterior", names(priors)[psi])

Full maps over grid of rate vs. prob

res = 1e4
p_psn = rasterVis::gplot(ps, max = res) + geom_raster(aes(fill = value))
p_psn$data$id = as.numeric(sub("prior|posterior", "", p_psn$data$variable))
p_psn$data$rate = priorf$rate[p_psn$data$id]
p_psn$data$prob = priorf$prob[p_psn$data$id]

p_psn + 
  facet_grid(prob ~ rate, labeller = label_both) +
  scale_fill_gradientn(
    colours = hcols(100, bias = .5),
    trans = "log10",
    name = "Relative Occurrence Rate\np(x|Y=1)",
    na.value = "transparent"
  ) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    data = fortify(range),
    fill = "transparent",
    col = "black",
    size = .2
  ) +
  geom_point(
    aes(x = coords.x1, y = coords.x2),
    data = as.data.frame(coordinates(points[points$presence == 1,])),
    col = "black",
    size = .5
  ) +
  geom_text(
    aes(
      -30,-40,
      label = paste0("AIC=", round(AIC),
                     ifelse(
                       round(fitbuffer) == 0, "", 
                       paste0("\nFitDist=", round(fitbuffer), " km")
                     )),
      group = NULL
    ),
    data = priorf,
    hjust = 1,
    size = 2
  ) +
  ylab("Latitude") + xlab("Longitude") +
  coord_equal()

Transects across all priors

Extract data along transects

## Extract transect
transect = SpatialLinesDataFrame(SpatialLines(list(Lines(list(
  Line(cbind(c(-51.85,-62.45), c(-26.01,-14.82)))
), ID = "a"))),
data.frame(Z = c("transect"), row.names = c("a")))


trans = do.call(
  rbind.data.frame,
  raster::extract(
    stack(rdist, rdata, ps),
  transect,
  along = T,
  cellnumbers = T
  ))

trans[, c("lon", "lat")] = coordinates(rdata)[trans$cell]
## get order to identify non-monotonic increase
trans$order = 1:nrow(trans)
## drop pixels in which range dist is decreasing as order increases
## This is to remove situation if transect starts from not-center of rangemap
## e.g. plot(order~rangeDist,data=trans)
trans = trans[trans$order > trans$order[which.min(trans$rangeDist)], ]

transl = group_by(trans, lon, lat) %>%
  gather(variable, value,-lon,-lat,-cell,-rangeDist,-order)

## separate prior/posterior data
transp = filter(transl, !variable %in% c("bio1", "bio12"))

## add prior id column
transp$type = ifelse(grepl("prior", transp$variable), "Offset", "Prediction")
transp$id = as.numeric(sub("prior|posterior", "", transp$variable))
## order levels for convenient plotting
transp$type = factor(transp$type,
                     levels = c("Prediction", "Offset"),
                     ordered = T)
## add prior information
transp$rate = priorf$rate[match(transp$id, priorf$id)]
transp$prob = priorf$prob[match(transp$id, priorf$id)]
transp = transp[order(transp$rangeDist), ]
transp$label = factor(paste0("Rate=", transp$rate, " Prob=", transp$prob), ordered =
                        T)
ggplot(transp, aes(
  x = rangeDist,
  y = value,
  colour = type,
  group = type
)) +
  scale_y_log10() +
  facet_grid(prob ~ rate, labeller = label_both) +
  geom_vline(xintercept = 0,
             linetype = "dashed",
             colour = "black") +
  geom_path() +
  xlab("Distance from range edge") +
  ylab("Relative Occurrence Rate P(X|Y=1)") +
  scale_color_manual(values = c("red", "black")) +
  ggtitle(paste(species, " prior and posterior values along transect"))