# Incorporating Expert Range Maps in MaxEnt-style SDMs

#### November 2016

library(ggplot2)
library(foreach)
registerDoSEQ()
library(dplyr)
library(tidyr)
library(bossMaps)
library(raster)

### Load example data for the Solitary Tinamou

species=c("Tinamus_solitarius")

data("Tinamus_solitarius_points")
data("Tinamus_solitarius_range")
data("Tinamus_solitarius_env")

points=Tinamus_solitarius_points
range=Tinamus_solitarius_range
env=Tinamus_solitarius_env

## Calculate range distances

Calculate distance-to-range: this is slow but only has to be done once per species. This can be sped up by increasing fact (at the expense of lower accuracy).

rdist = rangeDist(
range = range,
points = points,
domain = env,
domainkm = 1000,
fact = 2
)
## Rasterizing range to ROI
## Calculating distances
## Resampling back to original resolution
## Crop environmental data to this new domain (based on domainkm)
env_domain = crop(env, rdist)

## Mask pixels with no environmenta data (over ocean, etc.)
names(rdist) = "rangeDist"

plot(rdist)
plot(points, add = T)

# Calculate Priors

## Evaluate curves

Calculate which curve parameter combinations are feasible given the domain and range geometry.

rates=checkRates(rdist)
## Calculating the range distance frequency table, you can also provide this with the dists parameter.
## Fitting the expert range decay, this can take a while depending on how many values you selected...

## Define curves

vars=expand.grid(
prob=c(0.5, 0.7, 0.9),
rate=c(0,0.01,0.1,10),
skew=0.5,
shift=0,
stringsAsFactors=F)
x=seq(-150,500,len=1000)

## Calculate all the curves

uvars=unique(vars[,c("rate","skew","shift")])
erd=do.call(rbind,
lapply(1:nrow(uvars),function(i) {
y=logistic(x,
parms=unlist(c(lower=0,upper=1,uvars[i,])))
return(cbind.data.frame(
group=i,
c(uvars[i,]),
x=x,
y=y))
})
)  

## Visualize potential decay parameters

ggplot(erd,
aes(
x = x,
y = y,
linetype = as.factor(skew),
colour = as.factor(rate),
group = group
)) +
geom_vline(aes(xintercept = 0), colour = "red") +
geom_line() +
xlab("Prior value (not normalized)") +
xlab("Distance to range edge (km)")

# Process priors

## Calculate frequency table of distances

In order to speed up optimization of range offsets, we calculate the rangeOffset() on a frequency distribution instead of the full raster. Calculate that now with freq().

dists = freq(rdist, useNA = "no", digits = 2)
knitr::kable(head(dists))
value count
-289.97 1
-289.50 1
-288.72 1
-288.45 1
-280.91 1
-280.47 1

## Calculate the priors

mcoptions <- list(preschedule = FALSE, set.seed = FALSE)
foreach(i = 1:nrow(vars), .options.multicore = mcoptions) %do% {
## calculate the expert range prior
fo = paste0(species, "_prior_", paste(vars[i, ], collapse = "_"), ".grd")
if (file.exists(fo))  return(NULL)
expert = rangeOffset(
rdist,
parms = unlist(vars[i, ]),
dists = dists,
doNormalize = T,
verbose = T,
doWriteRaster = T,
filename = fo,
overwrite = T,
datatype = "FLT4S"
)
}

## Stack the priors

fs = list.files(pattern = "prior.*grd$", full = T, recursive = F) priors = stack(fs) ## build prior table from metadata priorf = foreach(i = 1:nlayers(priors), .combine = bind_rows) %do% { t1 = metadata(priors[[i]]) t2 = t1$parms
names(t2) = t1$pnames return(data.frame(id = i, t(t2))) } names(priors) = paste0("prior", priorf$id)#basename(fs[wp])

# MaxEnt SDM with glm()

## Assemble modeling dataset

## build single raster stack of all needed data (env and priors)
rdata = stack(env, priors)

## generate presence and non-detection datasets
pres = cbind.data.frame(
presence = 1,
raster::extract(rdata, points, df =
T, ID = F))
ns = 10000
abs = cbind.data.frame(
presence = 0,
ID = 1:ns,
sampleRandom(rdata, ns))

data = rbind.data.frame(
pres,
abs)

## Fit models

data$weight = 1e-6 best.var = names(env) # number of non-NA cells nc = cellStats(!is.na(priors[[1]]), sum) data$weight[data$presence == 0] = nc / sum(data$presence == 0)

## Set up models
formulas = paste(
"presence/weight ~",
" offset(log(",
grep("prior",
colnames(data),
value = T),'))+',
paste0(sapply(
best.var, function(ii) {
sapply(best.var, function(jj) {
paste0(ii, "*", jj)
})
}), collapse = '+'),
'+',
paste0('I(', best.var, '^2)', collapse = '+'),
'+',
paste0(best.var, collapse = ':')
)

formulas[1]
## [1] "presence/weight ~  offset(log( prior1 ))+ bio1*bio1+bio1*bio12+bio12*bio1+bio12*bio12 + I(bio1^2)+I(bio12^2) + bio1:bio12"
mods = foreach(f = formulas) %do% {
glm(
as.formula(f),
data = data,
weights = weight
)
}

priorf$AIC=unlist(lapply(mods,function(x) AIC(x))) ## Make spatial predictions mcoptions <- list(preschedule = FALSE, set.seed = FALSE) ptype = "response" psi = 1:nrow(priorf) ps = foreach(i = psi, .options.multicore = mcoptions) %dopar% { fo = paste0( species,"_posterior_", priorf$prob[i],"_",
priorf$rate[i],"_", priorf$skew[i],"_",
priorf$shift[i], ".grd" ) if (file.exists(fo)) return(NULL) #if(!file.exists(fo)) return("NOT YET") normalize( raster::predict( rdata, mods[[i]], type = ptype), file = fo, overwrite = T) } psf = list.files(pattern = "posterior.*.grd", full = T) ps = stack(psf) names(ps) = sub("prior", "posterior", names(priors)[psi]) ## Full maps over grid of rate vs.Â prob res = 1e4 p_psn = rasterVis::gplot(ps, max = res) + geom_raster(aes(fill = value)) p_psn$data$id = as.numeric(sub("prior|posterior", "", p_psn$data$variable)) p_psn$data$rate = priorf$rate[p_psn$data$id]
p_psn$data$prob = priorf$prob[p_psn$data$id] p_psn + facet_grid(prob ~ rate, labeller = label_both) + scale_fill_gradientn( colours = hcols(100, bias = .5), trans = "log10", name = "Relative Occurrence Rate\np(x|Y=1)", na.value = "transparent" ) + geom_polygon( aes(x = long, y = lat, group = group), data = fortify(range), fill = "transparent", col = "black", size = .2 ) + geom_point( aes(x = coords.x1, y = coords.x2), data = as.data.frame(coordinates(points[points$presence == 1,])),
col = "black",
size = .5
) +
geom_text(
aes(
-30,-40,
label = paste0("AIC=", round(AIC),
ifelse(
round(fitbuffer) == 0, "",
paste0("\nFitDist=", round(fitbuffer), " km")
)),
group = NULL
),
data = priorf,
hjust = 1,
size = 2
) +
ylab("Latitude") + xlab("Longitude") +
coord_equal()

## Transects across all priors

### Extract data along transects

## Extract transect
transect = SpatialLinesDataFrame(SpatialLines(list(Lines(list(
Line(cbind(c(-51.85,-62.45), c(-26.01,-14.82)))
), ID = "a"))),
data.frame(Z = c("transect"), row.names = c("a")))

trans = do.call(
rbind.data.frame,
raster::extract(
stack(rdist, rdata, ps),
transect,
along = T,
cellnumbers = T
))

trans[, c("lon", "lat")] = coordinates(rdata)[trans$cell] ## get order to identify non-monotonic increase trans$order = 1:nrow(trans)
## drop pixels in which range dist is decreasing as order increases
## This is to remove situation if transect starts from not-center of rangemap
## e.g. plot(order~rangeDist,data=trans)
trans = trans[trans$order > trans$order[which.min(trans$rangeDist)], ] transl = group_by(trans, lon, lat) %>% gather(variable, value,-lon,-lat,-cell,-rangeDist,-order) ## separate prior/posterior data transp = filter(transl, !variable %in% c("bio1", "bio12")) ## add prior id column transp$type = ifelse(grepl("prior", transp$variable), "Offset", "Prediction") transp$id = as.numeric(sub("prior|posterior", "", transp$variable)) ## order levels for convenient plotting transp$type = factor(transp$type, levels = c("Prediction", "Offset"), ordered = T) ## add prior information transp$rate = priorf$rate[match(transp$id, priorf$id)] transp$prob = priorf$prob[match(transp$id, priorf$id)] transp = transp[order(transp$rangeDist), ]
transp$label = factor(paste0("Rate=", transp$rate, " Prob=", transp\$prob), ordered =
T)
ggplot(transp, aes(
x = rangeDist,
y = value,
colour = type,
group = type
)) +
scale_y_log10() +
facet_grid(prob ~ rate, labeller = label_both) +
geom_vline(xintercept = 0,
linetype = "dashed",
colour = "black") +
geom_path() +
xlab("Distance from range edge") +
ylab("Relative Occurrence Rate P(X|Y=1)") +
scale_color_manual(values = c("red", "black")) +
ggtitle(paste(species, " prior and posterior values along transect"))