The LORI model with covariates

lori is a two-fold extension of the log-bilinear model. First, it allows to incorporate general covariates. Secondly, it includes a regularization through the nuclear norm penalty of the interaction matrix. The model is described as follows. Let \(Y\in\mathbb{R}^{n\times p}\) be a contingency table modeled as Poisson variables. Let \(X\in\mathbb{R}^{n\times p}\) be a parameter matrix such that for all \(1\leq i\leq n\) and \(1\leq j\leq p\), and denoting \(\mathcal{P}(l)\) the Poisson distribution of parameter \(l\): \[Y_{ij}|X_{ij} \sim \mathcal{P}(\mathrm{e}^X_{ij}).\] Let \(R\in\mathbb{R}^{m_1\times K_1}\) (resp. \(C\in\mathbb{R}^{K_2\times m_2}\)) be matrices of known row (resp. column) covariates, and \(\alpha \in \mathbb{R}^{m_1\times K_2}\) (resp. \(\beta \in \mathbb{R}^{K_1\times m_2}\)) be unknown parameters. We model the matrix \(X\) as follows: \begin{equation} \label{eq:log-bilinear-cov} \bar{X}=R\mu C{\top}+\alpha{\top}C{\top}+R\beta+\Theta. \end{equation} In ecology, the row features \(R\) embed geographical information about the environments (slope, temperature, etc.), and \(C\) codes physical traits about species (height, mass, etc.). \(R\mu C^{\top}\) incorporates \textit{interactions} between covariates, \(R\beta\) (resp. \(C\alpha\)) contains \textit{interactions} between environment covariates and species (resp. species covariates and environments), which are not explained by the species covariates (resp. environment covariates). It actually corresponds to \textit{interactions}, because the linear combination of the environments covariates (the main effect) is different for every species, \textit{i.e} \[(R\beta)_{ij} = \sum_{k=1}^{K_1}R_{ik}\beta_{kj}.\] Lastly, \(\Theta\) corresponds to the interaction unexplained by the known covariates \(R\) and \(C\). We perform the estimation by maximizing a penalized Poisson negative log-likelihood and obtain the estimator \[(\hat \mu,\hat{\alpha},\hat{\beta},\hat{\Theta})=\text{argmin}_{\mu,\alpha,\beta,\Theta}\quad \Phi(R\mu C^{\top}+ \alpha^{\top}C^{\top},R\beta,\Theta)+\gamma\left\|{\Theta}\right\|_*,\ s.t.\quad \alpha R = 0, \beta C = 0, R^{\top}\Theta = 0, \Theta C = 0.\]

\[\Phi(X)=-\frac{1}{m_1m_2}\sum_{i=1}^{m_1}\sum_{j=1}^{m_2}\left( Y_{ij}X_{ij} - \exp(X_{ij})\right).\]

The Aravo dataset

The Aravo dataset contains the abundances of 82 species of Alpine plants across 75 environments. Species traits and environmental variables - i.e. covariates - are also available. We use this dataset to provide code examples to use the lori package and to illustrate the advantage of using lori to regularize and incorporate covariates.

library(ade4)
data(aravo)
library(lori)
library(FactoMineR)
## 
## Attaching package: 'FactoMineR'
## The following object is masked from 'package:ade4':
## 
##     reconst
# Reduce size of data so that code runs quicker (remove lines to perform full experiment -> 45')
Y <- aravo$spe[1:35, 1:40]
R <- aravo$env[1:35, ]
C <- aravo$traits[1:40, ]
data(aravo)
m1 <- 35
m2 <- 40

# Code categorical variables as indicators
R$ZoogDno <- rep(0,m1)
R$ZoogDsome <- rep(0,m1)
R$ZoogDhigh <- rep(0,m1)
R$ZoogDno[R$ZoogD=="no"] <- 1
R$ZoogDsome[R$ZoogD=="some"] <- 1
R$ZoogDhigh[R$ZoogD=="high"] <- 1

R$Form1 <- rep(0,m1)
R$Form2 <- rep(0,m1)
R$Form3 <- rep(0,m1)
R$Form4 <- rep(0,m1)
R$Form5 <- rep(0,m1)
R$Form1[R$Form==1] <- 1
R$Form2[R$Form==2] <- 1
R$Form3[R$Form==3] <- 1
R$Form4[R$Form==4] <- 1
R$Form5[R$Form==5] <- 1

R <- R[, c(1,2,4,6:14)]

Compute the estimator without looking at the covariates

# Run LORI algorithm without using covariates 
qut_no_covariates = lori(Y)
## [1] "computing empirical distribution..."
## [1] "starting optimization..."
## [1] "iteration: 1000"
## [1] "objective step: -6.45213216188267e-10"
## [1] "iteration: 2000"
## [1] "objective step: -4.48346137993383e-10"
## [1] "iteration: 3000"
## [1] "objective step: -3.23840176896795e-10"
## [1] "iteration: 4000"
## [1] "objective step: -2.39543607172266e-10"
## [1] "iteration: 5000"
## [1] "objective step: -1.80303771912804e-10"
## [1] "iteration: 6000"
## [1] "objective step: -1.37594824423104e-10"
## [1] "iteration: 7000"
## [1] "objective step: -1.06193387416909e-10"
## [1] "iteration: 8000"
## [1] "objective step: -8.27359292188135e-11"
## [1] "iteration: 9000"
## [1] "objective step: -6.49773568284218e-11"
## [1] "iteration: 10000"
## [1] "objective step: -5.13797893120227e-11"
## [1] "iteration: 11000"
## [1] "objective step: -4.08667544249397e-11"
## [1] "iteration: 12000"
## [1] "objective step: -3.26698668118297e-11"
## [1] "iteration: 13000"
## [1] "objective step: -2.6232793715053e-11"
## [1] "iteration: 14000"
## [1] "objective step: -2.1145862838523e-11"
## [1] "iteration: 15000"
## [1] "objective step: -1.71040959173752e-11"
## [1] "iteration: 16000"
## [1] "objective step: -1.38774547409071e-11"
## [1] "iteration: 17000"
## [1] "objective step: -1.12910791827403e-11"
## [1] "iteration: 18000"
## [1] "objective step: -9.21052123459276e-12"
## [1] "iteration: 19000"
## [1] "objective step: -7.53141993214967e-12"
## [1] "iteration: 20000"
## [1] "objective step: -6.17250695000848e-12"
## [1] "model converged !"

Draw correlation and scatter plots

plot_interaction(qut_no_covariates$Theta, selectRow = "contrib 10", selectCol = "contrib 10")

plot of chunk plots-qut-rc

plot_interaction(cbind(qut_no_covariates$Theta, R), quanti.sup = 41:52, choix = "quanti.sup")

plot of chunk plots-qut-rc

# Define projection on aravo covariates
Xr <- R
Xr <- qr(Xr)
Xr <- qr.Q(Xr)
Xc <- C
Xc <- qr(Xc)
Xc <- qr.Q(Xc)

aravo_projection=function(X){
  X = as.matrix(X)
  p = function(X, Xr, Xc){
  return(X - Xr%*%t(Xr)%*%X - X%*%Xc%*%t(Xc) + Xr%*%t(Xr)%*%X%*%Xc%*%t(Xc))
}
  return(p(X, aravo$R, aravo$C))
}


# Run algorithm with QUT using the known covariates 
#------------------------------------------------------------------------------------
# this takes a while with entire data set (45 minutes)
qut_covariates = lori(Y, R, C)
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "computing empirical distribution..."
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "model converged !"
## [1] "starting optimization..."
## [1] "iteration: 1000"
## [1] "objective step: -2.43639441954713e-10"
## [1] "iteration: 2000"
## [1] "objective step: -8.88873419313541e-11"
## [1] "iteration: 3000"
## [1] "objective step: -4.36595204433843e-11"
## [1] "iteration: 4000"
## [1] "objective step: -2.74152922585813e-11"
## [1] "iteration: 5000"
## [1] "objective step: -2.0148716028956e-11"
## [1] "model converged !"
plot_interaction(qut_no_covariates$Theta, selectRow = "contrib 10", selectCol = "contrib 10")

plot of chunk qut-covariates

plot_interaction(cbind(qut_no_covariates$Theta, R), quanti.sup = 41:52, choix = "quanti.sup")

plot of chunk qut-covariates