Licensed under the Creative Commons Attribution-NonCommercial license. Please share & remix noncommercially, mentioning its origin.
library(rstan)
library(Deriv)
library(TMB)
library(emdbook)
library(nimble)
The built-in deriv() function can return the result as a function, via deriv(..., function.arg=...):
(f1 <- deriv(~ 1/(a+b*exp(-x)), c("a","b"),
             function.arg=c("a","b","x")))
## function (a, b, x) 
## {
##     .expr2 <- exp(-x)
##     .expr4 <- a + b * .expr2
##     .expr6 <- .expr4^2
##     .value <- 1/.expr4
##     .grad <- array(0, c(length(.value), 2L), list(NULL, c("a", 
##         "b")))
##     .grad[, "a"] <- -(1/.expr6)
##     .grad[, "b"] <- -(.expr2/.expr6)
##     attr(.value, "gradient") <- .grad
##     .value
## }
f1_g <- function(a, b, x) {
    ## evaluate f1 and pull out just the gradient attribute
    attr(f1(a, b, x), "gradient")
}
(wasteful, since the function value is computed and then thrown away, but I don't know of a better pattern)
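For example, evaluating the gradient at an arbitrary (illustrative) point:
f1_g(a = 1, b = 2, x = 0)  ## 1 x 2 gradient matrix; both entries are -1/9 here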
The Deriv package is much more flexible than the built-in deriv() function: in particular, it allows user-specified derivative rules.
log_dpois_symb <- ~ x*log(lambda) - lambda - lfactorial(x)
## deriv(log_dpois_symb, "lambda")  ## fails: lfactorial is not in deriv()'s derivatives table
library("Deriv")
## derivatives of the *log*-density (i.e. treating dpois() as if log=TRUE)
drule[["dpois"]] <- alist(lambda = x/lambda - 1,
                          x = log(lambda) - digamma(x+1))
Deriv(~dpois(y,1/(1+exp(a+b*x))),c("a","b"))
## {
##     .e2 <- exp(a + b * x)
##     .e3 <- 1 + .e2
##     .e4 <- .e3^2
##     .e7 <- y * .e3 - 1
##     c(a = -(.e2 * .e7/.e4), b = -(x * .e2 * .e7/.e4))
## }
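Deriv() can also be applied directly to a function (rather than a formula), returning a gradient function; a quick sketch using the rule defined above (names and test values are my own):
dlogpois_grad <- Deriv(function(a, b, x, y) dpois(y, 1/(1+exp(a+b*x))),
                       c("a","b"))
dlogpois_grad(a = 0, b = 1, x = 1, y = 2)  ## named gradient vector c(a=..., b=...)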
Without a function.arg specification, deriv() returns an expression rather than a function:
deriv(~ x^2 + y^2, c("x","y"))
## expression({
##     .value <- x^2 + y^2
##     .grad <- array(0, c(length(.value), 2L), list(NULL, c("x", 
##         "y")))
##     .grad[, "x"] <- 2 * x
##     .grad[, "y"] <- 2 * y
##     attr(.value, "gradient") <- .grad
##     .value
## })
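The expression can be evaluated with eval(), given values for the variables; the gradient comes back as an attribute (values below chosen for an easy hand check):
d2 <- deriv(~ x^2 + y^2, c("x","y"))
eval(d2, list(x = 1, y = 2))  ## value 5, with gradient attribute (2, 4)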
The basic TMB recipe is: compile() + dyn.load() + MakeADFun() + optimize. First write the C++ objective function to a file:
writeLines(con="tadpole1.cpp",
"// Tadpole example
#include <TMB.hpp>
// next two lines are magic
template<class Type>
Type objective_function<Type>::operator() ()
{
  DATA_VECTOR(Initial);
  DATA_VECTOR(Killed);
  PARAMETER(a);   // scalar parameters
  PARAMETER(h);
  Type nll = 0;   // negative log-likelihood
  for (int i=0; i < Killed.size(); i++) {
    nll -= dbinom(Killed(i), Initial(i), a/(1+a*h*Initial(i)), true);
  }
  return nll;
}
")
TMB provides its own versions of the standard probability distributions (dbinom, dnorm, …) and inverse-link functions (invlogit, …) for use in the C++ code.
compile("tadpole1.cpp")
## [1] 0
dyn.load(dynlib("tadpole1"))
data(ReedfrogFuncresp)  ## tadpole predation data from the emdbook package
obj <- MakeADFun(data=as.list(ReedfrogFuncresp),
                 parameters=list(a=1, h=1),
                 DLL="tadpole1")
## Constructing atomic D_lgamma
## try out function and gradient
obj$fn(c(a=1, h=1))  ## objective (negative log-likelihood)
## [1] 488.6209
obj$gr(c(a=1, h=1))  ## gradient
## outer mgc: 196.8852
## [,1] [,2]
## [1,] -5.584761 196.8852
res <- do.call("optim",obj) ## could also use nlminb etc.
## outer mgc: 196.8852
## outer mgc: 278.1104
## outer mgc: 655.7366
## outer mgc: 989.1003
## outer mgc: 2499.558
## outer mgc: 856.1056
## outer mgc: 427.8839
## outer mgc: 277.574
## outer mgc: 211.8005
## outer mgc: 211.2134
## outer mgc: 37.91147
## outer mgc: 1.874676
## outer mgc: 0.152229
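The result has the usual optim() components:
res$par          ## parameter estimates (a, h)
res$value        ## negative log-likelihood at the optimum
res$convergence  ## 0 indicates successful convergence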
nlminb(start=obj$par,objective=obj$fn,gradient=obj$gr)
## outer mgc: 196.8852
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc: 217.0108
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc: 1020.448
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc: 1037.339
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc: 747.8333
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc: 523.6315
## outer mgc: 510.3677
## outer mgc: 146.4597
## outer mgc: 227.4284
## outer mgc: 176.7625
## outer mgc: 93.02825
## outer mgc: 4.113549
## outer mgc: 1.221435
## outer mgc: 0.211021
## outer mgc: 0.004228406
## outer mgc: 4.758414e-05
## $par
## a h
## 0.52589291 0.01660326
##
## $objective
## [1] 46.72136
##
## $convergence
## [1] 0
##
## $iterations
## [1] 15
##
## $evaluations
## function gradient
## 27 16
##
## $message
## [1] "relative convergence (4)"
Inference: sdreport() computes the estimated covariance matrix of the parameters; tmbprofile() and tmbroot() handle likelihood profiling.
vv <- sdreport(obj)$cov.fixed  ## Wald covariance matrix of the parameters
sds <- sqrt(diag(vv))          ## Wald standard errors
tmbprofile(obj, "a")           ## likelihood profile for a
tmbroot(obj, "a")              ## profile-based confidence limits for a
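Putting these together, a sketch (assumes ±1.96 Wald intervals; confint() has a method for tmbprofile objects):
est <- obj$env$last.par.best                           ## best parameter values found
cbind(lower = est - 1.96*sds, upper = est + 1.96*sds)  ## Wald CIs
confint(tmbprofile(obj, "a"))                          ## profile CI for a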
Exercise: compare the Wald and profile confidence intervals for a and h.
In Stan, the model is written in Stan's own modeling language (see the Stan function reference):
## data {
##   int<lower=1> N;
##   int<lower=0> Initial[N];
##   int<lower=0> Killed[N];
## }
## parameters {
##   real<lower=0> a;
##   real<lower=0> h;
## }
## model {
##   for (i in 1:N) {
##     Killed[i] ~ binomial(Initial[i], a/(1+a*h*Initial[i]));
##   }
## }
m <- stan_model(file = "tadpole.stan", model_name = "tadpole")
fit <- optimizing(m, data = c(as.list(ReedfrogFuncresp),
                              N = nrow(ReedfrogFuncresp)))
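optimizing() returns (among other things) the point estimates and the log-density at the optimum:
fit$par    ## maximum-likelihood estimates of a and h
fit$value  ## log-probability at the optimum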
hollingprob <- nimbleFunction(
    run = function(x = integer(1), x0 = integer(1), a = double(), h = double()) {
        returnType(double())
        return(-sum(dbinom(x, prob = a/(1+a*h*x0), size = x0, log = TRUE)))
    })
with(ReedfrogFuncresp,hollingprob(Killed, Initial, a=0.01,h=1))
## [1] 730.8627
cc <- compileNimble(hollingprob, showCompilerOutput=TRUE)
## make[1]: Entering directory '/tmp/RtmpUNGxBP/nimble_generatedCode'
## g++ -std=gnu++11 -I"/usr/local/lib/R/include" -DNDEBUG -DR_NO_REMAP -DEIGEN_MPL2_ONLY=1 -I"/usr/local/lib/R/site-library/nimble/include" -Wno-misleading-indentation -Wno-ignored-attributes -Wno-deprecated-declarations -I/usr/local/include -fpic -g -O2 -c P_1_rcFun_4.cpp -o P_1_rcFun_4.o
## g++ -std=gnu++11 -shared -L/usr/local/lib -o P_1_rcFun_4_05_15_23_33_16.so P_1_rcFun_4.o -L/usr/local/lib/R/site-library/nimble/CppCode -lnimble -Wl,-rpath /usr/local/lib/R/site-library/nimble/CppCode -L/usr/local/lib/R/lib -lRlapack -L/usr/local/lib/R/lib -lRblas
## make[1]: Leaving directory '/tmp/RtmpUNGxBP/nimble_generatedCode'
with(ReedfrogFuncresp,cc(Killed, Initial, a=0.01,h=1))
## [1] 730.8627
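The compiled function can be dropped into a general-purpose optimizer; a minimal sketch (the wrapper and starting values below are my own choices, not from the original notes):
nll_fun <- function(p) {
    with(ReedfrogFuncresp, cc(Killed, Initial, a = p[1], h = p[2]))
}
optim(par = c(a = 0.5, h = 0.02), fn = nll_fun)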