Licensed under the Creative Commons Attribution-NonCommercial license. Please share & remix noncommercially, mentioning its origin.

library(rstan)
library(Deriv)
library(TMB)
library(emdbook)
library(nimble)

calculating derivatives

(f1 <- deriv(~ 1/(a+b*exp(-x)), c("a","b"),
             function.arg=c("a","b","x")))
## function (a, b, x) 
## {
##     .expr2 <- exp(-x)
##     .expr4 <- a + b * .expr2
##     .expr6 <- .expr4^2
##     .value <- 1/.expr4
##     .grad <- array(0, c(length(.value), 2L), list(NULL, c("a", 
##         "b")))
##     .grad[, "a"] <- -(1/.expr6)
##     .grad[, "b"] <- -(.expr2/.expr6)
##     attr(.value, "gradient") <- .grad
##     .value
## }
f1_g <- function(a,b,x) {
  attr(f1(a,b,x),"grad")
}

(wasteful but I don’t know of a better pattern)
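If both the value and the gradient are needed, a single evaluation can supply both (a minimal sketch; f1_both is an illustrative name):

f1_both <- function(a,b,x) {
  res <- f1(a,b,x)                  ## one evaluation computes both
  list(value=as.vector(res),        ## strip the gradient attribute
       gradient=attr(res,"gradient"))
}
f1_both(1, 2, 0.5)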

The Deriv package is much more flexible than the built-in deriv() function: it allows user-specified derivative rules.

log_dpois_symb <- ~ x*log(lambda)-lambda/lfactorial(x)
## deriv(log_dpois_symb,"lambda") fails: lfactorial() is not in deriv()'s derivatives table
library("Deriv")
drule[["dpois"]] <- alist(lambda=x/lambda - 1/lfactorial(x),
                          x=log(lambda)-lambda*-digamma(x+1)/lfactorial(x)^2)
Deriv(~dpois(y,1/(1+exp(a+b*x))),c("a","b"))
## {
##     .e2 <- exp(a + b * x)
##     .e3 <- 1 + .e2
##     .e4 <- .e3^2
##     .e7 <- y * .e3 - 1/lfactorial(y)
##     c(a = -(.e2 * .e7/.e4), b = -(x * .e2 * .e7/.e4))
## }
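Deriv() can also differentiate a function rather than a formula, returning a gradient function directly (a sketch reusing the dpois rule above; dpois_grad is an illustrative name):

dpois_grad <- Deriv(function(a,b,x,y) dpois(y, 1/(1+exp(a+b*x))),
                    c("a","b"))
dpois_grad(a=0.5, b=1, x=2, y=3)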

automatic differentiation

deriv(~x^2+y^2, c("x","y"))
## expression({
##     .value <- x^2 + y^2
##     .grad <- array(0, c(length(.value), 2L), list(NULL, c("x", 
##         "y")))
##     .grad[, "x"] <- 2 * x
##     .grad[, "y"] <- 2 * y
##     attr(.value, "gradient") <- .grad
##     .value
## })
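Where deriv() manipulates expressions symbolically, automatic differentiation propagates numerical derivative values through the computation itself. A toy forward-mode version using dual numbers (all names here are illustrative, not any package's API):

dual  <- function(v,d) list(v=v, d=d)  ## a value paired with its derivative
d_add <- function(x,y) dual(x$v+y$v, x$d+y$d)
d_mul <- function(x,y) dual(x$v*y$v, x$d*y$v + x$v*y$d)  ## product rule
## d/dx of x^2+y^2 at (x=3, y=4): seed x with derivative 1, y with 0
x <- dual(3,1); y <- dual(4,0)
d_add(d_mul(x,x), d_mul(y,y))  ## value 25, derivative 6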

Laplace approximation
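In brief: to integrate a random effect $u$ out of the likelihood, expand the joint negative log-likelihood $f(u,\theta)$ to second order around its minimum in $u$:

$$L(\theta)=\int e^{-f(u,\theta)}\,du \approx \left(\frac{(2\pi)^{n}}{\det H(\theta)}\right)^{1/2} e^{-f(\hat{u}(\theta),\theta)},$$

where $\hat{u}(\theta)$ minimizes $f(u,\theta)$ over $u$ and $H(\theta)$ is the Hessian of $f$ with respect to $u$ at $\hat{u}(\theta)$.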

TMB
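TMB (Template Model Builder) lets you write the objective function (typically a negative log-likelihood) as a C++ template; derivatives are supplied by automatic differentiation (via CppAD), and random effects can be integrated out with the Laplace approximation above.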

TMB challenges


Tadpoles in TMB

writeLines(con="tadpole1.cpp",
           "// Tadpole example
#include <TMB.hpp>
// next two lines are magic
template<class Type> 
Type objective_function<Type>::operator() ()
{
  DATA_VECTOR(Initial);
  DATA_VECTOR(Killed);
  PARAMETER(a); // scalar parameters
  PARAMETER(h);
  Type nll = 0; // negative log-likelihood
  for (int i=0; i < Killed.size(); i++) {
      nll -= dbinom(Killed(i), Initial(i), a/(1+a*h*Initial(i)), true);
  }
  return nll;
}
")
compile("tadpole1.cpp")
## [1] 0
dyn.load(dynlib("tadpole1"))
data(ReedfrogFuncresp)
obj <- MakeADFun(data=as.list(ReedfrogFuncresp),
                 parameters=list(a=1, h=1),
                 DLL="tadpole1")
## Constructing atomic D_lgamma
## try out function and gradient
obj$fn(c(a=1,h=1))  ## fn and gr take a single parameter vector
## [1] 488.6209
obj$gr(c(a=1,h=1))
## outer mgc:  196.8852
##           [,1]     [,2]
## [1,] -5.584761 196.8852
res <- do.call("optim",obj)  ## could also use nlminb etc.
## outer mgc:  196.8852 
## outer mgc:  278.1104 
## outer mgc:  655.7366 
## outer mgc:  989.1003 
## outer mgc:  2499.558 
## outer mgc:  856.1056 
## outer mgc:  427.8839 
## outer mgc:  277.574 
## outer mgc:  211.8005 
## outer mgc:  211.2134 
## outer mgc:  37.91147 
## outer mgc:  1.874676 
## outer mgc:  0.152229
nlminb(start=obj$par,objective=obj$fn,gradient=obj$gr)
## outer mgc:  196.8852
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc:  217.0108
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc:  1020.448
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation

## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc:  1037.339
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc:  747.8333
## Warning in nlminb(start = obj$par, objective = obj$fn, gradient = obj$gr):
## NA/NaN function evaluation
## outer mgc:  523.6315 
## outer mgc:  510.3677 
## outer mgc:  146.4597 
## outer mgc:  227.4284 
## outer mgc:  176.7625 
## outer mgc:  93.02825 
## outer mgc:  4.113549 
## outer mgc:  1.221435 
## outer mgc:  0.211021 
## outer mgc:  0.004228406 
## outer mgc:  4.758414e-05
## $par
##          a          h 
## 0.52589291 0.01660326 
## 
## $objective
## [1] 46.72136
## 
## $convergence
## [1] 0
## 
## $iterations
## [1] 15
## 
## $evaluations
## function gradient 
##       27       16 
## 
## $message
## [1] "relative convergence (4)"

Inference:

vv <- sdreport(obj)$cov.fixed  ## Wald covariance matrix of the fixed effects
sds <- sqrt(diag(vv))          ## Wald standard errors
tmbprofile(obj,"a")            ## likelihood profile for a
tmbroot(obj,"a")               ## likelihood-ratio-based confidence limits for a
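The Wald standard errors give quick approximate confidence intervals (a sketch, reusing res from the optim() fit above):

res$par + outer(sds, qnorm(0.975)*c(lower=-1, upper=1))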

exercise:

Stan

Stan function reference

writeLines(con="tadpole.stan",
"data {
  int<lower=1> N;
  int<lower=0> Initial[N];
  int<lower=0> Killed[N];
}
parameters {
  real<lower=0> a;
  real<lower=0> h;
}
model {
  for (i in 1:N) {
    Killed[i] ~ binomial(Initial[i], a/(1+a*h*Initial[i]));
  }
}
")
m <- stan_model(file = "tadpole.stan", model_name = "tadpole")
fit <- optimizing(m, data = c(as.list(ReedfrogFuncresp),
                              N= nrow(ReedfrogFuncresp)))
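optimizing() computes the posterior mode (here, with flat priors, the MLE) rather than sampling, so the point estimates should agree with the TMB fits above:

fit$par  ## point estimates of a and h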

NIMBLE

hollingprob <- nimbleFunction(
  run = function(x = integer(1), x0 = integer(1), a = double(), h = double()) {
    ## x = number killed, x0 = initial number; a = attack rate, h = handling time
    returnType(double())
    return(-sum(dbinom(x, prob=a/(1+a*h*x0), size=x0, log=TRUE)))
  })
with(ReedfrogFuncresp,hollingprob(Killed, Initial, a=0.01,h=1))
## [1] 730.8627
cc <- compileNimble(hollingprob, showCompilerOutput=TRUE)
## make[1]: Entering directory '/tmp/RtmpUNGxBP/nimble_generatedCode'
## g++ -std=gnu++11 -I"/usr/local/lib/R/include" -DNDEBUG -DR_NO_REMAP   -DEIGEN_MPL2_ONLY=1 -I"/usr/local/lib/R/site-library/nimble/include" -Wno-misleading-indentation -Wno-ignored-attributes -Wno-deprecated-declarations  -I/usr/local/include  -fpic  -g -O2  -c P_1_rcFun_4.cpp -o P_1_rcFun_4.o
## g++ -std=gnu++11 -shared -L/usr/local/lib -o P_1_rcFun_4_05_15_23_33_16.so P_1_rcFun_4.o -L/usr/local/lib/R/site-library/nimble/CppCode -lnimble -Wl,-rpath /usr/local/lib/R/site-library/nimble/CppCode -L/usr/local/lib/R/lib -lRlapack -L/usr/local/lib/R/lib -lRblas
## make[1]: Leaving directory '/tmp/RtmpUNGxBP/nimble_generatedCode'
with(ReedfrogFuncresp,cc(Killed, Initial, a=0.01,h=1))
## [1] 730.8627
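The compiled function can be handed to a general-purpose optimizer through a small wrapper (a sketch; nllfun and the starting values are illustrative):

nllfun <- function(p) with(ReedfrogFuncresp, cc(Killed, Initial, a=p[1], h=p[2]))
optim(c(a=0.5, h=0.02), nllfun)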

other choices

  • greta: R -> TensorFlow interface. Hamiltonian Monte Carlo only
  • NIMBLE: modeling language -> MCMC, particle filtering, etc.
  • Rcpp: C++ in R; make the objective function faster …
