.R
), R markdown (.Rmd
), or Sweave (.Rnw
)setwd()
rm(list=ls())
attach()
stat_sum
(with ggplot
) or plotrix::sizeplot()
so that the graph shows the number of data values at each point. It’s up to you whether to distinguish between series="I"
and series="II"
in the data.dd <- read.csv("../data/beetle3.csv")
library(ggplot2); theme_set(theme_bw())
ggplot(dd,aes(dosage,dead))+stat_sum()+facet_wrap(~series)+
geom_smooth()
Or:
library(plotrix)
with(dd,sizeplot(dosage,dead,ylim=c(-0.2,1.2)))
aggregate
(base R) or group_by
+ summarise
(dplyr
) to compute the proportion killed for each unique dosage value/series combination. Optionally, add another column with the total number of individuals for each dosage value/series combination.## with dplyr:
dd2 <- (dd
%>% group_by(series,dosage)
%>% summarise(tot=n(),prop=mean(dead))
)
## with aggregate:
## (there are various ways to do it but I prefer the formula interface
dd2B <- aggregate(dead~dosage+series, FUN=mean, data=dd)
## to add another column:
dd2B <- aggregate(dead~dosage+series,
FUN=function(x) c(prop=mean(x), tot=length(x)),
data=dd)
## collapse matrix elements
dd2B <- do.call(data.frame, dd2B)
ggplot(dd2,aes(dosage,prop,colour=series))+geom_point(aes(size=tot))+
geom_smooth(aes(weight=tot))
series
and log10(dose)
to the original (disaggregated) data.g1 <- glm(dead ~ series*log10(dosage), data=dd, family=binomial)
Answers will depend on whether you used treatment or sum-to-zero contrasts
Likelihood ratio test of combination of intercepts and slopes:
g0 <- update(g1, . ~ log10(dosage))
anova(g0, g1, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: dead ~ log10(dosage)
## Model 2: dead ~ series * log10(dosage)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 479 372.46
## 2 477 372.46 2 0.00039563 0.9998
(i.e. no evidence that either slopes or intercepts differ)
Wald tests of differences in intercept alone (seriesII
), differences in slope alone (seriesII:log10(dosage)
)
printCoefmat(coef(summary(g1))[c("seriesII","seriesII:log10(dosage)"),])
## Estimate Std. Error z value Pr(>|z|)
## seriesII -0.20470 10.36265 -0.0198 0.9842
## seriesII:log10(dosage) 0.11537 5.82493 0.0198 0.9842
log10(dose)
, ignoring series
.We already did this, more or less:
glm(dead ~ log10(dosage), data=dd, family=binomial)
##
## Call: glm(formula = dead ~ log10(dosage), family = binomial, data = dd)
##
## Coefficients:
## (Intercept) log10(dosage)
## -60.72 34.27
##
## Degrees of Freedom: 480 Total (i.e. Null); 479 Residual
## Null Deviance: 645.4
## Residual Deviance: 372.5 AIC: 376.5
(c1 <- stats::confint.default(g0)["log10(dosage)",])
## 2.5 % 97.5 %
## 28.56520 39.98054
(c2 <- suppressMessages(confint(g0)["log10(dosage)",]))
## 2.5 % 97.5 %
## 28.85651 40.30322
## suppressMessages() to get rid of "Waiting for profiling to be done ..."
## not identical, but **very** close
c1-c2
## 2.5 % 97.5 %
## -0.2913014 -0.3226809
Bootstrap:
bootfun0 <- function() {
bootdat <- dd[sample(nrow(dd),replace=TRUE),]
gb <- update(g0, data=bootdat)
b1 <- coef(gb)["log10(dosage)"]
return(b1)
}
set.seed(101)
bootvals0 <- replicate(1000,bootfun0())
(boot_ci0 <- quantile(bootvals0,c(0.025,0.975)))
## 2.5% 97.5%
## 29.23505 40.74178
Also very similar (bootstrap CIs have an extra stochastic component, as well; we might get slightly different answers if we ran it all with a different random-number seed)
rr <- DHARMa::simulateResiduals(g0, plot=TRUE)
Plot residuals look more or less perfect (which they kind of have to be, since the Bernoulli conditional distribution is necessarily true); quantile values (red vs dotted black lines) look good, suggesting little bias. (I didn’t use pch="."
here, not really useful unless we have a very large data set).
pframe <- with(dd2,
data.frame(dosage=c(min(dosage),mean(dosage),max(dosage))))
## could use 10^mean(log10(dosage)) for central value as well
pp <- predict(g0, newdata=pframe, se.fit=TRUE)
pframe$prob <- plogis(pp$fit)
pframe$lwr <- plogis(pp$fit-1.96*pp$se.fit)
pframe$upr <- plogis(pp$fit+1.96*pp$se.fit)
LD50 <- with(as.list(coef(g0)), -`(Intercept)`/`log10(dosage)`)
Delta method:
## b0 + b1*x = 0 -> x = -b0/b1
b0 <- coef(g0)[["(Intercept)"]]
b1 <- coef(g0)[["log10(dosage)"]]
## derivs: c(-1/b1, b0/b1^2)
grad <- c(-1/b1,b0/b1^2)
(delta_sd <- c(sqrt(grad %*% vcov(g0) %*% grad)))
## [1] 0.003857782
## c() converts result from a matrix to a (length-1) vector
(delta_ci <- (-b0/b1) + 1.96*c(-1,1)*delta_sd)
## [1] 1.764156 1.779278
Lazy method:
library(emdbook)
sqrt(deltavar(-b0/b1,meanval=c(b0=b0,b1=b1),Sigma=vcov(g0)))
## [1] 0.003857782
Bootstrap:
bootfun <- function() {
bootdat <- dd[sample(nrow(dd),replace=TRUE),]
gb <- update(g0, data=bootdat)
b0 <- coef(gb)[1]
b1 <- coef(gb)[2]
return(-b0/b1)
}
set.seed(101)
## a quick way to skip the for() loop
bootvals <- replicate(1000,bootfun())
(boot_ci <- quantile(bootvals,c(0.025,0.975)))
## 2.5% 97.5%
## 1.763499 1.779214
Delta-method and bootstrap results are very similar.