## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  #tidy.opts=list(width.cutoff=60),
  #tidy=TRUE,
  fig.align = 'center',
  warning = FALSE
)

## ----runtimer, include=FALSE--------------------------------------------------
runstart <- lubridate::now()

## ----setup, message=FALSE-----------------------------------------------------
# load packages
library(boot)
library(car)
library(rTPC)
library(nls.multstart)
library(broom)
library(tidyverse)
library(patchwork)
library(minpack.lm)


## ----load_data, fig.height=4, fig.width=6-------------------------------------
# load in data
data("bacteria_tpc")

# keep just a single curve
d <- filter(bacteria_tpc, phage == 'nophage')

# show the data
ggplot(d, aes(temp, rate)) +
  geom_point(size = 2, alpha = 0.5) +
  theme_bw(base_size = 12) +
  labs(
    x = 'Temperature (ºC)',
    y = 'Growth rate',
    title = 'Growth rate across temperatures'
  )


## ----fit_and_plot, fig.height=4, fig.width=6----------------------------------
# fit Sharpe-Schoolfield model
d_fit <- nest(d, data = c(temp, rate)) %>%
  mutate(
    sharpeschoolhigh = map(
      data,
      ~ nls_multstart(
        rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 15),
        data = .x,
        iter = c(3, 3, 3, 3),
        start_lower = get_start_vals(
          .x$temp,
          .x$rate,
          model_name = 'sharpeschoolhigh_1981'
        ) -
          10,
        start_upper = get_start_vals(
          .x$temp,
          .x$rate,
          model_name = 'sharpeschoolhigh_1981'
        ) +
          10,
        lower = get_lower_lims(
          .x$temp,
          .x$rate,
          model_name = 'sharpeschoolhigh_1981'
        ),
        upper = get_upper_lims(
          .x$temp,
          .x$rate,
          model_name = 'sharpeschoolhigh_1981'
        ),
        supp_errors = 'Y',
        convergence_count = FALSE
      )
    ),
    # create new temperature data
    new_data = map(
      data,
      ~ tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100))
    ),
    # predict over that data,
    preds = map2(sharpeschoolhigh, new_data, ~ augment(.x, newdata = .y))
  )

# unnest predictions
d_preds <- select(d_fit, preds) %>%
  unnest(preds)

# plot data and predictions
ggplot() +
  geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
  geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) +
  theme_bw(base_size = 12) +
  labs(
    x = 'Temperature (ºC)',
    y = 'Growth rate',
    title = 'Growth rate across temperatures'
  )


## ----bootstrap_models1--------------------------------------------------------
# refit model using nlsLM
fit_nlsLM <- minpack.lm::nlsLM(
  rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 15),
  data = d,
  start = coef(d_fit$sharpeschoolhigh[[1]]),
  lower = get_lower_lims(d$temp, d$rate, model_name = 'sharpeschoolhigh_1981'),
  upper = get_upper_lims(d$temp, d$rate, model_name = 'sharpeschoolhigh_1981'),
  weights = rep(1, times = nrow(d))
)

# bootstrap using case resampling
boot1 <- Boot(fit_nlsLM, method = 'case')

# look at the data
head(boot1$t)


## ----hist_boot, fig.height=7, fig.width=8-------------------------------------
hist(boot1, layout = c(2, 2))

## ----plot_boots, fig.height=3, fig.width=8, message=FALSE---------------------
# create predictions of each bootstrapped model
boot1_preds <- boot1$t %>%
  as.data.frame() %>%
  drop_na() %>%
  mutate(iter = 1:n()) %>%
  group_by_all() %>%
  do(data.frame(temp = seq(min(d$temp), max(d$temp), length.out = 100))) %>%
  ungroup() %>%
  mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15))

# calculate bootstrapped confidence intervals
boot1_conf_preds <- group_by(boot1_preds, temp) %>%
  summarise(
    conf_lower = quantile(pred, 0.025),
    conf_upper = quantile(pred, 0.975)
  ) %>%
  ungroup()

# plot bootstrapped CIs
p1 <- ggplot() +
  geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
  geom_ribbon(
    aes(temp, ymin = conf_lower, ymax = conf_upper),
    boot1_conf_preds,
    fill = 'blue',
    alpha = 0.3
  ) +
  geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) +
  theme_bw(base_size = 12) +
  labs(
    x = 'Temperature (ºC)',
    y = 'Growth rate',
    title = 'Growth rate across temperatures'
  )

# plot bootstrapped predictions
p2 <- ggplot() +
  geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
  geom_line(
    aes(temp, pred, group = iter),
    boot1_preds,
    col = 'blue',
    alpha = 0.007
  ) +
  geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) +
  theme_bw(base_size = 12) +
  labs(
    x = 'Temperature (ºC)',
    y = 'Growth rate',
    title = 'Growth rate across temperatures'
  )

p1 + p2


## ----bootstrap_case2, message=FALSE-------------------------------------------
# load in chlorella data
data('chlorella_tpc')

d2 <- filter(chlorella_tpc, curve_id == 1)

# fit Sharpe-Schoolfield model to raw data
d_fit <- nest(d2, data = c(temp, rate)) %>%
  mutate(
    sharpeschoolhigh = map(
      data,
      ~ nls_multstart(
        rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 15),
        data = .x,
        iter = c(3, 3, 3, 3),
        start_lower = get_start_vals(
          .x$temp,
          .x$rate,
          model_name = 'sharpeschoolhigh_1981'
        ) -
          10,
        start_upper = get_start_vals(
          .x$temp,
          .x$rate,
          model_name = 'sharpeschoolhigh_1981'
        ) +
          10,
        lower = get_lower_lims(
          .x$temp,
          .x$rate,
          model_name = 'sharpeschoolhigh_1981'
        ),
        upper = get_upper_lims(
          .x$temp,
          .x$rate,
          model_name = 'sharpeschoolhigh_1981'
        ),
        supp_errors = 'Y',
        convergence_count = FALSE
      )
    ),
    # create new temperature data
    new_data = map(
      data,
      ~ tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100))
    ),
    # predict over that data,
    preds = map2(sharpeschoolhigh, new_data, ~ augment(.x, newdata = .y))
  )

# refit model using nlsLM
fit_nlsLM2 <- nlsLM(
  rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 15),
  data = d2,
  start = coef(d_fit$sharpeschoolhigh[[1]]),
  lower = get_lower_lims(
    d2$temp,
    d2$rate,
    model_name = 'sharpeschoolhigh_1981'
  ),
  upper = get_upper_lims(
    d2$temp,
    d2$rate,
    model_name = 'sharpeschoolhigh_1981'
  ),
  control = nls.lm.control(maxiter = 500),
  weights = rep(1, times = nrow(d2))
)

# bootstrap using case resampling
boot2 <- Boot(fit_nlsLM2, method = 'case')

## ----bootstrap_case2_plot, fig.height=3,fig.width=8---------------------------
# unnest predictions of original model fit
d_preds <- select(d_fit, preds) %>%
  unnest(preds)

# predict over new data
boot2_preds <- boot2$t %>%
  as.data.frame() %>%
  drop_na() %>%
  mutate(iter = 1:n()) %>%
  group_by_all() %>%
  do(data.frame(temp = seq(min(d2$temp), max(d2$temp), length.out = 100))) %>%
  ungroup() %>%
  mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15))

# calculate bootstrapped confidence intervals
boot2_conf_preds <- group_by(boot2_preds, temp) %>%
  summarise(
    conf_lower = quantile(pred, 0.025),
    conf_upper = quantile(pred, 0.975)
  ) %>%
  ungroup()

# plot bootstrapped CIs
p1 <- ggplot() +
  geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
  geom_ribbon(
    aes(temp, ymin = conf_lower, ymax = conf_upper),
    boot2_conf_preds,
    fill = 'blue',
    alpha = 0.3
  ) +
  geom_point(aes(temp, rate), d2, size = 2) +
  theme_bw(base_size = 12) +
  labs(
    x = 'Temperature (ºC)',
    y = 'Growth rate',
    title = 'Growth rate across temperatures'
  )

# plot bootstrapped predictions
p2 <- ggplot() +
  geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
  geom_line(
    aes(temp, pred, group = iter),
    boot2_preds,
    col = 'blue',
    alpha = 0.007
  ) +
  geom_point(aes(temp, rate), d2, size = 2) +
  theme_bw(base_size = 12) +
  labs(
    x = 'Temperature (ºC)',
    y = 'Growth rate',
    title = 'Growth rate across temperatures'
  )

p1 + p2


## ----residual_resample_data, fig.height=3,fig.width=8-------------------------
# bootstrap using residual resampling
boot3 <- Boot(fit_nlsLM2, method = 'residual')

# predict over new data
boot3_preds <- boot3$t %>%
  as.data.frame() %>%
  drop_na() %>%
  mutate(iter = 1:n()) %>%
  group_by_all() %>%
  do(data.frame(temp = seq(min(d2$temp), max(d2$temp), length.out = 100))) %>%
  ungroup() %>%
  mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15))

# calculate bootstrapped confidence intervals
boot3_conf_preds <- group_by(boot3_preds, temp) %>%
  summarise(
    conf_lower = quantile(pred, 0.025),
    conf_upper = quantile(pred, 0.975)
  ) %>%
  ungroup()

# plot bootstrapped CIs
p1 <- ggplot() +
  geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
  geom_ribbon(
    aes(temp, ymin = conf_lower, ymax = conf_upper),
    boot3_conf_preds,
    fill = 'blue',
    alpha = 0.3
  ) +
  geom_point(aes(temp, rate), d2, size = 2) +
  theme_bw(base_size = 12) +
  labs(
    x = 'Temperature (ºC)',
    y = 'Growth rate',
    title = 'Growth rate across temperatures'
  )

# plot bootstrapped predictions
p2 <- ggplot() +
  geom_line(aes(temp, .fitted), d_preds, col = 'blue') +
  geom_line(
    aes(temp, pred, group = iter),
    boot3_preds,
    col = 'blue',
    alpha = 0.007
  ) +
  geom_point(aes(temp, rate), d2, size = 2) +
  theme_bw(base_size = 12) +
  labs(
    x = 'Temperature (ºC)',
    y = 'Growth rate',
    title = 'Growth rate across temperatures'
  )

p1 + p2


## ----confint_bact, error=TRUE, fig.height = 5, fig.width = 7------------------
try({

# First for the bacteria

# get parameters of fitted model
param_bact <- broom::tidy(fit_nlsLM) %>%
  select(param = term, estimate)

# calculate confidence intervals of models
ci_bact1 <- nlstools::confint2(fit_nlsLM, method = 'asymptotic') %>%
  as.data.frame() %>%
  rename(conf_lower = 1, conf_upper = 2) %>%
  rownames_to_column(., var = 'param') %>%
  mutate(method = 'asymptotic')
ci_bact2 <- confint(fit_nlsLM) %>%
  as.data.frame() %>%
  rename(conf_lower = 1, conf_upper = 2) %>%
  rownames_to_column(., var = 'param') %>%
  mutate(method = 'profile')

# CIs from case resampling
ci_bact3 <- confint(boot1, method = 'bca') %>%
  as.data.frame() %>%
  rename(conf_lower = 1, conf_upper = 2) %>%
  rownames_to_column(., var = 'param') %>%
  mutate(method = 'case bootstrap')

# CIs from residual resampling
ci_bact4 <- Boot(fit_nlsLM, method = 'residual') %>%
  confint(., method = 'bca') %>%
  as.data.frame() %>%
  rename(conf_lower = 1, conf_upper = 2) %>%
  rownames_to_column(., var = 'param') %>%
  mutate(method = 'residual bootstrap')

ci_bact <- bind_rows(ci_bact1, ci_bact2, ci_bact3, ci_bact4) %>%
  left_join(., param_bact)

# plot
ggplot(
  ci_bact,
  aes(
    forcats::fct_relevel(method, c('profile', 'asymptotic')),
    estimate,
    col = method
  )
) +
  geom_hline(
    aes(yintercept = conf_lower),
    linetype = 2,
    filter(ci_bact, method == 'profile')
  ) +
  geom_hline(
    aes(yintercept = conf_upper),
    linetype = 2,
    filter(ci_bact, method == 'profile')
  ) +
  geom_point(size = 4) +
  geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
  theme_bw() +
  facet_wrap(~param, scales = 'free') +
  scale_x_discrete('', labels = function(x) stringr::str_wrap(x, width = 10)) +
  labs(
    title = 'Calculation of confidence intervals for model parameters',
    subtitle = 'For the bacteria TPC; dashed lines are CI of profiling method'
  )


})

## ----confint_chlor, error=TRUE, fig.height = 5, fig.width = 7, warning=FALSE----
try({
# Second for Chlorella data

# get parameters of fitted model
param_chlor <- broom::tidy(fit_nlsLM2) %>%
  select(param = term, estimate)

# calculate confidence intervals of models
ci_chlor1 <- nlstools::confint2(fit_nlsLM2, method = 'asymptotic') %>%
  as.data.frame() %>%
  rename(conf_lower = 1, conf_upper = 2) %>%
  rownames_to_column(., var = 'param') %>%
  mutate(method = 'asymptotic')
ci_chlor2 <- nlstools::confint2(fit_nlsLM2, method = 'profile')
# profiling method fails
ci_chlor2 <- mutate(
  ci_chlor1,
  method = 'profile',
  conf_lower = NA,
  conf_upper = NA
)

# CIs from case resampling
ci_chlor3 <- confint(boot2, method = 'bca') %>%
  as.data.frame() %>%
  rename(conf_lower = 1, conf_upper = 2) %>%
  rownames_to_column(., var = 'param') %>%
  mutate(method = 'case bootstrap')

# CIs from residual resampling
ci_chlor4 <- confint(boot3, method = 'bca') %>%
  as.data.frame() %>%
  rename(conf_lower = 1, conf_upper = 2) %>%
  rownames_to_column(., var = 'param') %>%
  mutate(method = 'residual bootstrap')

ci_chlor <- bind_rows(ci_chlor1, ci_chlor2, ci_chlor3, ci_chlor4) %>%
  full_join(., param_chlor)

ggplot(
  ci_chlor,
  aes(
    forcats::fct_relevel(method, c('profile', 'asymptotic')),
    estimate,
    col = method
  )
) +
  geom_point(size = 4) +
  geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
  theme_bw() +
  facet_wrap(~param, scales = 'free') +
  scale_x_discrete('', labels = function(x) stringr::str_wrap(x, width = 10)) +
  labs(
    title = 'Calculation of confidence intervals for model parameters',
    subtitle = 'For the chlorella TPC; profile method failes'
  )


})

## ----ci_calc_param, fig.width = 7, fig.height = 6-----------------------------
extra_params <- calc_params(fit_nlsLM) %>%
  pivot_longer(everything(), names_to = 'param', values_to = 'estimate')

ci_extra_params <- Boot(
  fit_nlsLM,
  f = function(x) {
    unlist(calc_params(x))
  },
  labels = names(calc_params(fit_nlsLM)),
  R = 200,
  method = 'case'
) %>%
  confint(., method = 'bca') %>%
  as.data.frame() %>%
  rename(conf_lower = 1, conf_upper = 2) %>%
  rownames_to_column(., var = 'param') %>%
  mutate(method = 'case bootstrap')

ci_extra_params <- left_join(ci_extra_params, extra_params)

ggplot(ci_extra_params, aes(param, estimate)) +
  geom_point(size = 4) +
  geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) +
  theme_bw() +
  facet_wrap(~param, scales = 'free') +
  scale_x_discrete('') +
  labs(
    title = 'Calculation of confidence intervals for extra parameters',
    subtitle = 'For the bacteria TPC; using case resampling'
  )


## ----tot_time, include=FALSE--------------------------------------------------

tot_time <- lubridate::as.duration(lubridate::now() - runstart)