## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", #tidy.opts=list(width.cutoff=60), #tidy=TRUE, fig.align = 'center', warning = FALSE ) ## ----runtimer, include=FALSE-------------------------------------------------- runstart <- lubridate::now() ## ----setup, message=FALSE----------------------------------------------------- # load packages library(boot) library(car) library(rTPC) library(nls.multstart) library(broom) library(tidyverse) library(patchwork) library(minpack.lm) ## ----load_data, fig.height=4, fig.width=6------------------------------------- # load in data data("bacteria_tpc") # keep just a single curve d <- filter(bacteria_tpc, phage == 'nophage') # show the data ggplot(d, aes(temp, rate)) + geom_point(size = 2, alpha = 0.5) + theme_bw(base_size = 12) + labs( x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures' ) ## ----fit_and_plot, fig.height=4, fig.width=6---------------------------------- # fit Sharpe-Schoolfield model d_fit <- nest(d, data = c(temp, rate)) %>% mutate( sharpeschoolhigh = map( data, ~ nls_multstart( rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 15), data = .x, iter = c(3, 3, 3, 3), start_lower = get_start_vals( .x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981' ) - 10, start_upper = get_start_vals( .x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981' ) + 10, lower = get_lower_lims( .x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981' ), upper = get_upper_lims( .x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981' ), supp_errors = 'Y', convergence_count = FALSE ) ), # create new temperature data new_data = map( data, ~ tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100)) ), # predict over that data, preds = map2(sharpeschoolhigh, new_data, ~ augment(.x, newdata = .y)) ) # unnest predictions d_preds <- select(d_fit, preds) %>% unnest(preds) # plot data and predictions ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) + theme_bw(base_size = 12) + labs( x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures' ) ## ----bootstrap_models1-------------------------------------------------------- # refit model using nlsLM fit_nlsLM <- minpack.lm::nlsLM( rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 15), data = d, start = coef(d_fit$sharpeschoolhigh[[1]]), lower = get_lower_lims(d$temp, d$rate, model_name = 'sharpeschoolhigh_1981'), upper = get_upper_lims(d$temp, d$rate, model_name = 'sharpeschoolhigh_1981'), weights = rep(1, times = nrow(d)) ) # bootstrap using case resampling boot1 <- Boot(fit_nlsLM, method = 'case') # look at the data head(boot1$t) ## ----hist_boot, fig.height=7, fig.width=8------------------------------------- hist(boot1, layout = c(2, 2)) ## ----plot_boots, fig.height=3, fig.width=8, message=FALSE--------------------- # create predictions of each bootstrapped model boot1_preds <- boot1$t %>% as.data.frame() %>% drop_na() %>% mutate(iter = 1:n()) %>% group_by_all() %>% do(data.frame(temp = seq(min(d$temp), max(d$temp), length.out = 100))) %>% ungroup() %>% mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15)) # calculate bootstrapped confidence intervals boot1_conf_preds <- group_by(boot1_preds, temp) %>% summarise( conf_lower = quantile(pred, 0.025), conf_upper = quantile(pred, 0.975) ) %>% ungroup() # plot bootstrapped CIs p1 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_ribbon( aes(temp, ymin = conf_lower, ymax = conf_upper), boot1_conf_preds, fill = 'blue', alpha = 0.3 ) + geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) + theme_bw(base_size = 12) + labs( x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures' ) # plot bootstrapped predictions p2 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_line( aes(temp, pred, group = iter), boot1_preds, col = 'blue', alpha = 0.007 ) + geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) + theme_bw(base_size = 12) + labs( x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures' ) p1 + p2 ## ----bootstrap_case2, message=FALSE------------------------------------------- # load in chlorella data data('chlorella_tpc') d2 <- filter(chlorella_tpc, curve_id == 1) # fit Sharpe-Schoolfield model to raw data d_fit <- nest(d2, data = c(temp, rate)) %>% mutate( sharpeschoolhigh = map( data, ~ nls_multstart( rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 15), data = .x, iter = c(3, 3, 3, 3), start_lower = get_start_vals( .x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981' ) - 10, start_upper = get_start_vals( .x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981' ) + 10, lower = get_lower_lims( .x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981' ), upper = get_upper_lims( .x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981' ), supp_errors = 'Y', convergence_count = FALSE ) ), # create new temperature data new_data = map( data, ~ tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100)) ), # predict over that data, preds = map2(sharpeschoolhigh, new_data, ~ augment(.x, newdata = .y)) ) # refit model using nlsLM fit_nlsLM2 <- nlsLM( rate ~ sharpeschoolhigh_1981(temp = temp, r_tref, e, eh, th, tref = 15), data = d2, start = coef(d_fit$sharpeschoolhigh[[1]]), lower = get_lower_lims( d2$temp, d2$rate, model_name = 'sharpeschoolhigh_1981' ), upper = get_upper_lims( d2$temp, d2$rate, model_name = 'sharpeschoolhigh_1981' ), control = nls.lm.control(maxiter = 500), weights = rep(1, times = nrow(d2)) ) # bootstrap using case resampling boot2 <- Boot(fit_nlsLM2, method = 'case') ## ----bootstrap_case2_plot, fig.height=3,fig.width=8--------------------------- # unnest predictions of original model fit d_preds <- select(d_fit, preds) %>% unnest(preds) # predict over new data boot2_preds <- boot2$t %>% as.data.frame() %>% drop_na() %>% mutate(iter = 1:n()) %>% group_by_all() %>% do(data.frame(temp = seq(min(d2$temp), max(d2$temp), length.out = 100))) %>% ungroup() %>% mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15)) # calculate bootstrapped confidence intervals boot2_conf_preds <- group_by(boot2_preds, temp) %>% summarise( conf_lower = quantile(pred, 0.025), conf_upper = quantile(pred, 0.975) ) %>% ungroup() # plot bootstrapped CIs p1 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_ribbon( aes(temp, ymin = conf_lower, ymax = conf_upper), boot2_conf_preds, fill = 'blue', alpha = 0.3 ) + geom_point(aes(temp, rate), d2, size = 2) + theme_bw(base_size = 12) + labs( x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures' ) # plot bootstrapped predictions p2 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_line( aes(temp, pred, group = iter), boot2_preds, col = 'blue', alpha = 0.007 ) + geom_point(aes(temp, rate), d2, size = 2) + theme_bw(base_size = 12) + labs( x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures' ) p1 + p2 ## ----residual_resample_data, fig.height=3,fig.width=8------------------------- # bootstrap using residual resampling boot3 <- Boot(fit_nlsLM2, method = 'residual') # predict over new data boot3_preds <- boot3$t %>% as.data.frame() %>% drop_na() %>% mutate(iter = 1:n()) %>% group_by_all() %>% do(data.frame(temp = seq(min(d2$temp), max(d2$temp), length.out = 100))) %>% ungroup() %>% mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15)) # calculate bootstrapped confidence intervals boot3_conf_preds <- group_by(boot3_preds, temp) %>% summarise( conf_lower = quantile(pred, 0.025), conf_upper = quantile(pred, 0.975) ) %>% ungroup() # plot bootstrapped CIs p1 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_ribbon( aes(temp, ymin = conf_lower, ymax = conf_upper), boot3_conf_preds, fill = 'blue', alpha = 0.3 ) + geom_point(aes(temp, rate), d2, size = 2) + theme_bw(base_size = 12) + labs( x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures' ) # plot bootstrapped predictions p2 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_line( aes(temp, pred, group = iter), boot3_preds, col = 'blue', alpha = 0.007 ) + geom_point(aes(temp, rate), d2, size = 2) + theme_bw(base_size = 12) + labs( x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures' ) p1 + p2 ## ----confint_bact, error=TRUE, fig.height = 5, fig.width = 7------------------ try({ # First for the bacteria # get parameters of fitted model param_bact <- broom::tidy(fit_nlsLM) %>% select(param = term, estimate) # calculate confidence intervals of models ci_bact1 <- nlstools::confint2(fit_nlsLM, method = 'asymptotic') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'asymptotic') ci_bact2 <- confint(fit_nlsLM) %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'profile') # CIs from case resampling ci_bact3 <- confint(boot1, method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'case bootstrap') # CIs from residual resampling ci_bact4 <- Boot(fit_nlsLM, method = 'residual') %>% confint(., method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'residual bootstrap') ci_bact <- bind_rows(ci_bact1, ci_bact2, ci_bact3, ci_bact4) %>% left_join(., param_bact) # plot ggplot( ci_bact, aes( forcats::fct_relevel(method, c('profile', 'asymptotic')), estimate, col = method ) ) + geom_hline( aes(yintercept = conf_lower), linetype = 2, filter(ci_bact, method == 'profile') ) + geom_hline( aes(yintercept = conf_upper), linetype = 2, filter(ci_bact, method == 'profile') ) + geom_point(size = 4) + geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) + theme_bw() + facet_wrap(~param, scales = 'free') + scale_x_discrete('', labels = function(x) stringr::str_wrap(x, width = 10)) + labs( title = 'Calculation of confidence intervals for model parameters', subtitle = 'For the bacteria TPC; dashed lines are CI of profiling method' ) }) ## ----confint_chlor, error=TRUE, fig.height = 5, fig.width = 7, warning=FALSE---- try({ # Second for Chlorella data # get parameters of fitted model param_chlor <- broom::tidy(fit_nlsLM2) %>% select(param = term, estimate) # calculate confidence intervals of models ci_chlor1 <- nlstools::confint2(fit_nlsLM2, method = 'asymptotic') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'asymptotic') ci_chlor2 <- nlstools::confint2(fit_nlsLM2, method = 'profile') # profiling method fails ci_chlor2 <- mutate( ci_chlor1, method = 'profile', conf_lower = NA, conf_upper = NA ) # CIs from case resampling ci_chlor3 <- confint(boot2, method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'case bootstrap') # CIs from residual resampling ci_chlor4 <- confint(boot3, method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'residual bootstrap') ci_chlor <- bind_rows(ci_chlor1, ci_chlor2, ci_chlor3, ci_chlor4) %>% full_join(., param_chlor) ggplot( ci_chlor, aes( forcats::fct_relevel(method, c('profile', 'asymptotic')), estimate, col = method ) ) + geom_point(size = 4) + geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) + theme_bw() + facet_wrap(~param, scales = 'free') + scale_x_discrete('', labels = function(x) stringr::str_wrap(x, width = 10)) + labs( title = 'Calculation of confidence intervals for model parameters', subtitle = 'For the chlorella TPC; profile method failes' ) }) ## ----ci_calc_param, fig.width = 7, fig.height = 6----------------------------- extra_params <- calc_params(fit_nlsLM) %>% pivot_longer(everything(), names_to = 'param', values_to = 'estimate') ci_extra_params <- Boot( fit_nlsLM, f = function(x) { unlist(calc_params(x)) }, labels = names(calc_params(fit_nlsLM)), R = 200, method = 'case' ) %>% confint(., method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'case bootstrap') ci_extra_params <- left_join(ci_extra_params, extra_params) ggplot(ci_extra_params, aes(param, estimate)) + geom_point(size = 4) + geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) + theme_bw() + facet_wrap(~param, scales = 'free') + scale_x_discrete('') + labs( title = 'Calculation of confidence intervals for extra parameters', subtitle = 'For the bacteria TPC; using case resampling' ) ## ----tot_time, include=FALSE-------------------------------------------------- tot_time <- lubridate::as.duration(lubridate::now() - runstart)