https://github.com/mrc-ide/human_tsol_FoI_modelling
Tip revision: b3eb4c05a42a882c13fb755c3e50dfda0f3e4ef3 authored by Matt on 09 February 2022, 19:33:21 UTC
updated readme - data availability
updated readme - data availability
Tip revision: b3eb4c0
predicted_prevalence_functions.R
#=============================================================================================================================#
# Predicted prevalence functions #
#=============================================================================================================================#
#===============================#
# Simple Model (single dataset) #
# for MCMC
predicted_prev_func <- function(age, par){
lambda <- par[1]; se<- par[2]; sp <- par[3]
tp <- 1 - exp(-lambda * (data$age)) # true prevalence
op <- (1-sp) + (se+sp-1)*tp # observed prevalence Diggle et al 2011 Epi Res Int
op
}
# for single & multiple datasets (once fitted w/ parameter estimates to obtain prevalence curves for each datasets)
predicted_prev_func2 <- function(age, par){
lambda <- par[1]; se<- par[2]; sp <- par[3]
tp <- 1 - exp(-lambda * (age)) # true prevalence
op <- (1-sp) + (se+sp-1)*tp # observed prevalence Diggle et al 2011 Epi Res Int
op
}
#=========================================#
# Simple Model (multiple dataset fitting) #
predicted_prev_func_multidataset <- function(age, par){
# Sp and Se are the first to parameters in the vector of parameters
sp <- par[1]
se <- par[2]
# Site-specific lamdas are the remianing parameters in the vector of parameters
lambda_par <- par[3:length(par)]
# Repeat each site specific lambda for each age group in each dataset
lambda_all <- lambda_par[data$dataset]
# Prediction
tp <- 1 - exp(-lambda_all * data$age) # true prevalence
op <- (1-sp) + (se+sp-1)*tp # observed prevalence Diggle et al 2011 Epi Res Int
op
}
predicted_prev_multidataset_func2 <- function(age, par){
sp <- par[1]; se <- par[2]; lambda <- par[3]
tp <- 1 - exp(-lambda * (age)) # true prevalence
op <- (1-sp) + (se+sp-1)*tp # observed prevalence Diggle et al 2011 Epi Res Int
op
}
#===================================#
# reversible model (single dataset) #
predicted_prev_reversible_func <- function(age, par){
lambda <- par[1]; se<- par[2]; sp <- par[3]; rho <- par[4]
tp <- (lambda/(lambda + rho)) * (1 - exp(-(lambda + rho) *(data$age))) # true prevalence
op <- (1-sp) + (se+sp-1) * tp # observed prevalence Diggle et al 2011 Epi Res Int
op
}
predicted_prev_reversible_func2 <- function(age, par){
lambda <- par[1]; se<- par[2]; sp <- par[3]; rho <- par[4]
tp <- (lambda/(lambda + rho)) * (1 - exp(-(lambda + rho) *(age))) # true prevalence
op <- (1-sp) + (se+sp-1) * tp # observed prevalence Diggle et al 2011 Epi Res Int
op
}
#=====================================#
# reversible model (multiple dataset) #
# Model
predicted_prev_reversible_func_multidataset <- function(age, par){
# Sp and Se are the first to parameters in the vector of parameters
sp <- par[1]
se <- par[2]
# Site-specific lamdas are parameters 3: number of datasets in the vector of parameters
lambda_par <- par[3:(length(unique(data$dataset))+2)]
# Repeat each site specific lambda for each age group in each dataset
lambda_all <- lambda_par[data$dataset]
# Site-specific rhos are parameters
rho_par <- par[(2+length(unique(data$dataset))+1):(7+length(unique(data$dataset)))]
# Repeat each site specific lambda for each age group in each dataset
rho_all <- rho_par[data$dataset]
# Prediction
tp <- (lambda_all/(lambda_all + rho_all)) *(1 - exp(-(lambda_all + rho_all) * (data$age))) # true prevalence
op <- (1-sp) + (se+sp-1)*tp # observed prevalence Diggle et al 2011 Epi Res Int
op
}
# for multiple datasets (once fitted w/ parameter estimates to obtain prevalence curves for each datasets)
predicted_prev_reversible_func2_multidataset <- function(age, par){
sp <- par[1]; se <- par[2]; lambda <- par[3]; rho <- par[4]
tp <- (lambda/(lambda + rho)) * (1 - exp(-(lambda + rho) *(age))) # true prevalence
op <- (1-sp) + (se+sp-1) * tp # observed prevalence Diggle et al 2011 Epi Res Int
op
}
#================================================================#
# produce predicted prevalence curves (fitted to single dataset) #
calculate_predicted_prevalence_function <- function (max_age_toplot, data, pars, processed_chains, model) {
if(model == "simple"){
# specify posterior median parameters to enable predicted prevalence calculation
lambda.median <- pars[[1]]
se.median <- pars[[2]]
sp.median <- pars[[3]]
# set up dataframe and calculate (median) predicted prevalence
age_dum <- seq(from=0, to = max_age_toplot, by=0.005) ## If not already performed this step for simple catalytic model
fitted_curve_df <- as.data.frame(age_dum) ## make sequence of numbers (mean ages) for predicted variable
names(fitted_curve_df)[names(fitted_curve_df)=="age_dum"] <- "age"
predicted_median_curve <- full_join(fitted_curve_df, data)
predicted_median_curve$predicted <- sapply(1:nrow(predicted_median_curve),
function(i) predicted_prev_func2(age = predicted_median_curve$age[i],
c(lambda.median, se.median,
sp.median)))
# create uncertainty (credible interval) of model run resulting from posterior #
subsampled_model_outputs <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_func2(seq(0, max_age_toplot, 0.005),c(processed_chains[[1]][i,1],
processed_chains[[1]][i,2],
processed_chains[[1]][i,3]))
subsampled_model_outputs[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI <- cbind(Lower_obs, Upper_obs)
predicted_CrI <- as.data.frame(predicted_CrI)
predicted_CrI$age <- fitted_curve_df$age
}
if(model == "reversible"){
# specify posterior median parameters to enable predicted prevalence calculation
lambda.median <- pars[[1]]
se.median <- pars[[2]]
sp.median <- pars[[3]]
rho.median <- pars[[4]]
# set up dataframe and calculate (median) predicted prevalence
age_dum <- seq(from=0, to = max_age_toplot, by=0.005) ## If not already performed this step for simple catalytic model
fitted_curve_df <- as.data.frame(age_dum) ## make sequence of numbers (mean ages) for predicted variable
names(fitted_curve_df)[names(fitted_curve_df)=="age_dum"] <- "age"
predicted_median_curve <- full_join(fitted_curve_df, data)
predicted_median_curve$predicted <- sapply(1:nrow(predicted_median_curve),
function(i) predicted_prev_reversible_func2(
age = predicted_median_curve$age[i],
c(lambda.median, se.median, sp.median, rho.median)))
# create uncertainty (credible interval) of model run resulting from posterior #
subsampled_model_outputs <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_reversible_func2(seq(0, max_age_toplot, 0.005),c(processed_chains[[1]][i,1],
processed_chains[[1]][i,2],
processed_chains[[1]][i,3],
processed_chains[[1]][i,4]))
subsampled_model_outputs[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI <- cbind(Lower_obs, Upper_obs)
predicted_CrI <- as.data.frame(predicted_CrI)
predicted_CrI$age <- fitted_curve_df$age
}
# plot predicted prevalence curve & uncertainty band vs data
p <- ggplot() +
theme(legend.position = 'none') +
geom_point(data=predicted_median_curve, aes(x=age, y=prev))+
geom_errorbar(data=predicted_median_curve,aes(x=age, y=prev, ymin=lower, ymax=upper), width=0.8)+
geom_line(data=predicted_median_curve,aes(x=age, y=predicted), size= 1.1, colour='purple')+
geom_ribbon(data=predicted_CrI,aes(x=age, ymin=lower_credible_interval_processed,
ymax=upper_credible_interval_processed), fill="purple", alpha=0.1)+
ylim(0,1.0)+
labs(x="Age (months) of human host", y="(Sero)prevalence (0 -1)")+
theme_bw() +
theme(strip.background=element_rect(fill=NA, color=NA),
strip.text=element_text(size=11, face= "bold"),
axis.text.x = element_text(size = 18),
axis.text.y = element_text(size = 18),
plot.title = element_text(size = 18, hjust=0.5),
axis.title.x = element_text(size = 18, face= "bold"),
axis.title.y = element_text(size = 16, angle = 90, face= "bold"),
legend.position = c(0.83, 0.15),
legend.title=element_text(size=20),
legend.text=element_text(size=16))
p2 <- ggplot() +
theme(legend.position = 'none') +
geom_point(data=predicted_median_curve, aes(x=age, y=prev))+
geom_errorbar(data=predicted_median_curve,aes(x=age, y=prev, ymin=lower, ymax=upper), width=0.8)+
geom_line(data=predicted_median_curve,aes(x=age, y=predicted), size= 1.1, colour='purple')+
geom_ribbon(data=predicted_CrI,aes(x=age, ymin=lower_credible_interval_processed,
ymax=upper_credible_interval_processed), fill="purple", alpha=0.1)+
ylim(0,0.5)+
labs(x="Age (months) of human host", y="(Sero)prevalence (0 -1)")+
theme_bw() +
theme(strip.background=element_rect(fill=NA, color=NA),
strip.text=element_text(size=11, face= "bold"),
axis.text.x = element_text(size = 18),
axis.text.y = element_text(size = 18),
plot.title = element_text(size = 18, hjust=0.5),
axis.title.x = element_text(size = 18, face= "bold"),
axis.title.y = element_text(size = 16, angle = 90, face= "bold"),
legend.position = c(0.83, 0.15),
legend.title=element_text(size=20),
legend.text=element_text(size=16))
p3 <- ggplot() +
theme(legend.position = 'none') +
geom_point(data=predicted_median_curve, aes(x=age, y=prev))+
geom_errorbar(data=predicted_median_curve,aes(x=age, y=prev, ymin=lower, ymax=upper), width=0.8)+
geom_line(data=predicted_median_curve,aes(x=age, y=predicted), size= 1.1, colour='purple')+
geom_ribbon(data=predicted_CrI,aes(x=age, ymin=lower_credible_interval_processed,
ymax=upper_credible_interval_processed), fill="purple", alpha=0.1)+
ylim(0,0.25)+
labs(x="Age (months) of human host", y="(Sero)prevalence (0 -1)")+
theme_bw() +
theme(strip.background=element_rect(fill=NA, color=NA),
strip.text=element_text(size=11, face= "bold"),
axis.text.x = element_text(size = 18),
axis.text.y = element_text(size = 18),
plot.title = element_text(size = 18, hjust=0.5),
axis.title.x = element_text(size = 18, face= "bold"),
axis.title.y = element_text(size = 16, angle = 90, face= "bold"),
legend.position = c(0.83, 0.15),
legend.title=element_text(size=20),
legend.text=element_text(size=16))
return(list(p, p2, p3, predicted_median_curve, predicted_CrI))
}
#===============================================================================#
# produce predicted prevalence curves (simple model - fitted to single dataset) #
calculate_predicted_prevalence_multipledatasets_function1 <- function (max_age_toplot, data, pars, processed_chains,
model, number_datasets) {
## simple model ##
if(model == "simple"){
if(number_datasets == 5){
# specify posterior median parameters to enable predicted prevalence calculation
sp.median <- pars[[1]]
se.median <- pars[[2]]
lambda1.median <- pars[[3]]
lambda2.median <- pars[[4]]
lambda3.median <- pars[[5]]
lambda4.median <- pars[[6]]
lambda5.median <- pars[[7]]
# set up dataframes for predicted prevalence calculations (n = 5 datasets)
n <- length(unique(data$dataset))
eval(parse(text = paste0("data", seq(1:n), " <- ", split(data, data$dataset))))
eval(parse(text = paste0("data", seq(1:n), " <- as.data.frame(data", seq(1:number_datasets), ")"))) # change seq to 1: number of individual datasets
age_dum <- seq(from=0, to=max_age_toplot, by=0.005)
fitted_curve_df <- as.data.frame(age_dum) # make sequence of numbers (mean ages) for predicted variable
names(fitted_curve_df)[names(fitted_curve_df)=="age_dum"] <- "age"
predicted_median_curve1 <- full_join(fitted_curve_df, data1)
predicted_median_curve2 <- full_join(fitted_curve_df, data2)
predicted_median_curve3 <- full_join(fitted_curve_df, data3)
predicted_median_curve4 <- full_join(fitted_curve_df, data4)
predicted_median_curve5 <- full_join(fitted_curve_df, data5)
# calculate predicted prevalence for each dataset
predicted_median_curve1$predicted <- sapply(1:nrow(predicted_median_curve1), function(i) predicted_prev_multidataset_func2(age = predicted_median_curve1$age[i],
c(sp.median, se.median, lambda1.median)))
predicted_median_curve2$predicted <- sapply(1:nrow(predicted_median_curve2), function(i) predicted_prev_multidataset_func2(age = predicted_median_curve2$age[i],
c(sp.median, se.median, lambda2.median)))
predicted_median_curve3$predicted <- sapply(1:nrow(predicted_median_curve3), function(i) predicted_prev_multidataset_func2(age = predicted_median_curve3$age[i],
c(sp.median, se.median, lambda3.median)))
predicted_median_curve4$predicted <- sapply(1:nrow(predicted_median_curve4), function(i) predicted_prev_multidataset_func2(age = predicted_median_curve4$age[i],
c(sp.median, se.median, lambda4.median)))
predicted_median_curve5$predicted <- sapply(1:nrow(predicted_median_curve5), function(i) predicted_prev_multidataset_func2(age = predicted_median_curve5$age[i],
c(sp.median, se.median, lambda5.median)))
# make master dataframe combining all datasets
predicted_median_curve1$dataset_name <- rep(as.factor("data1"))
predicted_median_curve2$dataset_name <- rep(as.factor("data2"))
predicted_median_curve3$dataset_name <- rep(as.factor("data3"))
predicted_median_curve4$dataset_name <- rep(as.factor("data4"))
predicted_median_curve5$dataset_name <- rep(as.factor("data5"))
predicted_median_curve <- rbind(predicted_median_curve1, predicted_median_curve2, predicted_median_curve3, predicted_median_curve4, predicted_median_curve5)
# chains for each parameter to sample uncertainty from
sp.chain <- c(PC_simple[[1]][,1], PC_simple[[2]][,1])
se.chain <- c(PC_simple[[1]][,2], PC_simple[[2]][,2])
lambda1.chain <- c(PC_simple[[1]][,3], PC_simple[[2]][,3])
lambda2.chain <- c(PC_simple[[1]][,4], PC_simple[[2]][,4])
lambda3.chain <- c(PC_simple[[1]][,5], PC_simple[[2]][,5])
lambda4.chain <- c(PC_simple[[1]][,6], PC_simple[[2]][,6])
lambda5.chain <- c(PC_simple[[1]][,7], PC_simple[[2]][,7])
# calculate uncertainty (credible interval) of model run resulting from posterior #
# dataset 1 #
subsampled_model_outputs_dat1 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_multidataset_func2(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i], lambda1.chain[i]))
subsampled_model_outputs_dat1[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat1, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat1, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat1 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat1 <- as.data.frame(predicted_CrI_dat1)
predicted_CrI_dat1$age <- fitted_curve_df$age
predicted_CrI_dat1$dataset_name <- rep(as.factor("data1"))
names(predicted_CrI_dat1) <- c("lower", "upper", "age", "dataset_name")
# dataset 2 #
subsampled_model_outputs_dat2 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_multidataset_func2(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i], lambda2.chain[i]))
subsampled_model_outputs_dat2[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat2, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat2, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat2 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat2 <- as.data.frame(predicted_CrI_dat2)
predicted_CrI_dat2$age <- fitted_curve_df$age
predicted_CrI_dat2$dataset_name <- rep(as.factor("data2"))
names(predicted_CrI_dat2) <- c("lower", "upper", "age", "dataset_name")
# dataset 3 #
subsampled_model_outputs_dat3 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_multidataset_func2(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i], lambda3.chain[i]))
subsampled_model_outputs_dat3[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat3, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat3, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat3 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat3 <- as.data.frame(predicted_CrI_dat3)
predicted_CrI_dat3$age <- fitted_curve_df$age
predicted_CrI_dat3$dataset_name <- rep(as.factor("data3"))
names(predicted_CrI_dat3) <- c("lower", "upper", "age", "dataset_name")
# dataset 4 #
subsampled_model_outputs_dat4 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_multidataset_func2(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i], lambda4.chain[i]))
subsampled_model_outputs_dat4[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat4, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat4, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat4 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat4 <- as.data.frame(predicted_CrI_dat4)
predicted_CrI_dat4$age <- fitted_curve_df$age
predicted_CrI_dat4$dataset_name <- rep(as.factor("data4"))
names(predicted_CrI_dat4) <- c("lower", "upper", "age", "dataset_name")
# dataset 5 #
subsampled_model_outputs_dat5 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_multidataset_func2(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i], lambda5.chain[i]))
subsampled_model_outputs_dat5[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat5, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat5, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat5 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat5 <- as.data.frame(predicted_CrI_dat5)
predicted_CrI_dat5$age <- fitted_curve_df$age
predicted_CrI_dat5$dataset_name <- rep(as.factor("data5"))
names(predicted_CrI_dat5) <- c("lower", "upper", "age", "dataset_name")
# combined uncertainty dataframes across datasets
predicted_CrI <- rbind(predicted_CrI_dat1, predicted_CrI_dat2, predicted_CrI_dat3, predicted_CrI_dat4, predicted_CrI_dat5)
}
}
## reversible model ##
if(model == "reversible"){
if(number_datasets == 5){
# specify posterior median parameters to enable predicted prevalence calculation
sp.median <- pars[[1]]
se.median <- pars[[2]]
lambda1.median <- pars[[3]]
lambda2.median <- pars[[4]]
lambda3.median <- pars[[5]]
lambda4.median <- pars[[6]]
lambda5.median <- pars[[7]]
rho1.median <- pars[[8]]
rho2.median <- pars[[9]]
rho3.median <- pars[[10]]
rho4.median <- pars[[11]]
rho5.median <- pars[[12]]
# set up dataframes for predicted prevalence calculations (n = 5 datasets)
n <- length(unique(data$dataset))
eval(parse(text = paste0("data", seq(1:n), " <- ", split(data, data$dataset))))
eval(parse(text = paste0("data", seq(1:n), " <- as.data.frame(data", seq(1:number_datasets), ")"))) # change seq to 1: number of individual datasets
age_dum <- seq(from=0, to=max_age_toplot, by=0.005)
fitted_curve_df <- as.data.frame(age_dum) # make sequence of numbers (mean ages) for predicted variable
names(fitted_curve_df)[names(fitted_curve_df)=="age_dum"] <- "age"
predicted_median_curve1 <- full_join(fitted_curve_df, data1)
predicted_median_curve2 <- full_join(fitted_curve_df, data2)
predicted_median_curve3 <- full_join(fitted_curve_df, data3)
predicted_median_curve4 <- full_join(fitted_curve_df, data4)
predicted_median_curve5 <- full_join(fitted_curve_df, data5)
# calculate predicted prevalence for each dataset
predicted_median_curve1$predicted <- sapply(1:nrow(predicted_median_curve1), function(i) predicted_prev_reversible_func2_multidataset(age = predicted_median_curve1$age[i],
c(sp.median, se.median,
lambda1.median, rho1.median)))
predicted_median_curve2$predicted <- sapply(1:nrow(predicted_median_curve2), function(i) predicted_prev_reversible_func2_multidataset(age = predicted_median_curve2$age[i],
c(sp.median, se.median,
lambda2.median, rho2.median)))
predicted_median_curve3$predicted <- sapply(1:nrow(predicted_median_curve3), function(i) predicted_prev_reversible_func2_multidataset(age = predicted_median_curve3$age[i],
c(sp.median, se.median,
lambda3.median, rho3.median)))
predicted_median_curve4$predicted <- sapply(1:nrow(predicted_median_curve4), function(i) predicted_prev_reversible_func2_multidataset(age = predicted_median_curve4$age[i],
c(sp.median, se.median,
lambda4.median, rho4.median)))
predicted_median_curve5$predicted <- sapply(1:nrow(predicted_median_curve5), function(i) predicted_prev_reversible_func2_multidataset(age = predicted_median_curve5$age[i],
c(sp.median, se.median,
lambda5.median, rho5.median)))
# make master dataframe combining all datasets
predicted_median_curve1$dataset_name <- rep(as.factor("data1"))
predicted_median_curve2$dataset_name <- rep(as.factor("data2"))
predicted_median_curve3$dataset_name <- rep(as.factor("data3"))
predicted_median_curve4$dataset_name <- rep(as.factor("data4"))
predicted_median_curve5$dataset_name <- rep(as.factor("data5"))
predicted_median_curve <- rbind(predicted_median_curve1, predicted_median_curve2, predicted_median_curve3, predicted_median_curve4, predicted_median_curve5)
# chains for each parameter to sample uncertainty from
sp.chain <- c(PC_reversible[[1]][,1], PC_reversible[[2]][,1])
se.chain <- c(PC_reversible[[1]][,2], PC_reversible[[2]][,2])
lambda1.chain <- c(PC_reversible[[1]][,3], PC_reversible[[2]][,3])
lambda2.chain <- c(PC_reversible[[1]][,4], PC_reversible[[2]][,4])
lambda3.chain <- c(PC_reversible[[1]][,5], PC_reversible[[2]][,5])
lambda4.chain <- c(PC_reversible[[1]][,6], PC_reversible[[2]][,6])
lambda5.chain <- c(PC_reversible[[1]][,7], PC_reversible[[2]][,7])
rho1.chain <- c(PC_reversible[[1]][,8], PC_reversible[[2]][,8])
rho2.chain <- c(PC_reversible[[1]][,9], PC_reversible[[2]][,9])
rho3.chain <- c(PC_reversible[[1]][,10], PC_reversible[[2]][,10])
rho4.chain <- c(PC_reversible[[1]][,11], PC_reversible[[2]][,11])
rho5.chain <- c(PC_reversible[[1]][,12], PC_reversible[[2]][,12])
# calculate uncertainty (credible interval) of model run resulting from posterior #
# dataset 1 #
subsampled_model_outputs_dat1 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_reversible_func2_multidataset(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i],
lambda1.chain[i], rho1.chain[i]))
subsampled_model_outputs_dat1[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat1, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat1, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat1 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat1 <- as.data.frame(predicted_CrI_dat1)
predicted_CrI_dat1$age <- fitted_curve_df$age
predicted_CrI_dat1$dataset_name <- rep(as.factor("data1"))
names(predicted_CrI_dat1) <- c("lower", "upper", "age", "dataset_name")
# dataset 2 #
subsampled_model_outputs_dat2 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_reversible_func2_multidataset(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i],
lambda2.chain[i], rho2.chain[i]))
subsampled_model_outputs_dat2[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat2, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat2, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat2 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat2 <- as.data.frame(predicted_CrI_dat2)
predicted_CrI_dat2$age <- fitted_curve_df$age
predicted_CrI_dat2$dataset_name <- rep(as.factor("data2"))
names(predicted_CrI_dat2) <- c("lower", "upper", "age", "dataset_name")
# dataset 3 #
subsampled_model_outputs_dat3 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_reversible_func2_multidataset(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i],
lambda3.chain[i], rho3.chain[i]))
subsampled_model_outputs_dat3[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat3, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat3, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat3 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat3 <- as.data.frame(predicted_CrI_dat3)
predicted_CrI_dat3$age <- fitted_curve_df$age
predicted_CrI_dat3$dataset_name <- rep(as.factor("data3"))
names(predicted_CrI_dat3) <- c("lower", "upper", "age", "dataset_name")
# dataset 4 #
subsampled_model_outputs_dat4 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_reversible_func2_multidataset(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i],
lambda4.chain[i], rho4.chain[i]))
subsampled_model_outputs_dat4[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat4, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat4, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat4 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat4 <- as.data.frame(predicted_CrI_dat4)
predicted_CrI_dat4$age <- fitted_curve_df$age
predicted_CrI_dat4$dataset_name <- rep(as.factor("data4"))
names(predicted_CrI_dat4) <- c("lower", "upper", "age", "dataset_name")
# dataset 5 #
subsampled_model_outputs_dat5 <- matrix(NA, nrow = length(processed_chains[[1]][,1]), ncol = length(seq(0, 90, 0.005)))
for (i in 1:length(processed_chains[[1]][,1])){
single_model_output <- predicted_prev_reversible_func2_multidataset(seq(0, max_age_toplot, 0.005),c(sp.chain[i], se.chain[i],
lambda5.chain[i], rho5.chain[i]))
subsampled_model_outputs_dat5[i, ] <- single_model_output
}
lower_credible_interval_processed <- apply(subsampled_model_outputs_dat5, MARGIN = 2, quantile, prob = 0.025)
upper_credible_interval_processed <- apply(subsampled_model_outputs_dat5, MARGIN = 2, quantile, prob = 0.975)
Lower_obs <- as.data.frame(lower_credible_interval_processed)
Upper_obs <- as.data.frame(upper_credible_interval_processed)
predicted_CrI_dat5 <- cbind(Lower_obs, Upper_obs)
predicted_CrI_dat5 <- as.data.frame(predicted_CrI_dat5)
predicted_CrI_dat5$age <- fitted_curve_df$age
predicted_CrI_dat5$dataset_name <- rep(as.factor("data5"))
names(predicted_CrI_dat5) <- c("lower", "upper", "age", "dataset_name")
# combined uncertainty dataframes across datasets
predicted_CrI <- rbind(predicted_CrI_dat1, predicted_CrI_dat2, predicted_CrI_dat3, predicted_CrI_dat4, predicted_CrI_dat5)
}
}
# plot predicted prevalence curve & uncertainty band vs data
p <- ggplot() +
theme(legend.position = 'none') +
geom_point(data=predicted_median_curve, aes(x=age, y=prev))+
geom_errorbar(data=predicted_median_curve,aes(x=age, y=prev, ymin=lower, ymax=upper), width=0.8)+
geom_line(data=predicted_median_curve,aes(x=age, y=predicted), size= 1.1, colour='purple')+
geom_ribbon(data=predicted_CrI,aes(x=age, ymin=lower, ymax=upper), fill="purple", alpha=0.1)+
ylim(0,1.0)+
labs(x="Age (months) of human host", y="(Sero)prevalence (0 -1)")+
facet_wrap(~dataset_name, scales = "free")+
theme_bw() +
theme(strip.background=element_rect(fill=NA, color=NA),
strip.text=element_text(size=11, face= "bold"),
axis.text.x = element_text(size = 18),
axis.text.y = element_text(size = 18),
plot.title = element_text(size = 18, hjust=0.5),
axis.title.x = element_text(size = 18, face= "bold"),
axis.title.y = element_text(size = 16, angle = 90, face= "bold"),
legend.position = c(0.83, 0.15),
legend.title=element_text(size=20),
legend.text=element_text(size=16))
p2 <- ggplot() +
theme(legend.position = 'none') +
geom_point(data=predicted_median_curve, aes(x=age, y=prev))+
geom_errorbar(data=predicted_median_curve,aes(x=age, y=prev, ymin=lower, ymax=upper), width=0.8)+
geom_line(data=predicted_median_curve,aes(x=age, y=predicted), size= 1.1, colour='purple')+
geom_ribbon(data=predicted_CrI,aes(x=age, ymin=lower, ymax=upper), fill="purple", alpha=0.1)+
ylim(0,0.5)+
labs(x="Age (months) of human host", y="(Sero)prevalence (0 -1)")+
facet_wrap(~dataset_name, scales = "free")+
theme_bw() +
theme(strip.background=element_rect(fill=NA, color=NA),
strip.text=element_text(size=11, face= "bold"),
axis.text.x = element_text(size = 18),
axis.text.y = element_text(size = 18),
plot.title = element_text(size = 18, hjust=0.5),
axis.title.x = element_text(size = 18, face= "bold"),
axis.title.y = element_text(size = 16, angle = 90, face= "bold"),
legend.position = c(0.83, 0.15),
legend.title=element_text(size=20),
legend.text=element_text(size=16))
p3 <- ggplot() +
theme(legend.position = 'none') +
geom_point(data=predicted_median_curve, aes(x=age, y=prev))+
geom_errorbar(data=predicted_median_curve,aes(x=age, y=prev, ymin=lower, ymax=upper), width=0.8)+
geom_line(data=predicted_median_curve,aes(x=age, y=predicted), size= 1.1, colour='purple')+
geom_ribbon(data=predicted_CrI,aes(x=age, ymin=lower, ymax=upper), fill="purple", alpha=0.1)+
ylim(0,0.25)+
labs(x="Age (months) of human host", y="(Sero)prevalence (0 -1)")+
facet_wrap(~dataset_name, scales = "free")+
theme_bw() +
theme(strip.background=element_rect(fill=NA, color=NA),
strip.text=element_text(size=11, face= "bold"),
axis.text.x = element_text(size = 18),
axis.text.y = element_text(size = 18),
plot.title = element_text(size = 18, hjust=0.5),
axis.title.x = element_text(size = 18, face= "bold"),
axis.title.y = element_text(size = 16, angle = 90, face= "bold"),
legend.position = c(0.83, 0.15),
legend.title=element_text(size=20),
legend.text=element_text(size=16))
return(list(p, p2, p3, predicted_median_curve, predicted_CrI))
}