https://github.com/mrc-ide/human_tsol_FoI_modelling
Raw File
Tip revision: b3eb4c05a42a882c13fb755c3e50dfda0f3e4ef3 authored by Matt on 09 February 2022, 19:33:21 UTC
updated readme - data availability
Tip revision: b3eb4c0
master_multipledataset_script.R
#==============================================================================================================================#
#                                       MASTER SCRIPT  - multiple dataset fitting                                              #
#==============================================================================================================================#

rm(list = ls())

#=============================================#
#       Load data files                       #
#=============================================#

#=======================#                                                                                             
# Initiatie sub-scripts #                                                                                             
source('libraries.R')
source('plot_data_modelfit_functions.R')
source('diagnostic_parameter_functions.R')
source('predicted_prevalence_functions.R')
source('MCMC_functions_singledataset.R')
source('plot_MCMC_output_functions.R')
source('process_MCMC_output_functions.R')

#======================#
#    load data         #
#======================#

test_data_multipledataset <- read.csv("~/human_tsol_FoI_modelling/data/test_data_multipledataset.csv")

data <- test_data_multipledataset

data <- data[,c(which(colnames(data)=="ID"),which(colnames(data)!="ID"))]

names(data) <- c("dataset", "age", "pos", "n","prev","lower","upper","ref")

data$ref <- as.factor(data$ref)

plot_ageprev_func2(data) # plot age-prevalence data

#===============================================#
#  optimise parameters for diagnostic priors    #

# sensitivity #
sensitivity_parameters <- estimate_alpha_beta_par_diagnostic(input_alpha = 97, input_beta = 3, 
                                                             target_l = 0.95, target_u = 1.00)

alpha_se <- sensitivity_parameters[[1]]
alpha_se 
beta_se <- sensitivity_parameters[[2]]
beta_se

# specifcity #
specificity_parameters <- estimate_alpha_beta_par_diagnostic(input_alpha = 97, input_beta = 3, 
                                                             target_l = 0.94, target_u = 0.99)

alpha_sp <- specificity_parameters[[1]]
alpha_sp
beta_sp <- specificity_parameters[[2]]
beta_sp

#===========================================================================================================================#
#==================================================#
#  run MCMC (multiple datasets; simple FoI model)  #

inits1 <- c(0.95, 0.955, 0.001, 0.1,0.01,0.01,0.1)   # Initial parameter values to initiate chains
inits2 <- c(0.99, 0.995, 0.01, 0.01, 0.1, 0.1,0.01) # e.g. Ab−EITB, rT24H (se: 0.96 (0.93-0.99), sp: 0.98 (0.96-1), Noh et al. 2014) 
sd <- 0.0003 # set standard deviation of proposal distribution; aim for 0.25 acceptance
cov <- diag(sd^2, 2+length(unique(data$dataset))) # covariance
niter <- 100000 # number of iterations
burnin <- 50000 # burnin (chains to discard before convergence)

# run MCMC (chain 1)
set.seed(123) # for reproducibility
simple_out_chain1 <- MCMC_simple_model(inits1, niter, cov, fitting = "multiple datasets")  # initiate the MCMC

# run MCMC (chain 1)
set.seed(123)
simple_out_chain2 <- MCMC_simple_model(inits2, niter, cov, fitting = "multiple datasets")  # initiate the MCMC

# whats the acceptance ratio (aiming for 0.25)
sum(simple_out_chain1$Acceptances)/niter
sum(simple_out_chain2$Acceptances)/niter

# plot MCMC outputs #
chains_plot <- chains_plot_multidatasets_func(inits1 = inits1, chain1 = simple_out_chain1, chain2 = simple_out_chain2,
                                model = "simple", number_datasets = 5)

histograms_plot <- histogram_plot_multidatasets_func(inits1 = inits1, burnin = burnin, niter = niter, 
                                       chain1 = simple_out_chain1, chain2 = simple_out_chain2, model = "simple",
                                       number_datasets = 5)

#=============================================# 
#         process MCMC outputs                # 

chains1_output <- simple_out_chain1$MCMC_Output
chains2_output <- simple_out_chain2$MCMC_Output

# remove burnin and proceed with reducing autocorrelation (thinning by sub-sampling)
PC_simple <- Process_chains(chains1_output, chains2_output, burnin = burnin, sample = 100) # set burnin to 0 if already

# View the process chains (from the autocorrelation plots, sampling every 20th value seems appropriate)
plot_chains_multidatasets(PC_simple[[1]], PC_simple[[2]], inits = inits1, number_datasets = 5, model = "simple")

# check autocorrelation of chains for each parameter (to inform sub-sampling)
check_autocorr <- determine_autocorrelation_func1(processed_chain = PC_simple, number_datasets = 5)

check_autocorr[1] # autocorrelation significance parameter 1 
check_autocorr[2] # autocorrelation significance parameter 2 
check_autocorr[3] # autocorrelation significance parameter 3 
check_autocorr[4] # autocorrelation significance parameter 4 
check_autocorr[5] # autocorrelation significance parameter 5 
check_autocorr[6] # autocorrelation significance parameter 6 
check_autocorr[7] # autocorrelation significance parameter 7 

# plot loglikelihood
loglikchains_plot_func(chain1 = simple_out_chain1, chain2 = simple_out_chain2)

#==============================================================================#
# Obtain parameter values (median & credible) & plot posterior distributions   #

simple_model_parameters <- obtain_parameter_values_func(processed_chains = PC_simple, model = "simple", number_datasets = 5)
simple_model_parameters

plot_posterior_distrib_func(processed_chains = PC_simple, model = "simple", number_datasets = 5)

#============================================================#
# calculate Deviance Information Criterion (DIC) - model fit #

DIC_result <- calculate_DIC_func1(chain = simple_out_chain1, burnin = burnin, subsample = 50, 
                                 parameters = simple_model_parameters, number_datasets = 5)
DIC_result  # 1) D bar model1, 2) modal posterior likelihood, 3) modal posterior deviance, 4) DIC

#====================================================================================================================#
# calculate (with posterior parameter estimates) predicted (sero)prevalence and unceetainty intervals (for plotting) #

predicted_prev_output <- calculate_predicted_prevalence_multipledatasets_function1(max_age_toplot = 90, data = data, 
                                                                 pars = simple_model_parameters,
                                                                 processed_chains = PC_simple, number_datasets = 5,
                                                                 model = "simple")

predicted_prev_output[[1]] # predicted prevalence plot (ylim 0-100% prev)
predicted_prev_output[[2]] # predicted prevalence plot (ylim 0-50% prev)
predicted_prev_output[[3]] # predicted prevalence plot (ylim 0-25% prev)

#===========================================================================================================================#
#======================================================#
#  run MCMC (multiple datasets; reversible FoI model)  #

inits1 <- c(0.99, 0.95, 0.00003, 0.001,0.005,0.0001,0.0001,0.1,0.1,0.1,0.1,0.1)  # Initial parameter values to initiate chains
inits2 <- c(0.96, 0.99, 0.001, 0.04, 0.2, 0.04,0.04,0.01,0.01,0.01,0.01,0.01) # e.g. Ab−EITB, rT24H (se: 0.96 (0.93-0.99), sp: 0.98 (0.96-1), Noh et al. 2014) 
sd <- 0.0006 # set standard deviation of proposal distribution; aim for 0.25 acceptance
cov <- diag(sd^2, 2+(2*length(unique(data$dataset)))) # covariance
niter <- 1000000 # number of iterations
burnin <- 500000 # burnin (chains to discard before convergence)

# run MCMC (chain 1)
set.seed(123) # for reproducibility
reversible_out_chain1 <- MCMC_reversible_model(inits1, niter, cov, simple_lambda_median = c(0.00007756672,
                                                                                            0.003205126,
                                                                                            0.008291567,
                                                                                            0.003112397,
                                                                                            0.003481667), 
                                               fitting = "multiple datasets")  # initiate the MCMC

# run MCMC (chain 1)
set.seed(123)
reversible_out_chain2 <- MCMC_reversible_model(inits2, niter, cov, simple_lambda_median = c(0.00007756672,
                                                                                            0.003205126,
                                                                                            0.008291567,
                                                                                            0.003112397,
                                                                                            0.003481667),
                                               fitting = "multiple datasets")  # initiate the MCMC

# whats the acceptance ratio (aiming for 0.25)
sum(reversible_out_chain1$Acceptances)/niter
sum(reversible_out_chain2$Acceptances)/niter

# plot MCMC outputs #
chains_plot <- chains_plot_func(inits1 = inits1, chain1 = reversible_out_chain1, chain2 = reversible_out_chain2, 
                                model = "reversible")

histograms_plot <- histogram_plot_func(inits1 = inits1, burnin = burnin, niter = niter, 
                                       chain1 = reversible_out_chain1, chain2 = reversible_out_chain2,
                                       model = "reversible")

#=============================================# 
#         process MCMC outputs                # 

chains1_output <- reversible_out_chain1$MCMC_Output
chains2_output <- reversible_out_chain2$MCMC_Output

# remove burnin and proceed with reducing autocorrelation (thinning by sub-sampling)
PC_reversible <- Process_chains(chains1_output, chains2_output, burnin = burnin, sample = 50) # set burnin to 0 if already

# View the process chains (from the autocorrelation plots, sampling every 20th value seems appropriate)
plot_chains_multidatasets(PC_reversible[[1]], PC_reversible[[2]], inits = inits1, number_datasets = 5, model = "reversible")

# check autocorrelation of chains for each parameter (to inform sub-sampling)
check_autocorr <- determine_autocorrelation_func2(processed_chain = PC_reversible, number_datasets = 5)

check_autocorr[1] # autocorrelation significance parameter 1 
check_autocorr[2] # autocorrelation significance parameter 2 
check_autocorr[3] # autocorrelation significance parameter 3 
check_autocorr[4] # autocorrelation significance parameter 4 
check_autocorr[5] # autocorrelation significance parameter 5 
check_autocorr[6] # autocorrelation significance parameter 6 
check_autocorr[7] # autocorrelation significance parameter 7 
check_autocorr[8] # autocorrelation significance parameter 3 
check_autocorr[9] # autocorrelation significance parameter 4 
check_autocorr[10] # autocorrelation significance parameter 5 
check_autocorr[11] # autocorrelation significance parameter 6 
check_autocorr[12] # autocorrelation significance parameter 7 

# plot loglikelihood
loglikchains_plot_func(chain1 = reversible_out_chain1, chain2 = reversible_out_chain2)

#==============================================================================#
# Obtain parameter values (median & credible) & plot posterior distributions   #

reversible_model_parameters <- obtain_parameter_values_func(processed_chains = PC_reversible, model = "reversible", number_datasets = 5)
reversible_model_parameters

plot_posterior_distrib_func(processed_chains = PC_reversible, model = "reversible", number_datasets = 5)

#============================================================#
# calculate Deviance Information Criterion (DIC) - model fit #

DIC_result <- calculate_DIC_func2(chain = reversible_out_chain1, burnin = burnin, subsample = 50, 
                                 parameters = reversible_model_parameters, number_datasets = 5,
                                 simple_lambda_median = c(0.00007756672, 0.003205126, 0.008291567, 0.003112397, 0.003481667))
DIC_result  # 1) D bar model1, 2) modal posterior likelihood, 3) modal posterior deviance, 4) DIC

#====================================================================================================================#
# calculate (with posterior parameter estimates) predicted (sero)prevalence and unceetainty intervals (for plotting) #

predicted_prev_output <- calculate_predicted_prevalence_multipledatasets_function1(max_age_toplot = 90, data = data, 
                                                                                   pars = reversible_model_parameters,
                                                                                   processed_chains = PC_reversible, number_datasets = 5,
                                                                                   model = "reversible")

predicted_prev_output[[1]] # predicted prevalence plot (ylim 0-100% prev)
#predicted_prev_output[[2]] # predicted prevalence plot (ylim 0-50% prev)
#predicted_prev_output[[3]] # predicted prevalence plot (ylim 0-25% prev)


back to top