Content - e7e54d8baad96f8542b73538b93062895aca2dbc - fbf523f/master_singledataset_script.R

visit type:
Tip revision: b3eb4c05a42a882c13fb755c3e50dfda0f3e4ef3 authored by Matt on 09 February 2022, 19:33:21 UTC
updated readme - data availability
Tip revision: b3eb4c0
master_singledataset_script.R
#==============================================================================================================================#
#                                       MASTER SCRIPT  - single dataset fitting                                                #
#==============================================================================================================================#

rm(list = ls())

#=============================================#
#       Load data files                       #
#=============================================#

#=======================#                                                                                             
# Initiatie sub-scripts #                                                                                             
source('libraries.R')
source('plot_data_modelfit_functions.R')
source('diagnostic_parameter_functions.R')
source('predicted_prevalence_functions.R')
source('MCMC_functions_singledataset.R')
source('plot_MCMC_output_functions.R')
source('process_MCMC_output_functions.R')

#======================#
#    load data         #
#======================#

test_data_singledataset <- read.csv("~/human_tsol_FoI_modelling/data/test_data_singledataset.csv")

data <- test_data_singledataset

#data <- data_frame_Theis

names(data) <- c("age", "pos", "n","prev","lower","upper")

plot_ageprev_func(data) # plot age-prevalence data

#===============================================#
#  optimise parameters for diagnostic priors    #

# sensitivity #
sensitivity_parameters <- estimate_alpha_beta_par_diagnostic(input_alpha = 96, input_beta = 4, 
                                                             target_l = 0.93, target_u = 0.99)

alpha_se <- sensitivity_parameters[[1]]
alpha_se 
beta_se <- sensitivity_parameters[[2]]
beta_se

# specifcity #
specificity_parameters <- estimate_alpha_beta_par_diagnostic(input_alpha = 98, input_beta = 2, 
                                                             target_l = 0.96, target_u = 1)

alpha_sp <- specificity_parameters[[1]]
alpha_sp
beta_sp <- specificity_parameters[[2]]
beta_sp

#===============================================#
#  run MCMC (single dataset; simple FoI model)  #

inits1 <- c(0.1, 0.93, 0.96)   # Initial parameter values to initiate chains
inits2 <- c(0.0001, 0.99, 0.999) # e.g. Ab−EITB, rT24H (se: 0.96 (0.93-0.99), sp: 0.98 (0.96-1), Noh et al. 2014) 
sd <- 0.001 # set standard deviation of proposal distribution; aim for 0.25 acceptance
cov <- diag(sd^2, 3) # covariance
niter <- 100000 # number of iterations
burnin <- 50000 # burnin (chains to discard before convergence)

# run MCMC (chain 1)
set.seed(123) # for reproducibility
simple_out_chain1 <- MCMC_simple_model(inits1, niter, cov, fitting = "single dataset")  # initiate the MCMC

# run MCMC (chain 1)
set.seed(123)
simple_out_chain2 <- MCMC_simple_model(inits2, niter, cov, fitting = "single dataset")  # initiate the MCMC

# whats the acceptance ratio (aiming for 0.25)
sum(simple_out_chain1$Acceptances)/niter
sum(simple_out_chain2$Acceptances)/niter

# plot MCMC outputs #
chains_plot <- chains_plot_func(inits1 = inits1, chain1 = simple_out_chain1, chain2 = simple_out_chain2,
                                model = "simple")

histograms_plot <- histogram_plot_func(inits1 = inits1, burnin = burnin, niter = niter, 
                                        chain1 = simple_out_chain1, chain2 = simple_out_chain2, model = "simple")

#=============================================# 
#         process MCMC outputs                # 

chains1_output <- simple_out_chain1$MCMC_Output
chains2_output <- simple_out_chain2$MCMC_Output

# remove burnin and proceed with reducing autocorrelation (thinning by sub-sampling)
PC_simple <- Process_chains(chains1_output, chains2_output, burnin = burnin, sample = 50) # set burnin to 0 if already

# View the process chains (from the autocorrelation plots, sampling every 20th value seems appropriate)
plot_chains(PC_simple[[1]], PC_simple[[2]])

# check autocorrelation of chains for each parameter (to inform sub-sampling)
check_autocorr <- determine_autocorrelation_func1(processed_chain = PC_simple, number_datasets = 1)

check_autocorr[1] # autocorrelation significance parameter 1 (e.g. lambda)
check_autocorr[2] # autocorrelation significance parameter 2 (e.g. se)
check_autocorr[3] # autocorrelation significance parameter 1 (e.g. sp)

# plot loglikelihood
loglikchains_plot_func(chain1 = simple_out_chain1, chain2 = simple_out_chain2)

#==============================================================================#
# Obtain parameter values (median & credible) & plot posterior distributions   #

simple_model_parameters <- obtain_parameter_values_func(processed_chains = PC_simple, model = "simple", number_datasets = 1)
simple_model_parameters

plot_posterior_distrib_func(processed_chains = PC_simple, model = "simple", number_datasets = 1)

#============================================================#
# calculate Deviance Information Criterion (DIC) - model fit #

DIC_result <- calculate_DIC_func1(chain = simple_out_chain1, burnin = burnin, subsample = 50, 
                                 parameters = simple_model_parameters, number_datasets = 1)
DIC_result  # 1) D bar model1, 2) modal posterior likelihood, 3) modal posterior deviance, 4) DIC

#====================================================================================================================#
# calculate (with posterior parameter estimates) predicted (sero)prevalence and unceetainty intervals (for plotting) #

predicted_prev_output <- calculate_predicted_prevalence_function(max_age_toplot = 90, data = data, 
                                                                 pars = simple_model_parameters,
                                                                 processed_chains = PC_simple, model = "simple")

predicted_prev_output[[1]] # predicted prevalence plot (ylim 0-100% prev)
predicted_prev_output[[2]] # predicted prevalence plot (ylim 0-50% prev)
predicted_prev_output[[3]] # predicted prevalence plot (ylim 0-25% prev)

#=========================================================================================================================#
#===================================================#
#  run MCMC (single dataset; reversible FoI model)  #

inits1 <- c(0.004, 0.93, 0.96, 0.001)   # Initial parameter values to initiate chains
inits2 <- c(0.0001, 0.99, 0.999, 0.01) # e.g. Ab−EITB, rT24H (se: 0.96 (0.93-0.99), sp: 0.98 (0.96-1), Noh et al. 2014) 

sd <- 0.004 # set standard deviation of proposal distribution; aim for 0.25 acceptance
cov <- diag(sd^2, 4)# covariance
niter <- 100000 # number of iterations
burnin <- 50000 # burnin (chains to discard before convergence)

# run MCMC (chain 1)
set.seed(123) # for reproducibility
reversible_out_chain1 <- MCMC_reversible_model(inits1, niter, cov, simple_lambda_median = 0.00042,
                                               fitting = "single dataset")  # initiate the MCMC (& specify lambda median from simple model to inform the lognromal prior)

# run MCMC (chain 1)
set.seed(123)
reversible_out_chain2 <- MCMC_reversible_model(inits2, niter, cov, simple_lambda_median = 0.00042,
                                               fitting = "single dataset")   # initiate the MCMC (& specify lambda median from simple model to inform the lognromal prior)

# whats the acceptance ratio (aiming for 0.25)
sum(reversible_out_chain1$Acceptances)/niter
sum(reversible_out_chain2$Acceptances)/niter

# plot MCMC outputs #
chains_plot <- chains_plot_func(inits1 = inits1, chain1 = reversible_out_chain1, chain2 = reversible_out_chain2, 
                                model = "reversible")

histograms_plot <- histogram_plot_func(inits1 = inits1, burnin = burnin, niter = niter, 
                                        chain1 = reversible_out_chain1, chain2 = reversible_out_chain2,
                                        model = "reversible")

#=============================================# 
#         process MCMC outputs                # 

chains1_output <- reversible_out_chain1$MCMC_Output
chains2_output <- reversible_out_chain2$MCMC_Output

# remove burnin and proceed with reducing autocorrelation (thinning by sub-sampling)
PC_reversible <-Process_chains(chains1_output, chains2_output, burnin = burnin, sample = 50) # set burnin to 0 if already

# View the process chains (from the autocorrelation plots, sampling every 20th value seems appropriate)
plot_chains(PC_reversible[[1]], PC_reversible[[2]])

# check autocorrelation of chains for each parameter (to inform sub-sampling)
check_autocorr <- determine_autocorrelation_func2(processed_chain = PC_reversible, number_datasets = 1)

check_autocorr[1] # autocorrelation significance parameter 1 (e.g. lambda)
check_autocorr[2] # autocorrelation significance parameter 2 (e.g. se)
check_autocorr[3] # autocorrelation significance parameter 1 (e.g. sp)
check_autocorr[4] # autocorrelation significance parameter 1 (e.g. sp)

# plot loglikelihood
loglikchains_plot_func(chain1 = reversible_out_chain1, chain2 = reversible_out_chain2)

#==============================================================================#
# Obtain parameter values (median & credible) & plot posterior distributions   #

reversible_model_parameters <- obtain_parameter_values_func(processed_chains = PC_reversible, model = "reversible", number_datasets = 1)
reversible_model_parameters

plot_posterior_distrib_func(processed_chains = PC_reversible, model = "reversible", number_datasets = 1)

#============================================================#
# calculate Deviance Information Criterion (DIC) - model fit #

DIC_result <- calculate_DIC_func2(chain = reversible_out_chain1, burnin = burnin, subsample = 50, 
                                 parameters = reversible_model_parameters, number_datasets = 1,
                                 simple_lambda_median = 0.00042)
DIC_result  # 1) D bar model1, 2) modal posterior likelihood, 3) modal posterior deviance, 4) DIC

#====================================================================================================================#
# calculate (with posterior parameter estimates) predicted (sero)prevalence and unceetainty intervals (for plotting) #

predicted_prev_output <- calculate_predicted_prevalence_function(max_age_toplot = 90, data = data, 
                                                                 pars = reversible_model_parameters,
                                                                 processed_chains = PC_reversible, model = "reversible")

predicted_prev_output[[1]] # predicted prevalence plot (ylim 0-100% prev)
predicted_prev_output[[2]] # predicted prevalence plot (ylim 0-50% prev)
predicted_prev_output[[3]] # predicted prevalence plot (ylim 0-25% prev)
Browse the archive

https://github.com/mrc-ide/human_tsol_FoI_modelling