https://doi.org/10.5281/zenodo.15731718
03_model_selection.R
# ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
## MODEL SELECTION =============================================================
# Description:
# Select best temperature anomaly, long lag and short lag SPI interaction
# model by comparing goodness of fit (GOF) metrics (DIC, WAIC, MAE, RsqLR),
# fitted values, fixed effects and random effects.
# Paper:
# Compound and cascading effects of climatic extremes on dengue outbreak
# risk in the Caribbean: an impact-based modelling framework with long-lag
# and short-lag interactions
# Script authors:
# Chloe Fletcher ORCID: 0000-0002-6705-7605
# Prof. Rachel Lowe ORCID: 0000-0003-3939-7343
# ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
## Source packages, functions and data -----------------------------------------
# load packages and functions
source("functions/00_packages_functions.R")
# read in harmonised data
data <- read.csv("data/barbados_data.csv")
# load output of best model combination model
spi6.mod.out <- readRDS("outputs/spi6_mods.rds")
## Visualise goodness of fit matrices ------------------------------------------
# monthly mean temperature gof matrix
gof_tas <- gof_matrix(spi6.mod.out, temp_var="tas")
multi.heatmap.gof(gof_tas,
"Goodness of Fit for Mean Temperature-SPI6 Models")
# 3-monthly mean temperature gof matrix
gof_tas.3mon <- gof_matrix(spi6.mod.out, temp_var="tas.3mon")
multi.heatmap.gof(gof_tas.3mon,
"Goodness of Fit for 3-Month Mean Temperature and SPI-6 Models") +
theme(plot.background = element_rect(fill = "white"))
#ggsave("outputs/figs/F_01.png", dpi=600, height=12, width=15, bg="white")
# monthly max temperature gof matrix
gof_tasmax <- gof_matrix(spi6.mod.out, temp_var="tasmax")
multi.heatmap.gof(gof_tasmax,
"Goodness of Fit for Maximum Temperature-SPI6 Models")
# 3-monthly max temperature gof matrix
gof_tasmax.3mon <- gof_matrix(spi6.mod.out, temp_var="tasmax.3mon")
multi.heatmap.gof(gof_tasmax.3mon,
"Goodness of Fit for 3-Month Maximum Temperature-SPI6 Models")
# monthly min temperature gof matrix
gof_tasmin <- gof_matrix(spi6.mod.out, temp_var="tasmin")
multi.heatmap.gof(gof_tasmin,
"Goodness of Fit for Minimum Temperature-SPI6 Models")
# 3-monthly min temperature gof matrix
gof_tasmin.3mon <- gof_matrix(spi6.mod.out, temp_var="tasmin.3mon")
multi.heatmap.gof(gof_tasmin.3mon,
"Goodness of Fit for 3-Month Minimum Temperature-SPI6 Models")
## Extract best models by GOF --------------------------------------------------
# number of best models to select from each gof metric
n <- 20
# find best n models by gof metric
best_dic <- spi6.mod.out$mod.gof[with(spi6.mod.out$mod.gof,
rev(order(dic_vs_base))),"vars"][1:n]
best_waic <- spi6.mod.out$mod.gof[with(spi6.mod.out$mod.gof,
rev(order(waic_vs_base))),"vars"][1:n]
best_mae <- spi6.mod.out$mod.gof[with(spi6.mod.out$mod.gof,
rev(order(mae_vs_base))),"vars"][1:n]
best_rsq <- spi6.mod.out$mod.gof[with(spi6.mod.out$mod.gof,
rev(order(rsq))),"vars"][1:n]
# identify unique best models
best_mods <- Reduce(union, list(best_dic, best_waic, best_mae, best_rsq))
bm_id <- rownames(spi6.mod.out$mod.gof[spi6.mod.out$mod.gof$vars %in% best_mods, ])
bm_id <- as.numeric(bm_id)
# identify best models across all metrics
best_all <- Reduce(intersect, list(best_dic, best_waic, best_mae, best_rsq))
ba_id <- rownames(spi6.mod.out$mod.gof[spi6.mod.out$mod.gof$vars %in% best_all, ])
ba_id <- as.numeric(ba_id)
## Explore best models ---------------------------------------------------------
# evaluate goodness of fit for best models
best_mods_gof <- spi6.mod.out$mod.gof[spi6.mod.out$mod.gof$vars %in% best_mods, ]
# plot fitted values for each best model
for (bm in bm_id){
print(plot.fit(spi6.mod.out$fitted[[bm]], title=spi6.mod.out$mod.gof$vars[bm]))
}
# append baseline model
base.mod <- readRDS("outputs/base_mod.rds")
idx <- nrow(spi6.mod.out$mod.gof) + 1
spi6.mod.out$mod.gof[idx,"vars"] <- "base"
spi6.mod.out$fitted[[idx]] <- base.mod$summary.fitted.values
spi6.mod.out$fixed[[idx]] <- base.mod$summary.fixed
spi6.mod.out$random[[idx]] <- base.mod$summary.random
# plot random effects of each model incl. baseline
# (un-comment each selection specification one by one)
# selection <- c(bm_id[grepl("spi6.4", best_mods_gof$vars)],idx) # spi6.4 models
selection <- c(bm_id[grepl("spi6.5", best_mods_gof$vars)],idx) # spi6.5 models
# selection <- c(bm_id[grepl("spi6.6", best_mods_gof$vars)],idx) # spi6.6 models
plot.temp.res(mods=spi6.mod.out$random[selection],
titles=spi6.mod.out$mod.gof$vars[selection]) # seasonal effect
plot.year.res(mods=spi6.mod.out$random[selection],
titles=spi6.mod.out$mod.gof$vars[selection]) # interannual effect
red_seas <- c(23, 29, 58, 59, 65, 94, 95, 101, 167, 173, 202, 203, 209, 310,
311, 317, idx) # reduced seasonal
red_year <- c(bm_id[grepl("spi6", best_mods_gof$vars)], idx) # reduced interannual
red <- intersect(red_seas, red_year) # reduced seasonal and interannual
# print fixed effects of models with reduced temporal effects
for (r in red){
print(r)
print(spi6.mod.out$fixed[[r]])
}
# print fixed effects of models with best gof metrics
for (b in ba_id){
print(b)
print(spi6.mod.out$fixed[[b]])
}
## Select best models ----------------------------------------------------------
# list of candidates
candidate_mods <- c(23, 29, 30, 58, 59, 138, 173)
# name of candidates
spi6.mod.out$mod.gof$vars[candidate_mods]
## END