https://doi.org/10.5281/zenodo.15731718
01_data_exploration.R
# ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
## DATA EXPLORATION ============================================================
# Description:
# Explore and visualise epidemiological and climatic data in Barbados.
# Paper:
# Compound and cascading effects of climatic extremes on dengue outbreak
# risk in the Caribbean: an impact-based modelling framework with long-lag
# and short-lag interactions
# Script authors:
# Chloe Fletcher ORCID: 0000-0002-6705-7605
# Prof. Rachel Lowe ORCID: 0000-0003-3939-7343
# ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
## Source packages, functions and data -----------------------------------------
# load packages and functions
source("functions/00_packages_functions.R")
# read in harmonised data
data <- read.csv("data/barbados_data.csv")
tmp_means <- readRDS("data/temperature_means.rds")
# format dates
data$date <- as.Date(data$date)
date_start <- min(data$date)
date_end <- max(data$date)
# read in nino34 data
nino <- read.csv("data/nino_anom_ref_1991-2020.csv")
nino <- nino[, c(1, 2, 6)]
colnames(nino) <- c("cal_year", "cal_month", "nino34")
data <- merge(data, nino, by = c("cal_year", "cal_month"), x.all = TRUE)
# re-order data
data <- data[with(data, order(cal_year, cal_month)),]
rownames(data) <- NULL
## Explore data ----------------------------------------------------------------
# find peak months of confirmed cases per dengue season
annual_peak <- data %>%
group_by(year_index) %>%
slice(which.max(cases)) %>%
select(date, cal_year, cal_month, cases, month, year_index)
# calculate mean, 2.5th and 97.5th percentiles of confirmed cases per month
average_month <- data %>%
group_by(month) %>%
summarise(cases_mean = mean(cases, na.rm = TRUE),
cases_p025 = quantile(cases, 0.025, na.rm = TRUE),
cases_p975 = quantile(cases, 0.975, na.rm = TRUE)) %>%
as.data.frame()
# plot mean, 2.5th and 97.5th percentiles of confirmed cases per month
plt_average_month_cases <- ggplot(average_month, aes(x = month)) +
geom_ribbon(aes(ymin = cases_p025, ymax = cases_p975), fill = "deeppink2", alpha = 0.2) +
geom_line(aes(y = cases_mean), colour = "deeppink2") +
ggtitle("Mean and Percentile Range of Monthly Dengue Cases in Barbados (Jun-1999 to May-2022)") +
xlab("Month of Year") +
scale_x_continuous(breaks=seq(1, 12, by = 1), expand = c(.03,.03),
labels=c("Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
"Jan", "Feb", "Mar", "Apr", "May")) +
ylab("Dengue Cases") +
scale_y_continuous(breaks = seq(0, 225, by = 25), expand = c(.03, .03),
limits = c(0, 225)) +
theme_bw(base_size = 12) +
theme(panel.border = element_blank(), plot.title = element_text(hjust=0.5))
plt_average_month_cases
# plot boxplot of confirmed cases per month
plt_month_cases_boxplot <- ggplot(data, aes(x = as.factor(month), y = cases)) +
geom_boxplot(outlier.shape = NA) +
ggtitle("Boxplot of Dengue Cases per Month in Barbados (Jun-1999 to May-2022)") +
xlab("Month of Year") +
scale_x_discrete(breaks = seq(1, 12, by = 1), expand = c(.03, .03),
labels=c("Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
"Jan", "Feb", "Mar", "Apr", "May")) +
ylab("Dengue Cases") +
scale_y_continuous(breaks = seq(0, 200, by=20), expand = c(.03, .03)) +
coord_cartesian(ylim = c(0, 200)) +
theme_bw(base_size = 12) +
theme(panel.border = element_blank(), plot.title = element_text(hjust=0.5))
plt_month_cases_boxplot
## Create facet plot -----------------------------------------------------------
# facet plot time series for dengue, mean temp, spi6 and nino34
long_data <- data
long_data$tas <- long_data$tas + tmp_means[["tas"]]
long_data <- long_data %>%
pivot_longer(cols = c(cases, tas, spi6, nino34),
names_to = "Variable",
values_to = "Value")
plt_facet <- ggplot(long_data, aes(x = as.Date(date), y = Value)) +
geom_line(aes(color = Variable)) +
scale_color_manual(values = c(cases = "black", tas = "red3", spi6 = "blue",
nino34 = "purple4")) +
facet_wrap(~Variable, scales = "free_y", ncol = 1,
labeller = as_labeller(c(cases = "Dengue Cases",
tas = "Mean Temp (°C)",
spi6 = "SPI-6",
nino34 = "Niño 3.4 SSTA (°C)")),
strip.position = "left") +
labs(x = "Month of Year", y = NULL) +
scale_x_date(limit=c(date_start, date_end), expand = c(0, 0),
date_labels = "%b %Y", date_breaks = "6 months") +
theme_bw(base_size = 12) +
theme(panel.border = element_rect(linewidth = 0),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none", strip.background = element_blank(),
strip.placement = "outside", plot.title = element_text(hjust = 0.5))
plt_facet
#ggsave("outputs/figs/SF_02.png", width = 12, height = 7, dpi = 300)
## Plot heat maps --------------------------------------------------------------
# create month and year labels
month.lab <- c(month.abb[c(6:12)], month.abb[c(1:5)])
year.lab <- paste(seq(min(data$cal_year), max(data$cal_year) - 1, 1),
stringr::str_sub(seq(min(data$cal_year) + 1, max(data$cal_year),
1), -2, -1), sep="/")
# heat map of dengue cases
denv_heat <- ggplot(data = data, aes(x = month, y = year_index, fill = cases)) +
geom_raster() +
ylab("Year") +
xlab("Month") +
scale_fill_gradientn(name = "Dengue Cases", colours = brewer.pal(9, "PuRd")) +
scale_y_continuous(breaks = seq(1, max(data$year_index), 1),
labels = year.lab, expand = expansion(0)) +
scale_x_continuous(breaks = c(1:12), labels = month.lab, expand = expansion(0)) +
theme_bw()
denv_heat
# heat map of mean temperature anomaly
tmp <- data
tmp$tas <- tmp$tas + tmp_means["tas"]
tas_heat <- ggplot(data = tmp, aes(x = month, y = year_index, fill = tas)) +
geom_raster() +
ylab("Year") +
xlab("Month") +
scale_fill_viridis(name = "Mean Temp. Anom.", option = "magma", direction = -1) +
scale_y_continuous(breaks = seq(1, max(data$year_index), 1),
labels = year.lab, expand = expansion(0)) +
scale_x_continuous(breaks = c(1:12), labels = month.lab, expand = expansion(0)) +
theme_bw()
tas_heat
# heat map of spi-6
spi6_heat <- ggplot(data = data, aes(x = month, y = year_index, fill = spi6)) +
geom_raster() +
ylab("Year") +
xlab("Month") +
scale_fill_gradientn(name = "SPI-6", colours = brewer.pal(11, "BrBG")) +
scale_y_continuous(breaks = seq(1, max(data$year_index), 1),
labels = year.lab, expand = expansion(0)) +
scale_x_continuous(breaks = c(1:12), labels = month.lab, expand = expansion(0)) +
theme_bw()
spi6_heat
# heat map of nino 3.4 index
nino_heat <- ggplot(data = data, aes(x = month, y = year_index, fill = nino34)) +
geom_raster() +
ylab("Year") +
xlab("Month") +
scale_fill_gradientn(name = "Niño 3.4 Index", colours = rev(brewer.pal(11, "RdBu")),
breaks=c(-3,0,3), limits=c(-3,3)) +
scale_y_continuous(breaks = seq(1, max(data$year_index), 1),
labels = year.lab, expand = expansion(0)) +
scale_x_continuous(breaks = c(1:12), labels = month.lab, expand = expansion(0)) +
theme_bw()
nino_heat
# combine heat maps
ggarrange(denv_heat, tas_heat, spi6_heat, nino_heat,
ncol = 2, nrow = 2, labels = c("A", "B", "C", "D"), align = "hv",
hjust = -1, vjust = 1, font.label = list(size = 14, face = "plain"))
## Evaluate correlation --------------------------------------------------------
# select response (cases) and covariates (excl. lags)
data_cor <- data %>%
select(c(4, 9, 16, 23, 30, 37, 44, 51, 58, 65, 72, 82)) %>%
drop_na()
# show correlations
cor(data_cor)
## END