Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://doi.org/10.5281/zenodo.15731718
24 June 2025, 17:15:41 UTC
  • Code
  • Branches (0)
  • Releases (1)
  • Visits
    • Branches
    • Releases
      • 1
      • 1
    • 7c9d12f
    • /
    • 01_data_exploration.R
    Raw File Download

    To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
    Select below a type of object currently browsed in order to display its associated SWHID and permalink.

    • content
    • directory
    • snapshot
    • release
    origin badgecontent badge
    swh:1:cnt:32d46d3ee0391c40beb88dba1e0312511f0f5961
    origin badgedirectory badge
    swh:1:dir:7c9d12f286334bcec2d56e656b51bedec0450877
    origin badgesnapshot badge
    swh:1:snp:caa5e5c1373a152e7394b87e885b1339f8fc68ec
    origin badgerelease badge
    swh:1:rel:b8949b6da5a3c1d416881fc3e15d0848afff9f2d

    This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
    Select below a type of object currently browsed in order to generate citations for them.

    • content
    • directory
    • snapshot
    • release
    (requires biblatex-software package)
    Generating citation ...
    (requires biblatex-software package)
    Generating citation ...
    (requires biblatex-software package)
    Generating citation ...
    (requires biblatex-software package)
    Generating citation ...
    01_data_exploration.R
    # ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
    ## DATA EXPLORATION ============================================================
    
    # Description:
    #     Explore and visualise epidemiological and climatic data in Barbados.
    
    # Paper:
    #     Compound and cascading effects of climatic extremes on dengue outbreak
    #     risk in the Caribbean: an impact-based modelling framework with long-lag
    #     and short-lag interactions
    
    # Script authors:
    #     Chloe Fletcher        ORCID: 0000-0002-6705-7605
    #     Prof. Rachel Lowe     ORCID: 0000-0003-3939-7343
    
    # ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
    
    ## Source packages, functions and data -----------------------------------------
    
    # load packages and functions
    source("functions/00_packages_functions.R")
    
    # read in harmonised data
    data <- read.csv("data/barbados_data.csv")
    tmp_means <- readRDS("data/temperature_means.rds")
    
    # format dates
    data$date <- as.Date(data$date)
    date_start <- min(data$date)
    date_end <- max(data$date)
    
    # read in nino34 data
    nino <- read.csv("data/nino_anom_ref_1991-2020.csv")
    nino <- nino[, c(1, 2, 6)]
    colnames(nino) <- c("cal_year", "cal_month", "nino34")
    data <- merge(data, nino, by = c("cal_year", "cal_month"), x.all = TRUE)
    
    # re-order data
    data <- data[with(data, order(cal_year, cal_month)),]
    rownames(data) <- NULL
    
    
    ## Explore data ----------------------------------------------------------------
    
    # find peak months of confirmed cases per dengue season
    annual_peak <- data %>%
      group_by(year_index) %>%
      slice(which.max(cases)) %>%
      select(date, cal_year, cal_month, cases, month, year_index)
    
    # calculate mean, 2.5th and 97.5th percentiles of confirmed cases per month
    average_month <- data %>%
      group_by(month) %>%
      summarise(cases_mean = mean(cases, na.rm = TRUE),
                cases_p025 = quantile(cases, 0.025, na.rm = TRUE),
                cases_p975 = quantile(cases, 0.975, na.rm = TRUE)) %>%
      as.data.frame()
    
    # plot mean, 2.5th and 97.5th percentiles of confirmed cases per month
    plt_average_month_cases <- ggplot(average_month, aes(x = month)) +
      geom_ribbon(aes(ymin = cases_p025, ymax = cases_p975), fill = "deeppink2", alpha = 0.2) + 
      geom_line(aes(y = cases_mean), colour = "deeppink2") +
      ggtitle("Mean and Percentile Range of Monthly Dengue Cases in Barbados (Jun-1999 to May-2022)") +
      xlab("Month of Year") +
      scale_x_continuous(breaks=seq(1, 12, by = 1), expand = c(.03,.03),
                         labels=c("Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
                                  "Jan", "Feb", "Mar", "Apr", "May")) +
      ylab("Dengue Cases") +
      scale_y_continuous(breaks = seq(0, 225, by = 25), expand = c(.03, .03),
                         limits = c(0, 225)) +
      theme_bw(base_size = 12) +
      theme(panel.border = element_blank(), plot.title = element_text(hjust=0.5))
    plt_average_month_cases
    
    # plot boxplot of confirmed cases per month
    plt_month_cases_boxplot <- ggplot(data, aes(x = as.factor(month), y = cases)) +
      geom_boxplot(outlier.shape = NA) +
      ggtitle("Boxplot of Dengue Cases per Month in Barbados (Jun-1999 to May-2022)") +
      xlab("Month of Year") +
      scale_x_discrete(breaks = seq(1, 12, by = 1), expand = c(.03, .03),
                       labels=c("Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
                                "Jan", "Feb", "Mar", "Apr", "May")) +
      ylab("Dengue Cases") +
      scale_y_continuous(breaks = seq(0, 200, by=20), expand = c(.03, .03)) +
      coord_cartesian(ylim = c(0, 200)) +
      theme_bw(base_size = 12) +
      theme(panel.border = element_blank(), plot.title = element_text(hjust=0.5))
    plt_month_cases_boxplot
    
    
    ## Create facet plot -----------------------------------------------------------
    
    # facet plot time series for dengue, mean temp, spi6 and nino34
    long_data <- data
    long_data$tas <- long_data$tas + tmp_means[["tas"]]
    long_data <- long_data %>%
      pivot_longer(cols = c(cases, tas, spi6, nino34), 
                   names_to = "Variable", 
                   values_to = "Value")
    
    plt_facet <- ggplot(long_data, aes(x = as.Date(date), y = Value)) +
      geom_line(aes(color = Variable)) +
      scale_color_manual(values = c(cases = "black", tas = "red3", spi6 = "blue",
                                    nino34 = "purple4")) +
      facet_wrap(~Variable, scales = "free_y", ncol = 1,
                 labeller = as_labeller(c(cases = "Dengue Cases",
                                          tas = "Mean Temp (°C)",
                                          spi6 = "SPI-6",
                                          nino34 = "Niño 3.4 SSTA (°C)")),
                 strip.position = "left") +
      labs(x = "Month of Year", y = NULL) +
      scale_x_date(limit=c(date_start, date_end), expand = c(0, 0),
                   date_labels = "%b %Y", date_breaks = "6 months") +
      theme_bw(base_size = 12) +
      theme(panel.border = element_rect(linewidth = 0),
            axis.text.x = element_text(angle = 45, hjust = 1),
            legend.position = "none", strip.background = element_blank(),
            strip.placement = "outside", plot.title = element_text(hjust = 0.5))
    plt_facet
    #ggsave("outputs/figs/SF_02.png", width = 12, height = 7, dpi = 300)
    
    
    ## Plot heat maps --------------------------------------------------------------
    
    # create month and year labels
    month.lab <- c(month.abb[c(6:12)], month.abb[c(1:5)])
    year.lab <- paste(seq(min(data$cal_year), max(data$cal_year) - 1, 1),
                      stringr::str_sub(seq(min(data$cal_year) + 1, max(data$cal_year),
                                           1), -2, -1), sep="/")
    
    # heat map of dengue cases
    denv_heat <- ggplot(data = data, aes(x = month, y = year_index, fill = cases)) + 
      geom_raster() +
      ylab("Year") + 
      xlab("Month") +
      scale_fill_gradientn(name = "Dengue Cases", colours = brewer.pal(9, "PuRd")) + 
      scale_y_continuous(breaks = seq(1, max(data$year_index), 1),
                         labels = year.lab, expand = expansion(0)) +
      scale_x_continuous(breaks = c(1:12), labels = month.lab, expand = expansion(0)) +
      theme_bw()
    denv_heat
    
    # heat map of mean temperature anomaly
    tmp <- data
    tmp$tas <- tmp$tas + tmp_means["tas"]
    
    tas_heat <- ggplot(data = tmp, aes(x = month, y = year_index, fill = tas)) + 
      geom_raster() +
      ylab("Year") + 
      xlab("Month") +
      scale_fill_viridis(name = "Mean Temp. Anom.", option = "magma", direction = -1) +
      scale_y_continuous(breaks = seq(1, max(data$year_index), 1),
                         labels = year.lab, expand = expansion(0)) +
      scale_x_continuous(breaks = c(1:12), labels = month.lab, expand = expansion(0)) +
      theme_bw()
    tas_heat
    
    # heat map of spi-6
    spi6_heat <- ggplot(data = data, aes(x = month, y = year_index, fill = spi6)) + 
      geom_raster() +
      ylab("Year") + 
      xlab("Month") +
      scale_fill_gradientn(name = "SPI-6", colours = brewer.pal(11, "BrBG")) +
      scale_y_continuous(breaks = seq(1, max(data$year_index), 1),
                         labels = year.lab, expand = expansion(0)) +
      scale_x_continuous(breaks = c(1:12), labels = month.lab, expand = expansion(0)) +
      theme_bw()
    spi6_heat
    
    # heat map of nino 3.4 index
    nino_heat <- ggplot(data = data, aes(x = month, y = year_index, fill = nino34)) + 
      geom_raster() +
      ylab("Year") + 
      xlab("Month") +
      scale_fill_gradientn(name = "Niño 3.4 Index", colours = rev(brewer.pal(11, "RdBu")),
                           breaks=c(-3,0,3), limits=c(-3,3)) +
      scale_y_continuous(breaks = seq(1, max(data$year_index), 1),
                         labels = year.lab, expand = expansion(0)) +
      scale_x_continuous(breaks = c(1:12), labels = month.lab, expand = expansion(0)) +
      theme_bw()
    nino_heat
    
    # combine heat maps
    ggarrange(denv_heat, tas_heat, spi6_heat, nino_heat, 
              ncol = 2, nrow = 2, labels = c("A", "B", "C", "D"), align = "hv",
              hjust = -1, vjust = 1, font.label = list(size = 14, face = "plain"))
    
    
    ## Evaluate correlation --------------------------------------------------------
    
    # select response (cases) and covariates (excl. lags)
    data_cor <- data %>%
      select(c(4, 9, 16, 23, 30, 37, 44, 51, 58, 65, 72, 82)) %>%
      drop_na()
    
    # show correlations
    cor(data_cor)
    
    
    ## END
    

    back to top

    Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
    The source code of Software Heritage itself is available on our development forge.
    The source code files archived by Software Heritage are available under their own copyright and licenses.
    Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API