Revision 794180ddbb52c291f8585f8b8c462389d3d35daa authored by kgaythorpe on 30 January 2020, 17:22:28 UTC, committed by GitHub on 30 January 2020, 17:22:28 UTC
Rule out tibet
burden_compare.Rmd
---
title: "Burden Comparison between model variants"
author: "Katy Gaythorpe"
date: "`r format(Sys.time(), '%d %B, %Y')`"
header-includes:
- \usepackage{placeins}
output:
pdf_document:
df_print: "kable"
---
```{r knitsetup, include=FALSE}
knitr::opts_chunk$set(
dev = "png",
fig.path = "images_bestglmA/",
dpi = 300,
warning = FALSE,
message = FALSE
)
```
```{r setup}
library(mcmcplots)
library(ggmcmc)
library(gridExtra)
library(maptools)
library(readr)
library(fields)
library(rgdal)
library(R.utils)
library(magrittr)
library(pROC)
library(dplyr)
# homemade
library(snapalette)
library(YFestimation)
library(KsetupR)
R.utils::sourceDirectory("Functions", modifiedOnly=FALSE)
snapal = "Pop"
```
```{r read in foi}
foi_out = NULL
for(i in 1:20){
tmp <- read.csv(paste0("FOI_samples_bestglm_A_", i, ".csv"), stringsAsFactors = FALSE)
tmp %<>% mutate(variant = i, sample = row.names(tmp))
foi_out %<>% bind_rows(tmp)
}
# get in useful format
foi_samples = foi_out %>% gather(adm1, foi, -c(sample, variant))
foi_samples %<>% mutate(adm0 = substr(adm1, 1,3))
foi_samples %<>% mutate(variant = as.factor(variant))
```
```{r load_data}
# ---------------------------------------------------------------------------
### POPULATION DATA ###
all_res_pop_3d = get_pop_data_3d()
pop1 = all_res_pop_3d$pop1
P_tot = all_res_pop_3d$P_tot
p_prop = all_res_pop_3d$p_prop
pop1 %<>% rename(adm0_adm1 = adm1)
pop1 %<>% arrange(year) %>% dplyr::select(-c(country_code, country))
names(pop1)[3:ncol(pop1)] = paste0("a", 0:100)
pop1 %<>% filter(year<2051)
p_prop %<>% filter(year<2051)
P_tot %<>% filter(year<2051)
# ---------------------------------------------------------------------------
# VACCINATION DATA #
vc2d = readRDS("vacc_1940_1950.RDS")
vc2d = as.data.frame(vc2d)
# expand vc2d to match pop1
vc2d %<>% mutate(year = as.numeric(as.character(year)))
vc_tmp = pop1 %>% filter(!adm0_adm1 %in% vc2d$adm0_adm1)
vc_tmp[, grep("adm0_adm1|year", names(vc_tmp), invert = TRUE)] = 0
vc2d %<>% bind_rows(vc_tmp)
vc2d %<>% unique()
vc2d %<>% filter(!is.na(adm0_adm1))
```
```{r load shapefiles}
if(!exists("shp0")){
shp0 = rgdal::readOGR("../../shapefiles/Africa_America_0_smooth.shp",
stringsAsFactors = FALSE,
encoding = "UTF-8",
use_iconv = TRUE)
}
if(!exists("shp1")){
shp1 = rgdal::readOGR("../../shapefiles/Africa_America_1_smooth.shp",
stringsAsFactors = FALSE,
encoding = "UTF-8",
use_iconv = TRUE)
}
```
```{r load_foi}
ve = read.csv("vac_eff_samples.csv", stringsAsFactors = FALSE)$x
# sort pop and vc to included adm1
pop1 %<>% filter(adm0_adm1 %in% names(foi_out))
vc2d %<>% filter(adm0_adm1 %in% names(foi_out))
#arrange
pop1 %<>% arrange(adm0_adm1)
vc2d %<>% arrange(adm0_adm1)
```
```{r calculate burden}
# ---------------------------------------------------------------------------
fun_calcBurden = function(fois, year , adm1 = NA, vc2d, pop1, ve) { # Tini's burden function
if(anyNA(adm1)){
adm1 = names(fois)
}
foi.mat = matrix(fois, nrow=length(fois), ncol=101, byrow=FALSE)
class(foi.mat) = "numeric"
a.mat = matrix(0:100, nrow=length(fois), ncol=101, byrow=TRUE)
burden = fois * rowSums(exp(-foi.mat*a.mat) *
(1-ve*vc2d[vc2d$year==year &
vc2d$adm0_adm1 %in% adm1,-(1:3)]) *
pop1[pop1$year==year &
pop1$adm0_adm1 %in% adm1, -(1:2)], na.rm=TRUE)
return(burden)
}
# ---------------------------------------------------------------------------
year_out = 2018
burden_df = NULL
for(v in 1:20){
foi = foi_out %>%
filter(variant == v) %>%
select(-c(variant, sample))
foi = foi[unique(vc2d$adm0_adm1)] # order it in the same way
infections_out <- sapply(1:nrow(foi), FUN = function(i) fun_calcBurden(foi[i,],
year_out,
adm1 = names(foi),
vc2d,
pop1,
ve[i]))
infections_df <- as.data.frame(infections_out)
infections_df %<>% mutate(adm0_adm1 = rownames(infections_out),
year = year_out,
variant = v)
infections_df %<>% gather(sample, infections, -c(adm0_adm1, year, variant))
infections_df %<>% mutate(adm0 = substr(adm0_adm1, 1, 3)) %>%
mutate(infections = as.numeric(infections)) %>%
mutate(prob_severe = rbeta(nrow(infections_df),
6.367309,44.60736),
prob_severe_death = rbeta(nrow(infections_df),
16.43466, 18.49048))
infections_df %<>% mutate(severe_infections = prob_severe*infections,
deaths = prob_severe_death*severe_infections)
pop_df = data.frame(adm0_adm1 = pop1$adm0_adm1,
year = pop1$year,
pop = rowSums(pop1[, -c(1:2)], na.rm = TRUE))
infections_df %<>% left_join(pop_df)
infections_df %<>% mutate(severe_inci = severe_infections/pop,
deaths_inci = deaths/pop)
burden_df %<>% bind_rows(infections_df)
}
```
```{r table_deaths_all_variants}
df <- burden_df %>%
group_by(adm0, sample, variant) %>%
summarise(deaths = sum(deaths, na.rm = TRUE))
df %>% group_by(adm0) %>%
summarise(deaths_low = round(quantile(as.numeric(deaths), probs = 0.025, na.rm = T)),
deaths_median = round(quantile(as.numeric(deaths), probs = 0.5, na.rm = T)),
deaths_high = round(quantile(as.numeric(deaths), probs = 0.975, na.rm = T)) ) %>%
arrange(-deaths_median)
```
```{r plot_deaths_all_variants, fig.height=8, fig.width=16, fig.cap="Deaths in 2018 by country for all model variants. Mid line denotes median whereas the box denotes the 95% Credible interval."}
df %>% group_by(adm0) %>%
summarise(deaths_low = round(quantile(as.numeric(deaths), probs = 0.025, na.rm = T)),
deaths_median = round(quantile(as.numeric(deaths), probs = 0.5, na.rm = T)),
deaths_high = round(quantile(as.numeric(deaths), probs = 0.975, na.rm = T)) ) %>%
arrange(-deaths_median) %>%
ggplot(aes(x = reorder(adm0, -deaths_median),
ymin = deaths_low,
ymax = deaths_high,
y = deaths_median,
fill = as.factor(deaths_median))) +
geom_crossbar() +
theme_bw() +
theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("Country")+ ylab("Deaths in 2018")+
scale_fill_manual(values = snapalette(snapal, length(unique(df$adm0)), "continuous")) +
scale_y_log10()
```
```{r plot_deaths_all_variants_violin, fig.height=8, fig.width=16, fig.cap="Deaths in 2018 by country for all model variants. "}
df %>%
ggplot(aes(x = reorder(adm0, -deaths),
y = deaths)) +
geom_violin(fill = snapalette(snapal, 5, "discrete")[5]) +
theme_bw() +
theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("Country")+ ylab("Deaths in 2018")+
scale_y_log10()
```
```{r plot_deaths_by_variants, fig.height=16, fig.width=20, fig.cap="Deaths in 2018 by country by model variant. Mid line denotes median whereas the box denotes the 95% Credible interval."}
df %>%
ggplot(aes(x = deaths,
fill = as.factor(variant))) +
geom_density(alpha = 0.2) +
theme_bw() +
theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("Deaths in 2018")+
scale_fill_manual(values = snapalette(snapal, 20, "continuous")) +
facet_wrap(adm0~., scale = "free")
```
# Compare burden between all variants in South America with PAHO data
```{r load_paho}
paho_dat = as.data.frame( data.table::fread("Z:/Data/LatinAmerica/Historic_data.csv", stringsAsFactors = FALSE))
paho_dat %<>% spread(`Measure Names`, `Measure Values`)
# filter to observation period
paho_dat %<>% filter(Year>1983)
```
```{r calculate burden 2}
infections_year = NULL
for(v in 1:20){
fois = foi_out %>%
filter(variant == v) %>%
select(-c(variant, sample))
inf_out = NULL
for(s in 1:10){ #nrow(fois)){
# take quantile
foi = fois[s,]
infections_df <- bind_rows(lapply(unique(paho_dat$Year), FUN = function(y) {
infections_out = fun_calcBurden(foi, y, adm1 = names(foi), vc2d, pop1, ve)
inf_df = data.frame(infections = as.numeric(infections_out),
adm0_adm1 = names(infections_out),
year = y,
adm0 = substr(names(infections_out), 1, 3))
inf_df %<>% mutate(prob_severe = rbeta(nrow(inf_df),
6.367309,44.60736),
prob_severe_death = rbeta(nrow(inf_df),
16.43466, 18.49048))
inf_df %<>% mutate(severe_infections = prob_severe*infections,
deaths = prob_severe_death*severe_infections)
}))
pop_df = data.frame(adm0_adm1 = pop1$adm0_adm1,
year = pop1$year,
pop = rowSums(pop1[, -c(1:2)], na.rm = TRUE))
infections_df %<>% left_join(pop_df)
infections_df %<>% mutate(severe_inci = severe_infections/pop,
deaths_inci = deaths/pop)
infections_df %<>% mutate(sample = s)
inf_out %<>% bind_rows(infections_df)
}
infections_year %<>% bind_rows( inf_out %>% mutate(variant = v))
}
```
```{r link_paho_data_and_model}
# get paho iso
translate = countrycode::countrycode(unique(paho_dat$Countries),
"country.name", "iso3c")
translate_df = data.frame(Countries = unique(paho_dat$Countries),
adm0 = translate)
paho_dat %<>% left_join(translate_df)
# get reporting proportion
#reporting_propn = Hmisc::binconf(416, 24000) #24000/416 #120000/805
reporting_propn = 1/c(mean(115, 21), 21, 115) # Dengue reporting https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6095627/
#filter(model out to paho countries)
infections_year %<>% filter(adm0 %in% paho_dat$adm0)
```
```{r combine}
inf_df = infections_year %>%
group_by(adm0, year, sample, variant) %>%
summarise(infections = sum(infections, na.rm = T),
severe_infections = sum(severe_infections, na.rm = T),
deaths = sum(deaths, na.rm = T),
pop = sum(pop, na.rm = T))
combined = left_join(paho_dat, inf_df, by = c("adm0" = "adm0", "Year" = "year"))
combined %<>% mutate(sample = as.character(sample))
```
```{r compare_cases_paho, fig.height=10, fig.width=8}
tmp <- combined %>% group_by(Countries, Year, Cases) %>% summarise(q0.025 = quantile(severe_infections, 0.025),
q0.5 = quantile(severe_infections, 0.5),
q0.975 = quantile(severe_infections, 0.975))
ggplot(tmp, aes( x = Year)) +
geom_line(aes(y = Cases)) +
geom_ribbon(aes(ymin = round(q0.025*reporting_propn[3]),
ymax = round(q0.975*reporting_propn[2])),
fill = alpha(snapalette("Pop", 5, "discrete")[5], 0.4)) +
geom_line(aes(y = round(q0.5*reporting_propn[1]) ), color = snapalette("Pop", 5, "discrete")[5], size = 1)+
facet_wrap(~Countries, scales = "free_y")+
theme_bw()
```
```{r compare_cases_paho_BRA, fig.height=10, fig.width=8}
tmp %>% filter(Countries == "Brazil")%>%
ggplot( aes( x = Year)) +
geom_ribbon(aes(ymin = round(q0.025*reporting_propn[3]),
ymax = round(q0.975*reporting_propn[2])),
fill = alpha(snapalette("Pop", 5, "discrete")[5], 0.4)) +
geom_line(aes(y = Cases)) +
geom_line(aes(y = round(q0.5*reporting_propn[1]) ), color = snapalette("Pop", 5, "discrete")[5], size = 1)+
theme_bw() +
#ylim(0, 5e4)
scale_y_log10()
```
Computing file changes ...