https://github.com/leftygray/Cascade_Incidence
Tip revision: 143d1b970bdaf64beac9aed9fb3193d5b6474015 authored by leftygray on 08 June 2018, 06:58:13 UTC
Edit to license statement
Edit to license statement
Tip revision: 143d1b9
0-GbmCascade.Rmd
GBM HIV Cascade Estimates
=========================
This Rmarkdown script is used to generate estimates for a HIV care and
diagnosis cascade for Australian gay and bisexual men. NOTE: specific
scenarios are hard coded.
```{r initialization}
# Clear workspace
rm(list=ls())
# Source to current directory and set working directory
basePath <- getwd()
# Various directories
dataFolder <- file.path(basePath, "data")
resultsFolder <- file.path(basePath, "output")
figFolder <- file.path(resultsFolder,"Cascade_figures")
Rcode <- file.path(basePath, "code")
# Load standard libraries, key functions and options
source(file.path(Rcode, "LoadLibrary.R"), echo = TRUE)
source(file.path(Rcode, "DataLibraries.R"), echo = TRUE)
source(file.path(Rcode, "GetLegend.R"), echo = TRUE)
LoadLibrary(scales)
```
```{r script details}
# Specify year of analysis
analysisYear <- 2014
# Script parameters
startYear <- 2005 # minimum for plotting; miumum assumed to be 2004 in
# analysis
try(if (startYear < 2004) stop("Start year before 2004"))
years <- 2004:analysisYear
nYears <- length(years)
xBreak <- ifelse(analysisYear == 2015, 5, 4) # xticks for plots
# Input files
#
# This file contains preprepared estimates for the number of
# gay and bisexual men living with diagnosed HIV since 2004. These
# estimates were obtained using the Australian HIV cascade methodology
# described in the Annual Surveillance Report applied to gay and bisexual
# men.
pldhivFile <- file.path(dataFolder, toString(analysisYear),
"pldhiv_estimates_gbm.csv")
# This file contains preprepared estimates for the percentage of
# undiagnosed gay and bisexual men living with HIV since 2004. These
# estimates were obtained using the ECDC HIV Modelling Tool with previously
# diagnosed overseas men included and excluded (npd)
undiagFile <- file.path(dataFolder, toString(analysisYear),
"undiagnosed_estimates_gbm.csv")
# This file contains preprepared estimates for the proportion of
# gay and bisexual men diagnosed with HIV on ART and with suppressed virus
# since 2004
treatmentFile <- file.path(dataFolder, toString(analysisYear),
"treatment_proportions_gbm.csv")
# File containing new diagnosis and estimated new infections for GBM
# newInfectionsFile <- file.path(dataFolder, "new_infections_gbm.csv")
# Save and plot
saveCascade <- TRUE
plotCascade <- TRUE
plotUndiagnosed <- TRUE
savePlots <- TRUE
# Option for undiagnosed proportion ---------------------------------------
# Here we set the option for estimating the proportion of HIV+ GBM who are
# undiagnosed. This can either be the same as COUNT in 2014 (~8.9% see
# below) or a linear change from a specified value in 2004 to the COUNT
# estimate.
undiagnosedOption <- "ecdc" # "count" or "ecdc"
```
```{r data}
# Load data and create all our data inputs for our estimates
# Assumed to start from 2004
pldhiv <- read.csv(pldhivFile)
# Hard coded data values --------------------------------------------------
# Proportion undiagnosed from COUNT results (2014)
undiagnosedCount <- c(0.089, 0.058, 0.135)
# Specify undiagnosed proportion
if (undiagnosedOption == "count") {
# Convert count data into a matrix
undiagnosedMatrix <- matrix(rep(undiagnosedCount, 11), ncol = 11)
} else if (undiagnosedOption == "ecdc") {
# Use results from the ECDC HIV Modelling Tool
propUndiag <- read.csv(undiagFile)
# Take the mean of the estimates including all diagnoses and when
# previously diagnosed overseas are excluded (convert to proportion)
ecdcEstimates <- (propUndiag$percentage + propUndiag$percentage_npd) /
2 / 100
# Take lowerest estimate (when excluding previously diagnosed overseas)
ecdcEstimatesLower <- propUndiag$lower_npd/ 100
# Take highest estimate (includes all diagnoses)
ecdcEstimatesUpper <-propUndiag$upper / 100
undiagnosedMatrix <- matrix(0, nrow = 3, ncol = length(years))
undiagnosedMatrix[1, ] <- ecdcEstimates
undiagnosedMatrix[2, ] <- ecdcEstimatesLower
undiagnosedMatrix[3, ] <- ecdcEstimatesUpper
} else {
# Create best estimate using linear change
undiagnosedMatrix <- matrix(0, nrow = 3, ncol = length(years))
undiagnosedMatrix[1, ] <- seq(startYearProp[1], undiagnosedCount[1],
length = 11)
undiagnosedMatrix[2, ] <- seq(startYearProp[2], undiagnosedCount[2],
length = 11)
undiagnosedMatrix[3, ] <- seq(startYearProp[3], undiagnosedCount[3],
length = 11)
}
treatmentData <- read.csv(treatmentFile)
# Proportion on treatment from Gay Community Periodic Surveys.
# Note first value for 2004 is an assumption
propTreat <- treatmentData$prop_treat
# Proportion of people on treatment with suppressed VL at last test
# From Australian HIV Observational Database (AHOD) for MSM.
# Rather than a single estimate we use a range from minimum to maximum
# proportion.
# For 2014 range is given by % < 50 and % < 1000 at last test.
# For 2014 range is given by 95% confidence interval for < 200 at last
# test.
propSuppressedMin <- treatmentData$prop_suppressed_min
propSuppressedMax <- treatmentData$prop_suppressed_max
# Assume best estimate is in the middle
propSuppressed <- (propSuppressedMin + propSuppressedMax)/2
# Some error checking to make sure everything aligns ----------------------
try(if(nrow(pldhiv) < nYears)
stop("Analysis year older than available data"))
# try(if(nrow(pldhiv) != nrow(newInfects))
# stop("Insufficient new infections data"))
try(if(length(propTreat) != nYears)
stop("Incorrect number of data points or proportion on ART"))
try(if(length(propTreat) != nYears)
stop("Incorrect number of data points for proportion on ART"))
try(if((length(propSuppressedMin) != nYears) ||
(length(propSuppressedMin) != nYears))
stop("Incorrect number of data points for proportion suppressed"))
```
```{r createstimates}
# Initialize data frame for storage
hivCascade <- data.frame(year = integer(),
stage = character(),
value = double(),
lower = double(),
upper = double())
# Calculate each stage of the cascade -------------------------------------
# Add to our hivCascade data frame as we go
# First extract the years we want
pldhiv <- filter(pldhiv, year <= analysisYear)
# People living with HIV
plhiv <- pldhiv$value / (1 - undiagnosedMatrix[1, ])
plhivLower <- pldhiv$lower / (1 - undiagnosedMatrix[2, ])
plhivUpper <- pldhiv$upper / (1 - undiagnosedMatrix[3, ])
hivCascade <- rbind(hivCascade,
data.frame(year = years,
stage = "plhiv",
value = plhiv,
lower = plhivLower,
upper = plhivUpper))
# Number undiagnosed
undiagnosed <- plhiv * undiagnosedMatrix[1, ]
undiagnosedLower <- plhivLower * undiagnosedMatrix[2, ]
undiagnosedUpper <- plhivUpper * undiagnosedMatrix[3, ]
hivCascade <- rbind(hivCascade,
data.frame(year = years,
stage = "undiagnosed",
value = undiagnosed,
lower = undiagnosedLower,
upper = undiagnosedUpper))
# Add people living with diagnosed HIV after adding a stage description
# and reorder
pldhiv$stage <- "pldhiv"
hivCascade <- rbind(hivCascade, pldhiv)
hivCascade <- select(hivCascade, year, stage, everything())
# Number on treatment
treated <- pldhiv$value * propTreat
treatedLower <- pldhiv$lower * propTreat
treatedUpper <- pldhiv$upper * propTreat
# Number diagnosed not on treatment - have to be careful the minimum value
# is not less than zero
diagnosed <- pldhiv$value - treated
diagnosedLower <- pmax(pldhiv$lower - treatedUpper, 0)
diagnosedUpper <- pldhiv$upper - treatedLower
hivCascade <- rbind(hivCascade,
data.frame(year = years,
stage = "diagnosed",
value = diagnosed,
lower = diagnosedLower,
upper = diagnosedUpper))
hivCascade <- rbind(hivCascade,
data.frame(year = years,
stage = "treated",
value = treated,
lower = treatedLower,
upper = treatedUpper))
# Number on treatment suppressed at last test
suppressed <- treated * propSuppressed
suppressedLower <- treatedLower * propSuppressedMin
suppressedUpper <- treatedUpper * propSuppressedMax
# Number with unsuppressed virus-have to be careful the minimum value is
# not less than zero
unsuppressed <- treated - suppressed
unsuppressedLower <- pmax(treatedLower - suppressedUpper, 0)
unsuppressedUpper <- treatedUpper - suppressedLower
hivCascade <- rbind(hivCascade,
data.frame(year = years,
stage = "unsuppressed",
value = unsuppressed,
lower = unsuppressedLower,
upper = unsuppressedUpper))
hivCascade <- rbind(hivCascade,
data.frame(year = years,
stage = "suppressed",
value = suppressed,
lower = suppressedLower,
upper = suppressedUpper))
# Output some stuff for the final year
results <- hivCascade %>%
filter(year == analysisYear) %>%
filter(stage %in% c("undiagnosed", "diagnosed","unsuppressed",
"suppressed"))
cat(paste("HIV cascade numbers for", toString(analysisYear), "..."))
print(results)
cat(paste("HIV cascade proportions for", toString(analysisYear), "..."))
print(results$value/sum(results$value))
# Create a string version for publications
cascadeString <- hivCascade %>%
filter(stage %in% c("undiagnosed", "diagnosed", "unsuppressed",
"suppressed")) %>%
group_by(year, stage) %>%
mutate(string = paste0(toString(round(value, digits = -1)),
" (",
toString(round(lower, digits = -1)),
"-",
toString(round(upper, digits = -1)),
")")) %>%
ungroup() %>%
select(year, stage, string) %>%
spread(stage, string)
# Create string percentage version for publications
plhivFrame <- hivCascade %>%
filter(stage == "plhiv")
cascadePercent <- hivCascade %>%
filter(stage %in% c("undiagnosed", "diagnosed", "unsuppressed",
"suppressed")) %>%
group_by(stage) %>%
mutate(percent = 100 * value / plhivFrame$value,
perlower = 100 * lower / plhivFrame$upper,
perupper = 100 * upper / plhivFrame$lower) %>%
ungroup() %>%
group_by(year, stage) %>%
mutate(string = paste0(toString(round(percent, digits = 1)),
"% (",
toString(round(perlower, digits = 1)),
"-",
toString(round(perupper, digits = 1)),
"%)")) %>%
ungroup() %>%
select(year, stage, string) %>%
spread(stage, string)
# Save our HIV cascade to a csv file
if (saveCascade) {
if (undiagnosedOption == "count") {
undiagString <- "count"
} else if (undiagnosedOption == "ecdc") {
undiagString <- "ecdc"
} else {
undiagString <- fileTag
}
saveString <- file.path(resultsFolder, paste0("gbm_hiv_cascade-", ... =
toString(analysisYear), "_", undiagString))
write.csv(hivCascade, file = paste0(saveString, ".csv"),
row.names = FALSE)
write.csv(cascadeString, file = paste0(saveString, "-numString.csv"),
row.names = FALSE)
write.csv(cascadePercent, file = paste0(saveString,
"-percentString.csv"),
row.names = FALSE)
}
```
```{r plot undiagnosed}
if (plotUndiagnosed) {
# Load default plot options
source(file.path(Rcode, "PlotOptions.R"), echo = TRUE)
# Set up undiagnosed estimates
plotECDC <- TRUE
# Tidy up indiagnosed matrix
undiagPropECDC <- tbl_df(as.data.frame(t(undiagnosedMatrix)))
colnames(undiagPropECDC) <- c("estimate", "lower", "upper")
undiagPropECDC$year <- propUndiag$year
undiagPropCascade <- data_frame(estimate = undiagnosed / plhiv,
lower = undiagnosedLower / plhivUpper,
upper = undiagnosedUpper / plhivLower,
year = propUndiag$year)
countDF <- data_frame(year = 2014,
estimate = undiagnosedCount[1],
lower = undiagnosedCount[2],
upper = undiagnosedCount[3])
# Create plot
undiagPlot <- ggplot(data = undiagPropECDC, aes(x = year)) +
geom_ribbon(data = undiagPropCascade,
aes(x = year, ymin = 100 * lower, ymax = 100 * upper,
fill = "cascade",
alpha = "cascade")) +
geom_line(aes(y = 100 * estimate, colour = "cascade")) +
geom_ribbon(aes(ymin = 100 * lower, ymax = 100 * upper,
fill = "ecdc", alpha = "ecdc")) +
geom_line(aes(y = 100 * estimate, colour = "ecdc")) +
geom_errorbar(data = countDF,
aes(x = year, ymin = 100 * lower, ymax = 100 * upper,
colour = "count")) +
geom_point(data = countDF,
aes(x = year, y = 100 * estimate, colour = "count")) +
scale_colour_manual(name = "",
labels = c("ECDC HIV Modelling tool estimates",
"Resulting cascade estimates",
"COUNT estimate and 95% CI"),
values = c(asrcols[2], asrcols[5], "black"),
limits = c("ecdc", "cascade", "count")) +
scale_fill_manual(name = "",
labels = c("ECDC HIV Modelling tool estimates",
"Resulting cascade estimates",
"COUNT estimate and 95% CI"),
values = c(asrcols[2], asrcols[5], NA),
limits = c("ecdc", "cascade", "count")) +
scale_alpha_manual(name = "",
labels = c("ECDC HIV Modelling tool estimates",
"Resulting cascade estimates",
"COUNT estimate and 95% CI"),
values = c(0.4, 0.4, NA),
limits = c("ecdc", "cascade", "count")) +
scale_x_continuous(breaks = seq(startYear,
analysisYear, by = xBreak)) +
coord_cartesian(ylim = c(0, 20)) +
ylab("Percentage undiagnosed (%)") + xlab("Year") +
plotOpts + theme(legend.justification = c(0, 0),
legend.position = c(0.05, 0.05))
if (savePlots) {
# Plot specs
plotWidth <- 10
plotHeight <- 10
plotUnits <- "cm"
# Create figure folder
dir.create(file.path(figFolder, toString(analysisYear)),
showWarnings = FALSE)
# Do the plots
ggsave(file.path(figFolder, toString(analysisYear),
paste0("undiagnosedPercent-",
toString(analysisYear), ".png")),
plot = undiagPlot, width = plotWidth, height = plotHeight,
units = plotUnits)
}
}
```
```{r plot results}
# Plot some figures of our cascade
if (plotCascade) {
# Load default plot options
source(file.path(Rcode, "PlotOptions.R"), echo = TRUE)
# Tidy up data for plotting
plotData <- hivCascade %>%
filter(year >= startYear, year <= analysisYear) %>%
filter(stage %in% c("undiagnosed", "diagnosed","unsuppressed",
"suppressed"))
# Set up plot variables
plotCols <- asrcols[c(1, 2, 4, 5)]
plotLabels <- c("Undiagnosed",
"Diagnosed no ART",
"On ART unsuppressed VL",
"On ART suppressed VL")
xValues <- seq(startYear, analysisYear, by = xBreak)
# PLDHIV overall and by proportion
plhivPlotNum <- ggplot(data = plotData, aes(x = year, y = value,
fill = stage)) + geom_area(stat="identity") +
ylab("Number of PLHIV") + xlab("Year") +
scale_fill_manual(values = plotCols, name = "", labels = plotLabels,
guide = guide_legend(nrow = 2)) +
scale_x_continuous(breaks = xValues) + plotOpts
plhivPlotProp <- ggplot(data = plotData, aes(x = year, y = 100 * value,
fill = stage)) +
geom_area(stat="identity", position= "fill") +
ylab("Percentage of PLHIV") + xlab("Year") +
scale_fill_manual(values = plotCols, name = "", labels = plotLabels,
guide = guide_legend(nrow = 2)) +
scale_x_continuous(breaks = xValues) +
scale_y_continuous(labels = percent) +
plotOpts
# Display plots
print(plhivPlotNum)
print(plhivPlotProp)
if (savePlots) {
# Plot specs
plotWidth <- 10
plotHeight <- 10
plotUnits <- "cm"
# Create figure folder
dir.create(file.path(figFolder, toString(analysisYear)),
showWarnings = FALSE)
# Do the plots
ggsave(file.path(figFolder, toString(analysisYear),
paste("cascadeIncidence-", "numPLHIV",
"-", toString(analysisYear), "_", undiagString,
".png",sep ="")),
plot = plhivPlotNum, width = plotWidth, height = plotHeight,
units = plotUnits)
ggsave(file.path(figFolder, toString(analysisYear),
paste("cascadeIncidence-", "numDiags",
"-", toString(analysisYear), "_", undiagString,
".png",sep ="")),
plot = plhivPlotProp, width = plotWidth, height = plotHeight,
units = plotUnits)
}
# Do time varying plots ------------------------------------------------
plotStages <- c("undiagnosed", "diagnosed", "unsuppressed",
"suppressed")
if (analysisYear <= 2014) {
yLabels <- c("undiagnosed" = "Number undiagnosed",
"diagnosed" = "Number diagnosed untreated",
"unsuppressed" = "Number on ART VL > 400",
"suppressed" = "Number with VL < 400")
} else {
yLabels <- c("undiagnosed" = "Number undiagnosed",
"diagnosed" = "Number diagnosed untreated",
"unsuppressed" = "Number on ART VL > 200",
"suppressed" = "Number with VL < 200")
}
# Loop across stages and save plots
plotCurrent <- list()
for (ii in plotStages) {
#Select data we want
tempData <- filter(plotData, stage == ii)
# Plot the stage over time
plotCurrent[[ii]] <- ggplot(data = tempData,
aes(x = year, y = value)) +
geom_ribbon(aes(ymin = lower, ymax = upper),
fill = asrcols[2], alpha = 0.4) +
geom_line(color = asrcols[2]) +
scale_x_continuous(breaks = seq(startYear,
analysisYear, by = xBreak)) +
expand_limits(y = 0) +
ylab(yLabels[ii]) + xlab("Year") +
plotOpts
# Save the current plot
if (savePlots) {
# Plot specs
plotWidth <- 10
plotHeight <- 10
plotUnits <- "cm"
ggsave(file.path(figFolder, toString(analysisYear),
paste("GBMstage-", ii, "_",
toString(startYear), "-", toString(analysisYear),
"_", undiagString,
".png", sep = "")), plot = plotCurrent[[ii]], width = plotWidth,
height = plotHeight, units = plotUnits)
}
}
}
```
```{r Plot ECDC new infections}
if (plotCascade) {
# Load default plot options
source(file.path(Rcode, "PlotOptions.R"), echo = TRUE)
# Load data
newInfectionsFile <- file.path(dataFolder, toString(analysisYear),
"new_infections_gbm.csv")
newInfects <- read.csv(newInfectionsFile)
# Create plot
plotInfects <- ggplot(data = newInfects, aes(x = year)) +
geom_ribbon(aes(ymin = lower, ymax = upper),
fill = "blue", alpha = 0.3) +
geom_point(aes(y = diagnoses), colour = "blue", shape = 16) +
geom_line(aes(y = infections, colour = "ipd", linetype = "ipd")) +
geom_ribbon(aes(ymin = lower_npd, ymax = upper_npd),
fill = "red", alpha = 0.3) +
geom_point(aes(y = diagnoses_npd), colour = "red", shape = 17) +
geom_line(aes(y = infections_npd, colour = "npd", linetype = "npd")) +
geom_line(aes(y = (infections + infections_npd)/2, colour = "best",
linetype = "best")) +
scale_x_continuous(breaks = seq(startYear,
analysisYear, by = xBreak)) +
scale_colour_manual(name = "",
limits = c("ipd", "npd", "best"),
labels = c("Including diagnosed overseas (data: blue discs)",
"Excluding diagnosed overseas (data: red triangles)",
"Estimated new infections"),
values = c("blue", "red", "black"),
guide = guide_legend(nrow = 3)) +
scale_linetype_manual(name = "",
limits = c("ipd", "npd", "best"),
labels = c("Including diagnosed overseas (data: blue discs)",
"Excluding diagnosed overseas (data: red triangles)",
"Estimated new infections"),
values = c("longdash", "dotted", "solid"),
guide = guide_legend(nrow = 3)) +
expand_limits(y = 0) +
ylab("Number") + xlab("Year") +
plotOpts + theme(legend.justification=c(0,0),
legend.position=c(0.05,0.05))
if (savePlots) {
# Plot specs
plotWidth <- 11
plotHeight <- 11
plotUnits <- "cm"
ggsave(file.path(figFolder, toString(analysisYear),
paste0("ECDC_new_infections-",
toString(analysisYear), ".png")),
plot = plotInfects, width = plotWidth, height = plotHeight,
units = plotUnits)
}
}
```
```{r Combination plots}
# Create some combined plots for publications
LoadLibrary(cowplot)
LoadLibrary(gridExtra)
# Create a combined cascade plot - extract legend so we only use it one
#
legend <- GetLegend(plhivPlotNum)
plhivPlotNum2 <- plhivPlotNum + theme(legend.position="none")
plhivPlotProp2 <- plhivPlotProp + theme(legend.position="none")
plhivCombined <- grid.arrange(legend, plhivPlotNum2, plhivPlotProp2,
ncol=2, nrow = 2,
layout_matrix = rbind(c(1,1), c(2,3)),
widths = c(4, 4), heights = c(0.6, 3.4))
# Create a cascade main plot
# cascadeFig1 <- ggdraw() +
# draw_plot(undiagPlot, 0, 0.5, 0.5, 0.5) +
# draw_plot(plotInfects, 0.5, 0.5, 0.5, 0.5) +
# draw_plot(plhivCombined, 0, 0, 1, 0.5) +
# draw_plot_label(c("A", "B", "C"), c(0, 0.5, 0), c(1, 1, 0.5),
# size = 12)
# Reviewer wants labels on each subfigure
cascadeFig1 <- ggdraw() +
draw_plot(undiagPlot, 0, 0.5, 0.5, 0.5) +
draw_plot(plotInfects, 0.5, 0.5, 0.5, 0.5) +
draw_plot(plhivPlotNum, 0, 0, 0.5, 0.5) +
draw_plot(plhivPlotProp, 0.5, 0, 0.5, 0.5) +
draw_plot_label(c("A", "B", "C", "D"), c(0, 0.5, 0, 0.5),
c(1, 1, 0.5, 0.5), size = 12)
# Create individual stage plots - need to add titles
cascadeStagePlots <- plotCurrent
cascadeStagePlots$undiagnosed <- plotCurrent$undiagnosed +
ggtitle("Undiagnosed")
cascadeStagePlots$diagnosed <- plotCurrent$diagnosed +
ggtitle("Diagnosed")
cascadeStagePlots$unsuppressed <- plotCurrent$unsuppressed +
ggtitle("Unsuppressed")
cascadeStagePlots$suppressed <- plotCurrent$suppressed +
ggtitle("Suppressed")
cascadeFig2 <- plot_grid(cascadeStagePlots$undiagnosed,
cascadeStagePlots$diagnosed,
cascadeStagePlots$unsuppressed, cascadeStagePlots$suppressed,
labels = c("A", "B", "C", "D"))
if (savePlots) {
# Save cascade combined figure
ggsave(file.path(figFolder, toString(analysisYear), paste0("Cascades-",
toString(analysisYear), ".png")),
plot = plhivCombined, width = 20, height = 10,
units = "cm")
# Save first cascade figure
ggsave(file.path(figFolder, toString(analysisYear),
paste0("Cascade_Combined-",
toString(analysisYear), ".png")),
plot = cascadeFig1, width = 24, height = 22,
units = "cm")
# Save second cascade figure
ggsave(file.path(figFolder, toString(analysisYear),
paste0("Cascade_Stages-",
toString(analysisYear), ".png")),
plot = cascadeFig2, width = 20, height = 20,
units = "cm")
}
```