1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460 | #---
#R script to analyse spore data and infectivity of Odospora colligata
#authors: Andrew Jackson & Charlotte Kunze
#---
#load packages
library(Hmisc)
library(tidyverse)
library(ggplot2)
library(scales)
library(rjags)
library(ggpubr)
library(cowplot)
#set the working directory in the same folder, our Rscript is saved
setwd(dirname(rstudioapi::getSourceEditorContext()$path))
#### Import the data ####
spore_data <- read.csv("SporesNoMaleNA.csv",
header = TRUE, stringsAsFactors = FALSE)
# only exposed animals
spore_data_I <- spore_data %>% filter(exposed == "I")
names(spore_data_I)
#first-plots
g1 <- ggplot(spore_data_I, aes(x = realtemp, y = no_spore,
color = treatment,
group = treatment)) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
geom_point() +
geom_smooth()
print(g1)
## Try fitting the more complex beta function to the infection rate data
#These data are the column `infect` in the subsetted data object `spore_data_I`.
#In the jags code below i need to force T_max and T_min to be outside the range of the data, else it generates invalid numbers and wont run.
#### Model formulation infectivity ####
#Source the beta function we used to model the TPCs which is held in an external `*.R` file as it is called from multiple notebooks.
source("betaFunction.R")
# add points for the average by temperature
summary_spore <- spore_data_I %>%
group_by(treatment, temperature) %>%
summarise(mu = mean(infect), tt = mean(realtemp),
Ninfect = sum(infect), Ntrials = length(infect))
#Fitting binominal model, aggregating the data by temperature and treatment.
# Here Temperature T was replaced in the maths above with x when specifying the model.
model_beta_fun_binomial = '
model
{
# Likelihood
for (i in 1:n) {
y[i] ~ dbinom(p[i], N[i])
# make sure all values are >= 0
p[i] <- min(max(mu[i], 0) , 1)
mu[i] <- c_m[treat[i]] *
( (x_max[treat[i]] - x[i]) / (x_max[treat[i]] - x_opt[treat[i]]) ) *
( (x[i] - x_min[treat[i]]) / (x_opt[treat[i]] - x_min[treat[i]]) ) ^
( (x_opt[treat[i]] - x_min[treat[i]]) / (x_max[treat[i]] - x_opt[treat[i]]) )
}
# Priors
for (j in 1:M) {
x_min[j] ~ dunif(5, 15)
x_opt[j] ~ dunif(x_min[j] + 0, x_min[j] + 20)
x_max[j] ~ dunif(x_opt[j] + 0, 35)
c_m[j] ~ dunif(0, 1)
}
}
'
#Set up and fit the binomial model to the `spore_data_I` object.
model_beta_cnx <- textConnection(model_beta_fun_binomial)
model_data_binomial <- list(y = summary_spore$Ninfect,
x = summary_spore$tt,
N = summary_spore$Ntrials,
n = nrow(summary_spore),
treat = as.numeric(factor(summary_spore$treatment)),
M = length(unique(summary_spore$treatment)))
# The jags model is initialised using jags.model()
model_binomial <- jags.model(model_beta_cnx, data = model_data_binomial,
n.chains = 3, quiet = TRUE)
close(model_beta_cnx) # close the model connection as we dont need it any more
# We then use coda.samples to ask for posterior draws
# which we will use as a reflectin of the posterior.
output_binomial <- coda.samples(model = model_binomial,
variable.names = c("c_m", "x_max", "x_min", "x_opt"),
n.iter = 1000, quiet = TRUE)
#BGR test
gelman.diag(output_binomial)
#Summarise the posterior
post_smry_binomial <- summary(output_binomial)
post_means_binomial <- post_smry_binomial$statistics[,1]
print(post_smry_binomial)
#save model output in a text file using sink()
#sink('infectivity.csv')
print(summary(output_binomial))
#save model information in a dataframe
summary_model<- summary(output_binomial)
#sub data frame with only quantiles to add to the plot
summy_model <- as.data.frame(summary_model$quantiles) %>%
tibble::rownames_to_column() %>%
rename(dummy = rowname) %>%
mutate(treatment = ifelse(str_detect(dummy, '1'), paste('constant'),ifelse(str_detect(dummy, '2'), paste('fluctuation'), 'pulse')))
# specify a range of temps over which to evaluate our function
xx <- seq(10, 30, length = 100)
# and the coresponding estimate based on the means of the posteriors
yy1 <- b_fun(xx,
c_m = post_means_binomial["c_m[1]"],
x_max = post_means_binomial["x_max[1]"],
x_min = post_means_binomial["x_min[1]"],
x_opt = post_means_binomial["x_opt[1]"])
yy2 <- b_fun(xx,
c_m = post_means_binomial["c_m[2]"],
x_max = post_means_binomial["x_max[2]"],
x_min = post_means_binomial["x_min[2]"],
x_opt = post_means_binomial["x_opt[2]"])
yy3 <- b_fun(xx,
c_m = post_means_binomial["c_m[3]"],
x_max = post_means_binomial["x_max[3]"],
x_min = post_means_binomial["x_min[3]"],
x_opt = post_means_binomial["x_opt[3]"])
# write the predictions in a data frame for further use
inf_cs <- as.data.frame(cbind(xx,yy1)) %>%
mutate(treatment = paste('constant'),
prediction = yy1) %>%
select(-yy1)
inf_flux <- as.data.frame(cbind(xx,yy2))%>%
mutate(treatment = paste('fluctuation'),
prediction = yy2)%>%
select(-yy2)
inf_pulse <- as.data.frame(cbind(xx,yy3))%>%
mutate(treatment = paste('pulse'),
prediction = yy3)%>%
select(-yy3)
#merge predictions and data for single treatments into one df
inf_total <- rbind(inf_cs, inf_flux, inf_pulse)
# save the predicitions in a dataframe to make the values accessable
df <- as.data.frame(model_data_binomial) %>%
mutate(treatment = paste(ifelse(treat == 1, 'constant', ifelse(treat == 2, 'fluctuation', 'pulse'))))
#calculate the CI of the model for the different temperatures
data = binconf(x=df$y, n=df$N, alpha = 0.05) #store in a list
data = as.data.frame(data) #change list to data.frame
#import data with the mean temperature and treatment to bind to the orginial dataset
MeanTemp <- read_csv("MeanTempTreatments.csv") %>%
rename( x = meanTemp) %>%
select(-X1)
#create a new total df with temperature information
all_df <- cbind(data, MeanTemp) %>%
left_join(df, by = c('x', 'treatment')) %>% #merge with the old one by temperature and treatment
group_by(x, treatment) %>%
mutate(sd = (sqrt(n())* (Upper-Lower)/3.92), #calculate se from lower and upper ci
se = sd/sqrt(n()))
#### infectivity plot Fig. 2a####
# Note: 2.5 % and 97.5 % CI of maximum infectivity, Tmin, Topt and Tmax are added to the plot using geom_segment
# data are stored in the summy_model datafile
inf_plot <- ggplot(all_df, aes(y = I(model_data_binomial$y / model_data_binomial$N), x = model_data_binomial$x, col = treatment, shape = treatment))+
geom_point(size = 3.5)+
geom_errorbar(aes(ymin = I(model_data_binomial$y / model_data_binomial$N) -se, ymax = I(model_data_binomial$y / model_data_binomial$N)+se), width = .2)+
#cm
geom_segment(x = 32, xend = 32, y = summy_model[1,2], yend = summy_model[1,6], col ='#003C67FF',size = 1.3)+ #constant
geom_segment(x = 32.5, xend = 32.5, y = summy_model[2,2],yend =summy_model[2,6], col ='#EFC000FF', size = 1.3)+ #fluctuation
geom_segment(x = 33, xend = 33, y = summy_model[3,2],yend = summy_model[3,6],col = '#A73030FF',size = 1.3) +#pulse
#Topt
geom_segment(x = summy_model[10,2] , xend = summy_model[10,6], y = 1.29, yend = 1.29, col = '#003C67FF',size = 1.3)+ #constant
geom_segment(x = summy_model[11,2], xend = summy_model[11,6], y = 1.27, yend = 1.27, col = '#EFC000FF',size = 1.3)+ #fluctuation
geom_segment(x = summy_model[12,2] , xend = summy_model[12,6], y = 1.25, yend = 1.25,col = '#A73030FF',size = 1.3)+ #pulse
# Tmin
# for better visibility we included only the 97.5% interval, the 2.5% interval below 8 is indicated by arrows
geom_segment(x = 8 , xend = summy_model[7,6] , y = 1.29, yend = 1.29, arrow = arrow(ends = 'first', length = unit(0.11,'cm')), col = '#003C67FF',size = 1.3)+#constant
geom_segment(x = 8, xend = summy_model[8,6], y = 1.27, yend = 1.27, arrow = arrow(ends = 'first', length = unit(0.11,'cm')), col = '#EFC000FF',size = 1.3)+ #fluctuation
geom_segment(x = 8, xend = summy_model[9,6] , y = 1.25, yend = 1.25, arrow = arrow(ends = 'first',length = unit(0.11,'cm')), col = '#A73030FF',size = 1.3)+#pulse
#Tmax
# for better visibility Tmax values over 30 degree are indicated by arrows
geom_segment(x = summy_model[4,2] , xend = 30, y = 1.29, yend = 1.29, arrow = arrow(length = unit(0.11,'cm')), col = '#003C67FF',size = 1.3)+#constant
geom_segment(x = summy_model[5,2] , xend = summy_model[5,6], y = 1.27, yend = 1.27, col = '#EFC000FF',size = 1.3)+#fluctuation
geom_segment(x = summy_model[6,2], xend = 30, y = 1.25, yend = 1.25, arrow = arrow(length = unit(0.11,'cm')), col = '#A73030FF',size = 1.3)+#pulse
geom_line(data = inf_total, aes(x = xx, y = prediction, color = treatment), linetype = 'longdash', size = 0.8)+
scale_x_continuous(breaks = seq(10, 28,3), limits=c(8,30))+
scale_y_continuous(limits = c(0,1.15), breaks= seq(0,1,0.2))+
scale_shape_manual(values=c( 16,17,15), labels = c('constant', 'fluctuating', 'heat wave'))+
scale_color_manual(values=c('#003C67FF','#EFC000FF','#A73030FF'),labels = c('constant', 'fluctuating', 'heat wave'))+
labs(title = '',x = 'mean Temperature (in °C)', y = 'infection rate', col = ' ', shape = ' ')+
coord_cartesian(ylim = c(0, 1.15), clip="off")+ #changes coord system to include the CI arrows in our plot
theme( panel.background = element_rect(fill = NA),
panel.border= element_rect(colour = "black", fill=NA, size=0.5),
strip.background = element_rect(color = 'black', fill = 'grey95'),
legend.background = element_blank(),
legend.title = element_text(hjust=0),
legend.position = 'none',
legend.key = element_blank(),
text = element_text(size=21),
plot.margin = unit(c(1,2,1.5,1.5), "cm")) #t, r, b, l bzw. top, right, bottom, left
ggpar(inf_plot)
#####################################################################################################################
#### Burden Data ####
#Now we can explore and model the data for parasite burden.
#These are individuals that have been infected *and* have positive burden *and* died within the last X days of the experiment.
burden_data <- spore_data_I %>% filter(no_spore > 0 & lastday == 1)
#Plot the burden data
g3 <- ggplot(burden_data, aes(x = realtemp, y = no_spore,
color = treatment,
group = treatment)) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
geom_point() +
geom_smooth()
print(g3)
#### Model formulation burden ####
#Define a JAGS model for gaussian errors with a beta function - useful for the burden data.
model_beta_fun_poisson = '
model
{
# Likelihood
for (i in 1:n) {
y[i] ~ dpois(lambda[i])
# log10 link function
lambda[i] <- 10^(mu[i])
mu[i] <- c_m[treat[i]] *
( (x_max[treat[i]] - x[i]) / (x_max[treat[i]] - x_opt[treat[i]]) ) *
( (x[i] - x_min[treat[i]]) / (x_opt[treat[i]] - x_min[treat[i]]) ) ^
( (x_opt[treat[i]] - x_min[treat[i]]) / (x_max[treat[i]] - x_opt[treat[i]]) )
}
# Priors
for (j in 1:M) {
x_min[j] ~ dunif(5, 14)
# x_opt[j] ~ dunif(15, 22)
# x_max[j] ~ dunif(23, 26)
x_opt[j] ~ dunif(x_min[j] + 0, x_min[j] + 10)
x_max[j] ~ dunif(x_opt[j] + 0, 30)
# these rather narrow priors worked during model testing
# x_min[j] ~ dunif(5, 14)
# x_opt[j] ~ dunif(15, 22)
# x_max[j] ~ dunif(23, 26)
c_m[j] ~ dunif(0, 10)
}
sigma ~ dunif(0, 100)
}
'
model_beta_cnx <- textConnection(model_beta_fun_poisson)
model_data_burden <- list(y = burden_data$no_spore,
x = burden_data$realtemp,
n = nrow(burden_data),
treat = as.numeric(factor(burden_data$treatment)),
M = length(unique(burden_data$treatment)))
# The jags model is initialised using jags.model()
model_burden <- jags.model(model_beta_cnx, data = model_data_burden,
n.chains = 3, quiet = TRUE)
close(model_beta_cnx) # close the model connection as we dont need it any more
# We then use coda.samples to ask for posterior draws
# which we will use as a reflectin of the posterior.
output_burden <- coda.samples(model = model_burden,
variable.names = c("c_m", "x_max", "x_min", "x_opt"),
n.iter = 1000, quiet = TRUE)
gelman.diag(output_burden) #The BGR test should return values less than 1.1 to indicate convergence.
#summarise the burden model#
post_smry_burden <- summary(output_burden)
print(post_smry_burden)
post_means_burden <- post_smry_burden$statistics[,1]
print(post_smry_burden)
#save stats in a txt file
#sink('parasite_burden.csv')
print(summary(output_burden))
#sink()
# note: C_m has to be transponated with 10^
#save model information in a dataframe
summary_model_burden<- summary(output_burden)
#sub data frame with only quantiles to add to the plot
burden_CI <- as.data.frame(summary_model_burden$quantiles) %>%
tibble::rownames_to_column() %>%
rename(dummy = rowname) %>%
mutate(treatment = ifelse(str_detect(dummy, '1'), paste('constant'),ifelse(str_detect(dummy, '2'), paste('fluctuation'), 'pulse'))) %>%
gather(key = 'Quantile', value = 'value', -dummy, -treatment) %>%
mutate(transform= ifelse(str_detect(dummy, 'c_m'), 10^value, value)) %>% #backtransform maximum spore data
select(-value) %>% #remove raw values to spread data
spread(key = Quantile, value = transform)
# specify a range of temps over which to evaluate our function
# add in the x_min values + a very small amount to ensure the
# lines are drawn just above the lowest treshold. The resultant
# vector is sorted sequentially.
xx <- sort(c(post_means_burden["x_min[1]"] + 10^-20,
post_means_burden["x_min[2]"] + 10^-20,
post_means_burden["x_min[3]"] + 10^-20,
seq(10, 30, length = 10^3) ) )
# and the coresponding estimate based on the means of the posteriors
yy1 <- b_fun(xx,
c_m = post_means_burden["c_m[1]"],
x_max = post_means_burden["x_max[1]"],
x_min = post_means_burden["x_min[1]"],
x_opt = post_means_burden["x_opt[1]"])
yy2 <- b_fun(xx,
c_m = post_means_burden["c_m[2]"],
x_max = post_means_burden["x_max[2]"],
x_min = post_means_burden["x_min[2]"],
x_opt = post_means_burden["x_opt[2]"])
yy3 <- b_fun(xx,
c_m = post_means_burden["c_m[3]"],
x_max = post_means_burden["x_max[3]"],
x_min = post_means_burden["x_min[3]"],
x_opt = post_means_burden["x_opt[3]"])
#store predictions in data frames
spore_cs <- as.data.frame(cbind(yy1,xx))%>%
mutate(treatment = paste('constant'),
prediction = yy1) %>%
select(-yy1)
spore_flux <- as.data.frame(cbind(yy2,xx))%>%
mutate(treatment = paste('fluctuation'),
prediction = yy2) %>%
select(-yy2)
spore_pulse <- as.data.frame(cbind(yy3,xx))%>%
mutate(treatment = paste('pulse'), #add treatment specification
prediction = yy3) %>%
select(-yy3)
#merge the predictions
spore_total <- rbind(spore_cs, spore_flux, spore_pulse) %>%
mutate(trans_pred = 10^prediction)
#calculate mean, sd,se
data <- as.data.frame(model_data_burden) %>%
mutate(treatment = paste(ifelse(treat == 1, 'constant', ifelse(treat == 2, 'fluctuation', 'pulse')))) %>%
group_by(treatment, x) %>%
mutate(mean = mean(y, na.rm = T),
sd = sd(y, na.rm = T),
se = sd/sqrt(n()))
#### Plot burden Fig. 2b####
# Note: CI of maximum spore burden, topt, tmin,tmax are stored in the dataframe burden_CI
## and are added in geom_segment
spore_plot <- ggplot(data, aes(x = x, y = mean, color = treatment, shape = treatment ))+
geom_line(data = spore_total, aes(x = xx, y = trans_pred, color = treatment), linetype = 'longdash', size = 0.8)+
geom_point(size = 3.5)+
geom_errorbar(aes(ymin = mean-se, ymax = mean+se), width = .2)+
#Cm
geom_segment(x = 32, xend = 32, y = burden_CI[1,3], yend =burden_CI[1,7],col ='#EFC000FF', size = 1.3)+#cs
geom_segment(x = 32.5, xend = 32.5, y = burden_CI[2,3], yend = burden_CI[2,7], col ='#003C67FF', size = 1.3)+#flux
geom_segment(x = 33, xend = 33, y = burden_CI[3,3],yend = burden_CI[3,7],col = '#A73030FF', size = 1.3) +#pulse
#Topt
geom_segment(x = burden_CI[10,3], xend = burden_CI[10,7], y = 950, yend = 950, col = '#003C67FF', size = 1.3)+ #constant
geom_segment(x = burden_CI[11,3], xend = burden_CI[11,7], y = 938, yend = 938, col = '#EFC000FF', size = 1.3)+ #flux
geom_segment(x = burden_CI[12,3], xend = burden_CI[12,7], y = 930, yend = 930, col = '#A73030FF', size = 1.3)+ #pulse
# Tmin
geom_segment(x = burden_CI[7,3] , xend = burden_CI[7,7] , y = 950, yend = 950, col = '#003C67FF', size = 1.3)+#cs
geom_segment(x = burden_CI[8,3], xend = burden_CI[8,7], y = 938, yend = 938, col = '#EFC000FF', size = 1.3)+#fl
geom_segment(x = burden_CI[9,3], xend = burden_CI[9,7] , y = 930, yend = 930, col = '#A73030FF', size = 1.3)+#pl
#Tmax
geom_segment(x = burden_CI[4,3] , xend = burden_CI[4,7], y = 950, yend = 950, col = '#003C67FF', size = 1.3)+#cs
geom_segment(x = burden_CI[5,3] , xend = burden_CI[5,7], y = 938, yend = 938, col = '#EFC000FF', size = 1.3)+#fl
geom_segment(x = burden_CI[6,3], xend = burden_CI[6,7], y = 930, yend = 930, col = '#A73030FF', size = 1.3)+#pl
scale_y_continuous(limits = c(0,850), breaks= seq(0,800,200))+
scale_x_continuous(breaks = seq(10, 28,3), limits=c(8,30))+
scale_shape_manual(values=c( 16,17,15), labels = c('constant', 'fluctuating', 'heat wave'))+
scale_color_manual(values=c('#003C67FF','#EFC000FF','#A73030FF'),labels = c('constant', 'fluctuating', 'heat wave'))+
labs(title = '',x = 'mean Temperature (in °C)', y = 'spore burden (no of clusters)', col = ' ', shape = ' ')+
coord_cartesian(ylim = c(0, 850), clip="off")+ #changes coord system to include the CI arrows in our plot
theme( panel.background = element_rect(fill = NA),
panel.border= element_rect(colour = "black", fill=NA, size=0.5),
strip.background = element_rect(color = 'black', fill = 'grey95'),
legend.background = element_blank(),
legend.title = element_text(hjust=0),
legend.position ='none',
legend.key = element_blank(),
text = element_text(size=21),
plot.margin = unit(c(1,2,1.5,1.5), "cm")) #t, r, b, l bzw. top, right, bottom, left
ggpar(spore_plot)
#### Figure 2 ####
#note: plot is the host data using the bayesian_host.R script
plot_grid(inf_plot,spore_plot,plot,nrow = 1)
ggsave(plot = last_plot(), 'host_and_parasite_traits_se_inf.tiff', width = 24, height = 9)
|