swh:1:snp:11178938c71179d6353f5f1bb8c5e9e2f87dae4d
Tip revision: e012c1df579871600334847e254a1ecc6c053592 authored by Amanda Larracuente on 10 May 2022, 19:52:54 UTC
Updated README
Updated README
Tip revision: e012c1d
generate_Fig4-S1A.R
library(tidyverse)
library(cowplot)
##############Goodness of fit (all non-coding snps)
emp <- read_delim("../Figure_4/simulations_data/sumst_ZI_in2rmal_overall_noncoding.txt",
" ", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
colnames(emp) = c("pi.m","s.m","tajD.m")
emp
### load sweep
sw = read_delim("simulations_models/sweep_S.2237_t.0.088400.txt", " ",escape_double = FALSE, col_names = c('pi','s','tajD'),
comment = "#", trim_ws = TRUE)
sw$model = "sweep"
const = read_delim("simulations_models/const_S.2237.txt", " ",escape_double = FALSE, col_names = c('pi','s','tajD'),
comment = "#", trim_ws = TRUE)
const$model = "const"
expon_sd = read_delim("simulations_models/exp.growth_S.2237_alpha.0.263700.txt", " ",escape_double = FALSE, col_names = c('pi','s','tajD'),
comment = "#", trim_ws = TRUE)
expon_sd$model = "exp_growth_sd"
#####
datos = bind_rows(sw,const,expon_sd)
datos = datos %>% pivot_longer(-model, "parameter","value") %>% filter(parameter != "s")
emp2 = data.frame("parameter" = c("pi","s","tajD"), "value" = as.numeric(emp)) %>% filter(parameter != "s")
mod_hist = ggplot(datos, aes(x = value))+
geom_histogram(fill="white", color="black")+
facet_grid(model~parameter, scales = "free_x")+
geom_vline(data = emp2,
aes(xintercept = value),color="red")
mod_bp = ggplot(datos, aes(x = model, y=value))+
geom_boxplot(fill="white", color="black")+
facet_grid(parameter~., scales = "free_y")+
geom_hline(data = emp2,
aes(yintercept = value),color="blue",linetype = "dashed")
mod_hist
mod_bp
###### Empirical Cumulative Distribution Function
my_ecdf = function(df,emp) {
par(mfrow=c(1,2))
p_pi = ecdf(df$pi)
x = p_pi(emp$pi.m)
plot(p_pi)
abline(v = emp$pi.m,col='red')
p_tajD = ecdf(df$tajD)
y = p_tajD(emp$tajD.m)
plot(p_tajD)
abline(v = emp$tajD.m,col='red')
res = c(x,y)
names(res) = c("p_pi","p_tajD")
return(res)
}
#####
prob_models = data.frame(
"constant" = my_ecdf(const,emp),
"expon_sd" = my_ecdf(expon_sd,emp),
"sweep" = my_ecdf(sw,emp)
)
prob_models
#### boxplots for paper
p = filter(datos, parameter == "pi")
probs = prob_models[1,]
probs = round(probs, 4)
pi_all = ggplot(p, aes(x = model, y=value))+
geom_boxplot(fill="white", color="black")+
facet_grid(scales = "free_y")+
geom_hline(data = filter(emp2, parameter == "pi"),
aes(yintercept = value),color="blue",linetype = "dashed")+
scale_x_discrete(labels=c(
paste("Constant\nfrequency\n(p =", probs[1],")",sep = ""),
paste("Exp.growth\n\U03B1 = 0.26\n(p =", probs[2],")",sep = ""),
paste("Selective\nsweep\n(p =", probs[3],")",sep = "")))+
labs(x="",y="\U03C0")+
theme_bw(base_size = 20)+
theme()
d = filter(datos, parameter == "tajD")
probs = prob_models[2,]
probs = round(probs, 4)
d_all = ggplot(d, aes(x = model, y=value))+
geom_boxplot(fill="white", color="black")+
facet_grid(scales = "free_y")+
geom_hline(data = filter(emp2, parameter == "tajD"),
aes(yintercept = value),color="blue",linetype = "dashed")+
scale_x_discrete(labels=c(
paste("Constant\nfrequency\n(p =", probs[1],")",sep = ""),
paste("Exp.growth\n\U03B1 = 0.26\n(p =", probs[2],")",sep = ""),
paste("Selective\nsweep\n(p =", probs[3],")",sep = "")))+
labs(x="",y="Tajima's D")+
theme_bw(base_size = 20)+
theme()
bp_all = plot_grid(pi_all, d_all)
bp_all