swh:1:snp:11178938c71179d6353f5f1bb8c5e9e2f87dae4d
Raw File
Tip revision: e012c1df579871600334847e254a1ecc6c053592 authored by Amanda Larracuente on 10 May 2022, 19:52:54 UTC
Updated README
Tip revision: e012c1d
generate_Fig4-S1A.R
library(tidyverse)
library(cowplot)


##############Goodness of fit (all non-coding snps)
emp <- read_delim("../Figure_4/simulations_data/sumst_ZI_in2rmal_overall_noncoding.txt", 
                  " ", escape_double = FALSE, col_names = FALSE, 
                  trim_ws = TRUE)
colnames(emp) = c("pi.m","s.m","tajD.m")
emp



### load sweep

sw = read_delim("simulations_models/sweep_S.2237_t.0.088400.txt", " ",escape_double = FALSE, col_names = c('pi','s','tajD'), 
                comment = "#", trim_ws = TRUE)
sw$model = "sweep"

const = read_delim("simulations_models/const_S.2237.txt", " ",escape_double = FALSE, col_names = c('pi','s','tajD'), 
                comment = "#", trim_ws = TRUE)
const$model = "const"

expon_sd = read_delim("simulations_models/exp.growth_S.2237_alpha.0.263700.txt", " ",escape_double = FALSE, col_names = c('pi','s','tajD'), 
                   comment = "#", trim_ws = TRUE)
expon_sd$model = "exp_growth_sd"


##### 
datos = bind_rows(sw,const,expon_sd)

datos = datos %>% pivot_longer(-model, "parameter","value") %>% filter(parameter != "s")
emp2 = data.frame("parameter" = c("pi","s","tajD"), "value" = as.numeric(emp))  %>% filter(parameter != "s")

mod_hist = ggplot(datos, aes(x = value))+
  geom_histogram(fill="white", color="black")+
  facet_grid(model~parameter, scales = "free_x")+
  geom_vline(data = emp2,
             aes(xintercept = value),color="red")

mod_bp = ggplot(datos, aes(x = model, y=value))+
  geom_boxplot(fill="white", color="black")+
  facet_grid(parameter~., scales = "free_y")+
  geom_hline(data = emp2,
             aes(yintercept = value),color="blue",linetype = "dashed")


mod_hist
mod_bp

###### Empirical Cumulative Distribution Function


my_ecdf = function(df,emp) {
  par(mfrow=c(1,2))
  p_pi = ecdf(df$pi)
  x = p_pi(emp$pi.m)
  
  plot(p_pi)
  abline(v = emp$pi.m,col='red')
  
  p_tajD = ecdf(df$tajD)
  y = p_tajD(emp$tajD.m)
  
  plot(p_tajD)
  abline(v = emp$tajD.m,col='red')
  
  res = c(x,y)
  names(res) = c("p_pi","p_tajD")
  return(res)
}

##### 

prob_models = data.frame(
           "constant" = my_ecdf(const,emp),
           "expon_sd" = my_ecdf(expon_sd,emp),
           "sweep" = my_ecdf(sw,emp)
           )

prob_models


#### boxplots for paper

p = filter(datos, parameter == "pi")

probs = prob_models[1,]
probs = round(probs, 4)

pi_all = ggplot(p, aes(x = model, y=value))+
  geom_boxplot(fill="white", color="black")+
  facet_grid(scales = "free_y")+
  geom_hline(data = filter(emp2, parameter == "pi"),
             aes(yintercept = value),color="blue",linetype = "dashed")+
  scale_x_discrete(labels=c(
    paste("Constant\nfrequency\n(p =", probs[1],")",sep = ""),
    paste("Exp.growth\n\U03B1 = 0.26\n(p =", probs[2],")",sep = ""),
    paste("Selective\nsweep\n(p =", probs[3],")",sep = "")))+
  labs(x="",y="\U03C0")+
  theme_bw(base_size = 20)+
  theme()


d = filter(datos, parameter == "tajD")
probs = prob_models[2,]
probs = round(probs, 4)

d_all = ggplot(d, aes(x = model, y=value))+
  geom_boxplot(fill="white", color="black")+
  facet_grid(scales = "free_y")+
  geom_hline(data = filter(emp2, parameter == "tajD"),
             aes(yintercept = value),color="blue",linetype = "dashed")+
  scale_x_discrete(labels=c(
    paste("Constant\nfrequency\n(p =", probs[1],")",sep = ""),
    paste("Exp.growth\n\U03B1 = 0.26\n(p =", probs[2],")",sep = ""),
    paste("Selective\nsweep\n(p =", probs[3],")",sep = "")))+
  labs(x="",y="Tajima's D")+
  theme_bw(base_size = 20)+
  theme()

bp_all = plot_grid(pi_all, d_all)
bp_all

back to top