https://github.com/bnavarrodominguez/SD-Population-genomics
Raw File
Tip revision: e012c1df579871600334847e254a1ecc6c053592 authored by Amanda Larracuente on 10 May 2022, 19:52:54 UTC
Updated README
Tip revision: e012c1d
generate_Fig1-S1.R
library(tidyverse)
library(cowplot)


#### load data from mosdepth

temp = list.files(path = "metasembly_rsp_locs/",pattern="*bed.gz",full.names = TRUE)

##do not load the recombinant SD-ZI
# temp
# temp = temp[c(1:6,8:16)]
# temp

# #### do not load the recombinant SD-ZI or the low coverage SD
temp
temp = temp[c(1:6,8,10:16)]
temp



myfiles = lapply(temp, function(x){read.delim(gzfile(x,'rt'),sep = '\t',header = FALSE)})

nms = gsub('metasembly_rsp_locs//','',temp)
nms = gsub('.rsp.loc.dp.regions.bed.gz', "",nms)
nms
names(myfiles) = nms

datos = do.call(rbind,myfiles)
head(datos)

colnames(datos) = c("contig","start",'stop','repeat','dp')
datos$line = sub("\\..*", "", rownames(datos))
head(datos)
datos$geno = ifelse(grepl('ERR',datos$line),'iso-1','sd-zi')

head(datos)

table(datos$contig)


#### load average depth for chromosome 2 for normalization
ave.line <- read.table("metasembly_100bp/ave.depth.chrom2.txt", quote="\"", comment.char="")
colnames(ave.line) = c("line","ave.line")

datos = left_join(datos,ave.line) %>% mutate("norm.dp" = dp/ave.line)


#### name Rsp loci
rsp_loc = ifelse(datos$contig == "2R_meta-contig_PBcR-BLASR-r6" & datos$stop <180000, 
                 "Rsp-major", 
                 ifelse(datos$contig=="X","Rsp-like",
                        ifelse(datos$contig=="3L","3L",
                               ifelse(datos$contig=='2R_meta-contig_PBcR-BLASR-r6' & datos$start >180000 & datos$stop < 24000000, "Rsp-minor", 
                                      ifelse(datos$contig=='2R_meta-contig_PBcR-BLASR-r6' & datos$start >24000000,
                                                          "60A", ifelse(datos$contig =="G2_rsp_contig_pilon", 
                                                         "Rsp-proximal","other") )))))
datos$rsp_loc = as.character(rsp_loc)



dat = datos %>% 
  group_by(geno,line,rsp_loc) %>% 
  summarise('tot.loc'=sum(norm.dp)) %>%
  filter(rsp_loc != "Rsp-like")

dat$rsp_loc = factor(dat$rsp_loc,levels = c("Rsp-proximal","Rsp-major","Rsp-minor","60A","3L"),ordered = TRUE)

cols = c("iso-1"="darkslategrey",'sd-zi'="darkorange")

ploc = ggplot(dat,aes(x=rsp_loc,y = tot.loc,color=geno))+
  geom_boxplot()+
  scale_color_manual(values = cols,labels=c("Iso-1","ZI[SD-Mal]"))+
  theme_minimal(base_size = 14)

ploc = ploc+
  ylab('Total Depth')+
  #xlab(label = "Rsp locus")+
  theme_bw(base_size = 14)+
  theme(legend.position = c(0.9, 0.9),
        #legend.position = "left",
        axis.text.x = element_text(face = "italic"),
        axis.title.x = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(face='italic'))

ploc

back to top