Revision 469689991e7dbce2be4c4f618584304d91841c49 authored by Chase W. Nelson on 01 October 2020, 17:39:04 UTC, committed by Chase W. Nelson on 01 October 2020, 17:39:04 UTC
1 parent 0b9f8cb
Raw File
mass-spec_vs_riboseq.R
### SARS-CoV-2 ribosome profiling vs. proteomics comparison
library(tidyverse)
library(RColorBrewer)
library(scales)

### set working directory
setwd("/Users/cwnelson88/scripts_NGS/SARS-CoV-2-ORF3d/")

####################################################################################################
### Ribo-seq UPSTREAM PEAKS vs. mass spectrometry iBAQ%

### load & wrangle Ribo-seq depth MAX UPSTREAM PEAKS
riboseq_peak_coverage_maxima <- read_tsv("data/riboseq_upstream_peaks.tsv")
(riboseq_peak_coverage_maxima <- filter(riboseq_peak_coverage_maxima, peak == "Peak"))

### load & wrangle iBAQ%
(proteome_data2 <- read_tsv("data/mass-spec_data.tsv"))
proteome_data2[proteome_data2$`Protein Names` == "ORF9b and ORF9b-short_form", ]$`Protein Names` <- "ORF9b"
proteomic_gene_IDs2 <- c("ORF1ab polyprotein", "surface glycoprotein", "ORF3a protein", "candidate-from-permutation-analysis",
                        "envelope protein", "membrane glycoprotein", 
                        "ORF6 protein", "ORF7a protein", "ORF7b", "ORF8 protein", 
                        "nucleocapsid phosphoprotein", "ORF9b", "ORF9b-short_form")
proteomic_gene_names2 <- c("ORF1ab", "S", "ORF3a", "ORF1ab-sas",
                          "E", "M", 
                          "ORF6", "ORF7a", "ORF7b", "ORF8", 
                          "N", "ORF9b", "ORF9b-short")

proteome_data2$gene <- factor(proteome_data2$`Protein Names`,
                                  levels = proteomic_gene_IDs2,
                                  labels = proteomic_gene_names2)

# Finally, mean of the two datasets
(proteome_data2_means <- proteome_data2 %>%
  group_by(gene) %>%
  summarise(
    mean_iBAQ_pct = mean(`iBAQ%`)
  )) 

### COMBINE the two
(expression_data <- left_join(proteome_data2_means, riboseq_peak_coverage_maxima, by = "gene"))

# Some e.g., ORF1ab-sas aren't shared, so eliminate
(expression_data <- filter(expression_data, ! is.na(reads_per_million)))

### FACTOR GENE BY EXPRESSION (proteomics)
gene_order_proteomics <- arrange(expression_data, desc(mean_iBAQ_pct))$gene
expression_data$gene <- factor(expression_data$gene, levels = gene_order_proteomics)

### CORRELATE
cor.test(expression_data$mean_iBAQ_pct, expression_data$reads_per_million, method = 'spearman') # 0.8909091 p-value = 0.0004284

### PLOT exploration
(expression_data_PLOT <- ggplot(data = expression_data, mapping = aes(x = log10(mean_iBAQ_pct), y = reads_per_million, color = gene, shape = gene)) +
    geom_point(size = 3) +
    xlab("Mass spec expression (log10[iBAQ%])") +
    ylab("Ribo-seq peak (reads per million mapped)") +
    theme_bw() +
    theme(panel.grid = element_blank(),
          legend.title = element_text(size = 9),
          legend.key.size = unit(0.75, "line"),
          legend.text = element_text(size = 9, face = "italic"),
          strip.text = element_text(size = 9),
          axis.text.x = element_text(size = 9), 
          axis.title.x = element_text(size = 9),
          axis.text.y = element_text(size = 9),
          axis.title.y = element_text(size = 9),
          panel.border = element_rect(),
          strip.background = element_blank()) +
    scale_shape_manual(values = c(1:11)) +
    scale_color_manual(values = c(brewer.pal(11, 'RdYlBu')[0:5], 'yellow', brewer.pal(11, 'RdYlBu')[7:11]))) 



################################################################################
### Now instead compare to Ribo-seq MEAN DEPTH PER GENE, BY FRAME
mapped_reads_by_readlength_wCDS <- read_tsv("data/mapped_reads_by_readlength_wCDS.tsv")

# limit to reads of length 30, 5 hpi
mapped_reads_by_readlength_wCDS <- filter(mapped_reads_by_readlength_wCDS, read_length == 30, time == "5 hpi")

### vvv CHANGE THIS vvv ### <-- CHANGE
# in-frame only! #
mapped_reads_by_readlength_wCDS <- filter(mapped_reads_by_readlength_wCDS, frame == "Codon pos 1")
###

# add condition_combn
mapped_reads_by_readlength_wCDS$condition_combn <- paste0(mapped_reads_by_readlength_wCDS$treatment, " / ", mapped_reads_by_readlength_wCDS$time)

# SUMMARIZE read_count by time, treatment, sample, gene, region_type
(mapped_reads_by_readlength_wCDS_SUMMARY <- mapped_reads_by_readlength_wCDS %>%
  group_by(condition_combn, sample, gene, region_type) %>%
  summarize(
    mean_depth = mean(read_count)
  ))

# SUMMARIZE FURTHER by taking the means of the samples
(mapped_reads_by_readlength_wCDS_SUMMARY <- mapped_reads_by_readlength_wCDS_SUMMARY %>%
  group_by(condition_combn, gene, region_type) %>%
  summarize(
    mean_depth = mean(mean_depth)
  )) 

### COMBINE WITH EARLIER EXPRESSION DATA

# set aside proteomics for joining
expression_data_PROTEOMICS <- dplyr::select(expression_data, gene, mean_iBAQ_pct)

# extract upstream peaks
expression_data_PEAKS <- dplyr::select(expression_data, gene, reads_per_million)
unique(expression_data_PEAKS$gene)
names(expression_data_PEAKS) <- c("gene", "riboseq")
expression_data_PEAKS$condition_combn <- "Upstream peak"

# excise treatment means
riboseq_summary_allFrames_PROCESSED <- filter(mapped_reads_by_readlength_wCDS_SUMMARY, region_type == "gene")
riboseq_summary_allFrames_PROCESSED <- rbind(riboseq_summary_allFrames_PROCESSED, filter(mapped_reads_by_readlength_wCDS_SUMMARY, gene == "ORF9b")) # add ORF9b
riboseq_summary_allFrames_PROCESSED[riboseq_summary_allFrames_PROCESSED$gene == "ORF1ab_1", ]$gene <- "ORF1ab" # rename first half of ORF1ab
unique(riboseq_summary_allFrames_PROCESSED$gene)
riboseq_summary_allFrames_PROCESSED <- dplyr::select(riboseq_summary_allFrames_PROCESSED, gene, mean_depth, condition_combn)
names(riboseq_summary_allFrames_PROCESSED) <- c("gene", "riboseq", "condition_combn")

### COMBINE ###
expression_data_COMBINED <- rbind(as.data.frame(expression_data_PEAKS), as.data.frame(riboseq_summary_allFrames_PROCESSED))

# join
expression_data_COMBINED <- left_join(expression_data_COMBINED, expression_data_PROTEOMICS, by = "gene")

# remove NA
expression_data_COMBINED <- filter(expression_data_COMBINED, ! is.na(mean_iBAQ_pct))


### FACTOR GENE BY EXPRESSION (proteomics)
gene_order_proteomics <- arrange(expression_data, desc(mean_iBAQ_pct))$gene
expression_data_COMBINED$gene <- factor(expression_data_COMBINED$gene, levels = gene_order_proteomics)

### FACTOR CONDITION
expression_data_COMBINED$condition_combn <- factor(expression_data_COMBINED$condition_combn,
                                                   levels = c("Upstream peak", "CHX / 5 hpi", "Harr / 5 hpi", "LTM / 5 hpi", "mRNA / 5 hpi"))

##############################
### CORRELATE after limiting to Codon pos 1***
cor.test(filter(expression_data_COMBINED, condition_combn == "Upstream peak")$mean_iBAQ_pct, 
         filter(expression_data_COMBINED, condition_combn == "Upstream peak")$riboseq, 
         method = "spearman")
#Spearman's rank correlation rho
#data:  filter(expression_data_COMBINED, condition_combn == "Upstream peak")$mean_iBAQ_pct and filter(expression_data_COMBINED, condition_combn == "Upstream peak")$riboseq
#S = 24, p-value = 0.0004284
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#  rho 
#0.8909091 

cor.test(filter(expression_data_COMBINED, condition_combn == "CHX / 5 hpi")$mean_iBAQ_pct, 
         filter(expression_data_COMBINED, condition_combn == "CHX / 5 hpi")$riboseq, 
         method = "spearman")
#Spearman's rank correlation rho
#data:  filter(expression_data_COMBINED, condition_combn == "CHX / 5 hpi")$mean_iBAQ_pct and filter(expression_data_COMBINED, condition_combn == "CHX / 5 hpi")$riboseq
#S = 82, p-value = 0.04399
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#  rho 
#0.6272727 

cor.test(filter(expression_data_COMBINED, condition_combn == "Harr / 5 hpi")$mean_iBAQ_pct, 
         filter(expression_data_COMBINED, condition_combn == "Harr / 5 hpi")$riboseq, 
         method = "spearman")
#Spearman's rank correlation rho
#data:  filter(expression_data_COMBINED, condition_combn == "Harr / 5 hpi")$mean_iBAQ_pct and filter(expression_data_COMBINED, condition_combn == "Harr / 5 hpi")$riboseq
#S = 88, p-value = 0.05618
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#  rho 
#0.6 

cor.test(filter(expression_data_COMBINED, condition_combn == "LTM / 5 hpi")$mean_iBAQ_pct, 
         filter(expression_data_COMBINED, condition_combn == "LTM / 5 hpi")$riboseq, 
         method = "spearman")
#Spearman's rank correlation rho
#data:  filter(expression_data_COMBINED, condition_combn == "LTM / 5 hpi")$mean_iBAQ_pct and filter(expression_data_COMBINED, condition_combn == "LTM / 5 hpi")$riboseq
#S = 86, p-value = 0.05188
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#  rho 
#0.6090909 

cor.test(filter(expression_data_COMBINED, condition_combn == "mRNA / 5 hpi")$mean_iBAQ_pct, 
         filter(expression_data_COMBINED, condition_combn == "mRNA / 5 hpi")$riboseq, 
         method = "spearman")
#Spearman's rank correlation rho
#data:  filter(expression_data_COMBINED, condition_combn == "mRNA / 5 hpi")$mean_iBAQ_pct and filter(expression_data_COMBINED, condition_combn == "mRNA / 5 hpi")$riboseq
#S = 156, p-value = 0.3864
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#  rho 
#0.2909091 


### PLOT all conditions
(expression_data_COMBINED_PLOT <- ggplot(data = expression_data_COMBINED, mapping = aes(x = log10(mean_iBAQ_pct), y = riboseq, color = gene, shape = gene)) +
    geom_point(size = 2) + 
    geom_smooth(method = 'lm', se = FALSE, size = 0.4, color = "darkgrey") + 
    xlab("Mass spec expression (log10[iBAQ%])") +
    ylab("Ribo-seq coverage (reads per million mapped)") +
    facet_grid(condition_combn ~ ., scales = "free_y") + 
    theme_bw() +
    theme(panel.grid = element_blank(),
          legend.title = element_text(size = 9),
          legend.key.size = unit(0.75, "line"),
          legend.text = element_text(size = 9, face = "italic"),
          strip.text = element_text(size = 9),
          axis.text.x = element_text(size = 9), 
          axis.title.x = element_text(size = 9),
          axis.text.y = element_text(size = 9),
          axis.title.y = element_text(size = 9),
          panel.border = element_rect(),
          strip.background = element_blank()) +
    scale_shape_manual(values = c(1:11)) +
    scale_color_manual(values = c(brewer.pal(11, 'RdYlBu')[0:5], 'yellow', brewer.pal(11, 'RdYlBu')[7:11]))) 



### FIGURES AND SUPPLEMENT, ALL SITES, ALL CODON POSITIONS

### RELOAD load & wrangle Ribo-seq MEAN DEPTH PER GENE
mapped_reads_by_readlength_wCDS <- read_tsv("data/mapped_reads_by_readlength_wCDS.tsv")

# limit to reads of length 30, 5 hpi
mapped_reads_by_readlength_wCDS <- filter(mapped_reads_by_readlength_wCDS, read_length == 30, time == "5 hpi") # 61,974 x 34

# add condition_combn
mapped_reads_by_readlength_wCDS$condition_combn <- paste0(mapped_reads_by_readlength_wCDS$treatment, " / ", mapped_reads_by_readlength_wCDS$time)

### SUMMARY ALL FRAME
# SUMMARIZE read_count by time, treatment, sample, gene, region_type
(riboseq_summary_allFrame <- mapped_reads_by_readlength_wCDS %>%
    group_by(condition_combn, sample, gene, frame, region_type) %>%
    summarize(
      mean_depth = mean(read_count)
    )) 

# SUMMARIZE FURTHER by taking the means of the samples
(riboseq_summary_allFrame <- riboseq_summary_allFrame %>%
    group_by(condition_combn, gene, region_type) %>%
    summarize(
      mean_depth = mean(mean_depth)
    )) # 84 x 4

riboseq_summary_allFrame$frame <- "All frames"
(riboseq_summary_allFrame <- dplyr::select(riboseq_summary_allFrame, condition_combn, gene, frame, region_type, mean_depth))
# 84 x 5

### SUMMARY BY FRAME
# SUMMARIZE read_count by time, treatment, sample, gene, frame, region_type
(riboseq_summary_byFrame <- mapped_reads_by_readlength_wCDS %>%
    group_by(condition_combn, sample, gene, frame, region_type) %>%
    summarize(
      mean_depth = mean(read_count)
    )) 

# SUMMARIZE FURTHER by taking the means of the samples
(riboseq_summary_byFrame <- riboseq_summary_byFrame %>%
    group_by(condition_combn, gene, frame, region_type) %>%
    summarize(
      mean_depth = mean(mean_depth)
    )) # 236 x 5

# combine
riboseq_summary_allFrames <- rbind(riboseq_summary_allFrame, riboseq_summary_byFrame)


### COMBINE WITH EARLIER EXPRESSION DATA

# set aside proteomics for joining
expression_data_PROTEOMICS <- dplyr::select(expression_data, gene, mean_iBAQ_pct)

# excise treatment means
riboseq_summary_allFrames_PROCESSED <- filter(riboseq_summary_allFrames, region_type == "gene")
riboseq_summary_allFrames_PROCESSED <- rbind(riboseq_summary_allFrames_PROCESSED, filter(riboseq_summary_allFrames, gene == "ORF9b")) # add ORF9b
riboseq_summary_allFrames_PROCESSED[riboseq_summary_allFrames_PROCESSED$gene == "ORF1ab_1", ]$gene <- "ORF1ab" # rename first half of ORF1ab
unique(riboseq_summary_allFrames_PROCESSED$gene)

# join
riboseq_summary_allFrames_PROCESSED <- left_join(riboseq_summary_allFrames_PROCESSED, expression_data_PROTEOMICS, by = "gene")

# remove NA
riboseq_summary_allFrames_PROCESSED <- filter(riboseq_summary_allFrames_PROCESSED, ! is.na(mean_iBAQ_pct))

### FACTOR GENE BY EXPRESSION (proteomics)
gene_order_proteomics <- arrange(expression_data, desc(mean_iBAQ_pct))$gene
riboseq_summary_allFrames_PROCESSED$gene <- factor(riboseq_summary_allFrames_PROCESSED$gene, levels = gene_order_proteomics)

### FACTOR CONDITION
riboseq_summary_allFrames_PROCESSED$condition_combn <- factor(riboseq_summary_allFrames_PROCESSED$condition_combn,
                                                   levels = c("CHX / 5 hpi", "Harr / 5 hpi", "LTM / 5 hpi", "mRNA / 5 hpi"))

### FACTOR FRAME
riboseq_summary_allFrames_PROCESSED$frame <- factor(riboseq_summary_allFrames_PROCESSED$frame,
                                                              levels = c("All frames", "Codon pos 1", "Codon pos 2", "Codon pos 3"))

# Rename Gene
names(riboseq_summary_allFrames_PROCESSED)[names(riboseq_summary_allFrames_PROCESSED) == "gene"] <- "Gene"

# PLOT Figure 2—figure supplement 2
(riboseq_summary_allFrames_PROCESSED_PLOT <- ggplot(data = riboseq_summary_allFrames_PROCESSED, mapping = aes(x = log10(mean_iBAQ_pct), y = mean_depth, color = Gene, shape = Gene)) +
    geom_point(size = 2, stroke = 0.8) + 
    geom_smooth(method = 'lm', se = FALSE, size = 0.4, color = "darkgrey") + 
    xlab("log10(iBAQ%) (mass spectrometry)") +
    ylab("Reads per million mapped (ribosomal profiling))") +
    facet_grid(condition_combn ~ frame, scales = "free_y") + 
    theme_bw() +
    theme(panel.grid = element_blank(),
          legend.title = element_text(size = 9),
          legend.key.size = unit(0.75, "line"),
          legend.text = element_text(size = 9, face = "italic"),
          strip.text = element_text(size = 9),
          axis.text.x = element_text(size = 9), 
          axis.title.x = element_text(size = 9),
          axis.text.y = element_text(size = 9),
          axis.title.y = element_text(size = 9),
          panel.border = element_rect(),
          strip.background = element_blank()) +
    scale_x_continuous(expand = expand_scale(mult = c(0.075, 0.075))) +
    scale_y_continuous(expand = expand_scale(mult = c(0.075, 0.075))) +
    scale_shape_manual(values = c(1:10, 13)) +
    scale_color_manual(values = c(brewer.pal(11, 'RdYlBu')[0:5], brewer.pal(11, 'BrBG')[5], brewer.pal(11, 'RdYlBu')[7:11])))


### CORRELATIONS

## ALL FRAMES ###
cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "CHX / 5 hpi", frame == "All frames")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "CHX / 5 hpi", frame == "All frames")$mean_depth, 
         method = "spearman")
#p-value = 0.08745, r=0.5454545 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "Harr / 5 hpi", frame == "All frames")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "Harr / 5 hpi", frame == "All frames")$mean_depth, 
         method = "spearman")
#p-value = 0.2139, r=0.4090909

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "LTM / 5 hpi", frame == "All frames")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "LTM / 5 hpi", frame == "All frames")$mean_depth, 
         method = "spearman")
#p-value = 0.1069, r=0.5181818

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "mRNA / 5 hpi", frame == "All frames")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "mRNA / 5 hpi", frame == "All frames")$mean_depth, 
         method = "spearman")
#p-value = 0.4021, r=0.2818182 


## CODON POS 1 ##
cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "CHX / 5 hpi", frame == "Codon pos 1")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "CHX / 5 hpi", frame == "Codon pos 1")$mean_depth, 
         method = "spearman")
#p-value = 0.04399, r=0.6272727

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "Harr / 5 hpi", frame == "Codon pos 1")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "Harr / 5 hpi", frame == "Codon pos 1")$mean_depth, 
         method = "spearman")
#p-value = 0.05618, r=0.6 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "LTM / 5 hpi", frame == "Codon pos 1")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "LTM / 5 hpi", frame == "Codon pos 1")$mean_depth, 
         method = "spearman")
#p-value = 0.05188, r=0.6090909 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "mRNA / 5 hpi", frame == "Codon pos 1")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "mRNA / 5 hpi", frame == "Codon pos 1")$mean_depth, 
         method = "spearman")
#p-value = 0.3864, r=0.2909091 


## CODON POS 2 ##
cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "CHX / 5 hpi", frame == "Codon pos 2")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "CHX / 5 hpi", frame == "Codon pos 2")$mean_depth, 
         method = "spearman")
#p-value = 0.08155, r=0.5545455 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "Harr / 5 hpi", frame == "Codon pos 2")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "Harr / 5 hpi", frame == "Codon pos 2")$mean_depth, 
         method = "spearman")
#p-value = 0.2484, r=0.3818182 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "LTM / 5 hpi", frame == "Codon pos 2")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "LTM / 5 hpi", frame == "Codon pos 2")$mean_depth, 
         method = "spearman")
#p-value = 0.2365, r=0.3909091 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "mRNA / 5 hpi", frame == "Codon pos 2")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "mRNA / 5 hpi", frame == "Codon pos 2")$mean_depth, 
         method = "spearman")
#p-value = 0.4021, r=0.2818182 


## CODON POS 3 ##
cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "CHX / 5 hpi", frame == "Codon pos 3")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "CHX / 5 hpi", frame == "Codon pos 3")$mean_depth, 
         method = "spearman")
#p-value = 0.1456, r=0.4727273 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "Harr / 5 hpi", frame == "Codon pos 3")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "Harr / 5 hpi", frame == "Codon pos 3")$mean_depth, 
         method = "spearman")
#p-value = 0.1456, r=0.4727273 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "LTM / 5 hpi", frame == "Codon pos 3")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "LTM / 5 hpi", frame == "Codon pos 3")$mean_depth, 
         method = "spearman")
#p-value = 0.2994, r=0.3454545 

cor.test(filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "mRNA / 5 hpi", frame == "Codon pos 3")$mean_iBAQ_pct, 
         filter(riboseq_summary_allFrames_PROCESSED, condition_combn == "mRNA / 5 hpi", frame == "Codon pos 3")$mean_depth, 
         method = "spearman")
#p-value = 0.313, r=0.3363636 





#########################################################################################################
### FINAL FIGURE 2B: (1) upstream peak, (2) In frame, and (3) out-of-frame, average of Harr and LTM

### RELOAD & wrangle Ribo-seq MEAN DEPTH PER GENE
mapped_reads_by_readlength_wCDS <- read_tsv("data/mapped_reads_by_readlength_wCDS.tsv")

# limit to reads of length 30, 5 hpi
mapped_reads_by_readlength_wCDS <- filter(mapped_reads_by_readlength_wCDS, read_length == 30, time == "5 hpi")

# add condition_combn
mapped_reads_by_readlength_wCDS$condition_combn <- paste0(mapped_reads_by_readlength_wCDS$treatment, " / ", mapped_reads_by_readlength_wCDS$time)


### SUMMARY IN-FRAME
# SUMMARIZE read_count by time, treatment, sample, gene, region_type
(riboseq_summary_30nt_allCond_inFrame <- filter(mapped_reads_by_readlength_wCDS, frame == "Codon pos 1") %>%
    group_by(condition_combn, sample, gene, region_type) %>% 
    summarize(
      mean_depth = mean(read_count)
    )) 

# SUMMARIZE FURTHER by taking the means of the samples just calculated
(riboseq_summary_30nt_allCond_inFrame <- riboseq_summary_30nt_allCond_inFrame %>%
    group_by(condition_combn, gene, region_type) %>%
    summarize(
      mean_depth = mean(mean_depth)
    )) 

riboseq_summary_30nt_allCond_inFrame$frame <- "In-frame"
(riboseq_summary_30nt_allCond_inFrame <- dplyr::select(riboseq_summary_30nt_allCond_inFrame, condition_combn, gene, frame, region_type, mean_depth))


### SUMMARY OUT-OF-FRAME
# SUMMARIZE read_count by time, treatment, sample, gene, region_type
(riboseq_summary_30nt_allCond_outOfFrame <- filter(mapped_reads_by_readlength_wCDS, frame != "Codon pos 1") %>%
    group_by(condition_combn, sample, gene, region_type) %>%
    summarize(
      mean_depth = mean(read_count)
    )) 

# SUMMARIZE FURTHER by taking the means of the samples just calculated
(riboseq_summary_30nt_allCond_outOfFrame <- riboseq_summary_30nt_allCond_outOfFrame %>%
    group_by(condition_combn, gene, region_type) %>%
    summarize(
      mean_depth = mean(mean_depth)
    )) 

riboseq_summary_30nt_allCond_outOfFrame$frame <- "Out-of-frame"
(riboseq_summary_30nt_allCond_outOfFrame <- dplyr::select(riboseq_summary_30nt_allCond_outOfFrame, condition_combn, gene, frame, region_type, mean_depth))

# combine
riboseq_summary_30nt_allCond <- rbind(riboseq_summary_30nt_allCond_inFrame, riboseq_summary_30nt_allCond_outOfFrame)


### Prepare IN-FRAME MEAN of Harr of and LTM for the Ribo-seq
(riboseq_summary_30nt_HarrLTM <- filter(riboseq_summary_30nt_allCond, frame == "In-frame", condition_combn %in% c("Harr / 5 hpi", "LTM / 5 hpi")) %>%
    group_by(gene) %>%
    summarise(
      riboseq = mean(mean_depth)
    ))
riboseq_summary_30nt_HarrLTM$condition_combn <- "In-frame mean"

### Prepare OUT-OF-FRAME MEAN of Harr of and LTM for the Ribo-seq
(riboseq_summary_30nt_HarrLTM_outOfFrame <- filter(riboseq_summary_30nt_allCond, frame == "Out-of-frame", condition_combn %in% c("Harr / 5 hpi", "LTM / 5 hpi")) %>%
    group_by(gene) %>%
    summarise(
      riboseq = mean(mean_depth)
    ))
riboseq_summary_30nt_HarrLTM_outOfFrame$condition_combn <- "Out-of-frame mean"


# combine
riboseq_summary_30nt_HarrLTM <- rbind(riboseq_summary_30nt_HarrLTM, riboseq_summary_30nt_HarrLTM_outOfFrame)


# set aside proteomics for joining
expression_data_PROTEOMICS <- dplyr::select(expression_data, gene, mean_iBAQ_pct)

# prepare for join
riboseq_summary_30nt_HarrLTM[riboseq_summary_30nt_HarrLTM$gene == "ORF1ab_1", ]$gene <- "ORF1ab" # rename first half of ORF1ab

# join
riboseq_summary_30nt_HarrLTM <- left_join(riboseq_summary_30nt_HarrLTM, expression_data_PROTEOMICS, by = "gene")

# remove NA
riboseq_summary_30nt_HarrLTM <- filter(riboseq_summary_30nt_HarrLTM, ! is.na(mean_iBAQ_pct))

# Import UPSTREAM PEAK DATA, which is also based only on 29-31nt reads and Harr/LTM means
(riboseq_upstreamPeak_data <- read_tsv("data/expression_data_by_gene.tsv")) # LTM and Harr mean
riboseq_upstreamPeak_data <- dplyr::select(riboseq_upstreamPeak_data, -method)

# combine
riboseq_summary_30nt_HarrLTM <- rbind(riboseq_summary_30nt_HarrLTM, riboseq_upstreamPeak_data)

### FACTOR GENE BY EXPRESSION (proteomics)
gene_order_proteomics <- arrange(expression_data, desc(mean_iBAQ_pct))$gene
riboseq_summary_30nt_HarrLTM$gene <- factor(riboseq_summary_30nt_HarrLTM$gene, levels = gene_order_proteomics)

### FACTOR CONDITION
#unique(riboseq_summary_30nt_HarrLTM$condition_combn)
riboseq_summary_30nt_HarrLTM$condition_combn <- factor(riboseq_summary_30nt_HarrLTM$condition_combn,
                                                       levels = c("Upstream peak", "In-frame mean", "Out-of-frame mean"))

# Rename Gene
names(riboseq_summary_30nt_HarrLTM)[names(riboseq_summary_30nt_HarrLTM) == "gene"] <- "Gene"

### PLOT main Figure 2B
(riboseq_summary_30nt_HarrLTM_PLOT <- ggplot(data = riboseq_summary_30nt_HarrLTM, mapping = aes(x = log10(mean_iBAQ_pct), y = riboseq, color = Gene, shape = Gene)) +
    geom_point(size = 2, stroke = 1) + 
    geom_smooth(method = 'lm', se = FALSE, size = 0.4, color = "darkgrey") + 
    xlab("Mass spec expression (log10[iBAQ%])") +
    ylab("Ribo-seq coverage (reads per million mapped)") +
    facet_grid(condition_combn ~ ., scales = "free_y") + 
    theme_bw() +
    theme(panel.grid = element_blank(),
          legend.position = 'none',
          legend.title = element_text(size = rel(0.9)),
          legend.key.size = unit(0.75, "line"),
          legend.text = element_text(size = rel(0.9), face = "italic"),
          strip.text = element_text(size = rel(0.9)),
          axis.text = element_text(size = rel(0.9)), 
          axis.title.x = element_text(size = rel(0.9)),
          axis.title.y = element_text(size = rel(0.9)),
          panel.border = element_rect(),
          strip.background = element_blank()) +
    scale_shape_manual(values = c(1:10, 13)) +
    scale_color_manual(values = c(brewer.pal(11, 'RdYlBu')[0:5], brewer.pal(11, 'BrBG')[5], brewer.pal(11, 'RdYlBu')[7:11])))


### CORRELATIONS FOR THESE

cor.test(filter(riboseq_summary_30nt_HarrLTM, condition_combn == "Upstream peak")$mean_iBAQ_pct, 
         filter(riboseq_summary_30nt_HarrLTM, condition_combn == "Upstream peak")$riboseq, 
         method = "spearman")
#Spearman's rank correlation rho
#
#data:  filter(riboseq_summary_30nt_HarrLTM, condition_combn == "Upstream peak")$mean_iBAQ_pct and filter(riboseq_summary_30nt_HarrLTM, condition_combn == "Upstream peak")$riboseq
#S = 24, p-value = 0.0004284
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#  rho 
#0.8909091 


cor.test(filter(riboseq_summary_30nt_HarrLTM, condition_combn == "In-frame mean")$mean_iBAQ_pct, 
         filter(riboseq_summary_30nt_HarrLTM, condition_combn == "In-frame mean")$riboseq, 
         method = "spearman")
#Spearman's rank correlation rho
#
#data:  filter(riboseq_summary_30nt_HarrLTM, condition_combn == "In-frame mean")$mean_iBAQ_pct and filter(riboseq_summary_30nt_HarrLTM, condition_combn == "In-frame mean")$riboseq
#S = 88, p-value = 0.05618
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#rho 
#0.6 

cor.test(filter(riboseq_summary_30nt_HarrLTM, condition_combn == "Out-of-frame mean")$mean_iBAQ_pct, 
         filter(riboseq_summary_30nt_HarrLTM, condition_combn == "Out-of-frame mean")$riboseq, 
         method = "spearman")
#Spearman's rank correlation rho
#data:  filter(riboseq_summary_30nt_HarrLTM, condition_combn == "Out-of-frame mean")$mean_iBAQ_pct and filter(riboseq_summary_30nt_HarrLTM, condition_combn == "Out-of-frame mean")$riboseq
#S = 134, p-value = 0.2365
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#  rho 
#0.3909091  




back to top