https://github.com/JulianvanGerwen/GxE_muscle_phos
Tip revision: 5906d3b83cdc0b5b1a8685380abc32d23e393e07 authored by Julian van Gerwen on 07 January 2024, 09:18:46 UTC
Added code used in revisions
Added code used in revisions
Tip revision: 5906d3b
05_characterising_Diet_insregdiffs.Rmd
###Background
Here I characterise Diet differences in insulin regulation as identified in 03_insulin_regulation.Rmd
###Initialise
```{r}
library(tidyverse)
library(purrr)
library(ggplot2)
library(qvalue)
library(factoextra)
home_directory <- "..\\"
source(paste(home_directory, "scripts/essential_visualisations_JvG.R", sep = ""))
source(paste(home_directory, "scripts/enrichment_analysis_JvG.R", sep = ""))
source("..\\scripts/01_objects__1.R")
source("..\\scripts/02_global_functions__1.R")
source("scripts/02_basic_visualisations.R")
source("scripts/03_basic_functions.R")
source("scripts/06_annotation.R")
source("scripts/09_insregdiff.R")
source("scripts/11_enrichment.R")
load("data/intermediate/phos_data/annotation/phos_data_annotation__1.RData")
load("data/intermediate/phos_data/phos_data_proc_insreg__4.RData")
load("data/intermediate/phos_data/data_wNAs_filt__2.RData")
```
###Set up data
```{r}
##Make insreg data
insreg_data <- subset(phos_data_proc_insreg, insreg_bool == TRUE)
insreg_data_wNAs_filt <- subset(data_wNAs_filt, insreg_bool == TRUE)
```
##Canonical insulin signalling proteins
Set up
```{r}
##Directory
canins_dir <- "output/images/analysis/insulin_regulation/insreg_changes/Diet_2wayaov/caninssig/"
```
This is a list of "canonical insulin signalling proteins" compiled from several ontologies, which features in 2022 Nat Biotech paper (doi.org/10.1038/s41587-021-01099-9 )
```{r}
###Set up
##Load in and process
can_ins_sig_proteins_EN <- readRDS("data/intermediate/biol_databases/can_ins_sig/can_ins_prots.rds") %>%
tolower %>%
strsplit("") %>% map(~paste(c(toupper(.[1]), .[-1]), collapse = "")) %>% unlist
##Combine with insreg_data
insreg_data_caninsEN <- insreg_data
insreg_data_caninsEN$caninssig_EN <- FALSE
insreg_data_caninsEN[which(insreg_data_caninsEN$gene %in% can_ins_sig_proteins_EN), "caninssig_EN"] <- TRUE
##Diet changes
Dietdiff_caninsEN_summary <- summary_StrainxDiet_change(insreg_data_caninsEN,
Var2 = "caninssig_EN")
#y axis is proportion with strain effect
ggplot(subset(Dietdiff_caninsEN_summary$Dietoverall,
insregdiff_Dietoverall_change_bool == TRUE),
aes(x = caninssig_EN, y = prop_of_caninssig_EN*100)) +
geom_col(fill = "black", width = 0.8) +
comfy_theme() +
labs(y = "% Diet effect",
x = "Canonical insulin signalling protein")
ggsave_pdfpng(paste(canins_dir, "bplot_Dietoveralldiff_caninsEN_prop_ystrain",
sep = ""),
width = 1.25, height = 1.5)
#Fisher's exact for overrepresentation
fishers_DE_pathway(background_genes = rownames(insreg_data_caninsEN),
DE_genes = subset(insreg_data_caninsEN,
caninssig_EN == TRUE) %>% rownames,
pathway_genes = subset(insreg_data_caninsEN,
insregdiff_Dietoverall_change_bool == TRUE) %>% rownames,
alternative = "two.sided")
```
###PCA on log2 ins/bas
```{r}
source("scripts/01_QC.R")
##Set up data
data <- subset(data_wNAs_filt, insreg_bool == TRUE)
rownames(data) <- strsplit(rownames(data), "_") %>%
map(~paste(.[1], " ", .[3], " P", .[4], sep = "")) %>% unlist
num_cols <- all_FC_cols$Ins
##Make PCA
pca <- prcomp(t(na.omit(data[, num_cols])), center = TRUE, scale. = FALSE)
pca_df <- as.data.frame(pca$x, stringsAsFactors = FALSE) %>%
mutate(Strain = strsplit(rownames(.), "_") %>% map(~.[1]) %>% unlist,
Diet = strsplit(rownames(.), "_") %>% map(~.[2]) %>% unlist,
StrainDiet = strsplit(rownames(.), "_") %>% map(~paste(.[1], .[2], sep = "_")) %>% unlist) %>%
mutate(Strain = factor(Strain, levels = all_levels$Strain),
Diet = factor(Diet, levels = all_levels$Diet),
StrainDiet = factor(StrainDiet, levels = all_levels$StrainDiet))
##Plot PCA
PCs <- c(1, 2)
PC_labels <- paste("PC", PCs, sep = "")
corrplot_general(pca_df, xaxis = "PC1", yaxis = "PC2",
include_basins = FALSE) +
labs(x = paste(c(PC_labels[1], " (",
100*summary(pca)$importance[2, PCs[1]],
"%)"),
collapse = ""),
y = paste(c(PC_labels[2], " (",
100*summary(pca)$importance[2, PCs[2]],
"%)"),
collapse = ""))
ggsave_pdfpng(file = "output/images/analysis/insulin_regulation/insreg_changes/global_vis/PCA_insreg_insbasFCs",
width = 2.5, height = 1.5)
```
###Overlapping and exclusive Strain/Diet effects
Do Strain and Diet effects co-occur?
I assess whether sites with a Strain effect are more likely to have a Diet effect than by chance
```{r}
###Set up data
insreg_ppeptides_list <- list(
"insreg" = rownames(insreg_data),
"Strain" = subset(insreg_data, insregdiff_StrainCHOW_change_bool == TRUE) %>% rownames,
"Dietoverall" = subset(insreg_data, insregdiff_Dietoverall_change_bool == TRUE) %>% rownames,
"StrainxDiet" = subset(insreg_data, insregdiff_StrainxDiet_change_bool == TRUE) %>% rownames,
"UniformDiet" = subset(insreg_data, insregdiff_Diet_change_bool == TRUE) %>% rownames
)
insreg_genes_list <- map(insreg_ppeptides_list, function(vec){
genes <- strsplit(vec, "_") %>% map(~.[1]) %>% unlist %>% unique
return(genes)
})
###Overrepresentation
StrainDietoverrep <- fishers_DE_pathway(background_genes = insreg_ppeptides_list$insreg,
DE_genes = insreg_ppeptides_list$Strain,
pathway_genes = insreg_ppeptides_list$Dietoverall,
alternative = "two.sided")
###Venn diagram
ggVennDiagram_nice(list = insreg_ppeptides_list[c("Strain", "Dietoverall")],
colours = brewer.pal(9, "Greys")[1:6],
fill_limits = c(0, NA))
ggsave_pdfpng("output/images/analysis/insulin_regulation/insreg_changes/global_vis/venn_StrainDiet_overlap",
width = 3, height = 3)
```