--- Author: "Amin Haghani" Paper: "DNA Methylation Networks Underlying Mammalian Traits" Authors: "Amin Haghani1, 2 †*; Caesar Z. Li3, 4 †; Todd R. Robeck5; Joshua Zhang1; Ake T. Lu1, 2; Julia Ablaeva6; Victoria A. Acosta-Rodríguez7; Danielle M. Adams8; Abdulaziz N. Alagaili9, 10; Javier Almunia11; Ajoy Aloysius12; Nabil M.S. Amor13; Reza Ardehali14; Adriana Arneson15, 16; C. Scott Baker17; Gareth Banks18; Katherine Belov19; Nigel C. Bennett20; Peter Black21; Daniel T. Blumstein22, 23; Eleanor K. Bors17; Charles E. Breeze24; Robert T. Brooke25; Janine L. Brown26; Gerald Carter27; Alex Caulton28, 29; Julie M. Cavin30; Lisa Chakrabarti31; Ioulia Chatzistamou32; Andreas S. Chavez27, 33; Hao Chen34; Kaiyang Cheng35; Priscila Chiavellini36; Oi-Wa Choi37, 38; Shannon Clarke28; Joseph A. Cook39; Lisa N. Cooper40; Marie-Laurence Cossette41; Joanna Day42; Joseph DeYoung37, 38; Stacy Dirocco43; Christopher Dold44; Jonathan L. Dunnum39; Erin E. Ehmke45; Candice K. Emmons46; Stephan Emmrich6; Ebru Erbay47, 48, 49; Claire Erlacher-Reid43; Chris G. Faulkes50, 51; Zhe Fei3, 52; Steven H. Ferguson53, 54; Carrie J. Finno55; Jennifer E. Flower56; Jean-Michel Gaillard57; Eva Garde58; Livia Gerber59, 60; Vadim N. Gladyshev61; Rodolfo G. Goya36; Matthew J Grant62; Carla B. Green7; M. Bradley Hanson46; Daniel W. Hart20; Martin Haulena63; Kelsey Herrick64; Andrew N. Hogan65; Carolyn J. Hogg19; Timothy A. Hore66; Taosheng Huang67; Juan Carlos Izpisua Belmonte2; Anna J. Jasinska37, 68, 69; Gareth Jones70; Eve Jourdain71; Olga Kashpur72; Harold Katcher73; Etsuko Katsumata74; Vimala Kaza75; Hippokratis Kiaris76; Michael S. Kobor77; Pawel Kordowitzki78; William R. Koski79; Michael Krützen60; Soo Bin Kwon16, 15; Brenda Larison22, 80; Sang-Goo Lee61; Marianne Lehmann36; Jean-François Lemaître57; Andrew J. Levine81; Xinmin Li82; Cun Li83, 84; Andrea R. Lim1; David T.S. Lin85; Dana M. Lindemann43; Schuyler W. Liphardt86; Thomas J. Little87; Nicholas Macoretta6; Dewey Maddox88; Craig O. Matkin89; Julie A. Mattison90; Matthew McClure91; June Mergl92; Jennifer J. Meudt93; Gisele A. Montano5; Khyobeni Mozhui94; Jason Munshi-South95; William J. Murphy96, 97; Asieh Naderi76; Martina Nagy98; Pritika Narayan62; Peter W. Nathanielsz83, 84; Ngoc B. Nguyen14; Christof Niehrs99, 100; Batsaikhan Nyamsuren101; Justine K. O'Brien42; Perrie O'Tierney Ginn72; Duncan T Odom102, 103; Alexander G. Ophir104; Steve Osborn105; Elaine A. Ostrander65; Kim M. Parsons46; Kimberly C. Paul81; Amy B. Pedersen87; Matteo Pellegrini106; Katharina J. Peters60, 107; Jessica L. Petersen108; Darren W. Pietersen109; Gabriela M. Pinho22; Jocelyn Plassais65; Jesse R. Poganik61; Natalia A. Prado110, 26; Pradeep Reddy111, 2; Benjamin Rey57; Beate R. Ritz112, 113, 81; Jooke Robbins114; Magdalena Rodriguez115; Jennifer Russell105; Elena Rydkina6; Lindsay L. Sailer104; Adam B. Salmon116; Akshay Sanghavi73; Kyle M. Schachtschneider117, 118, 119; Dennis Schmitt120; Todd Schmitt64; Lars Schomacher99; Lawrence B. Schook117, 121; Karen E. Sears22; Ashley W. Seifert12; Aaron B.A. Shafer122; Anastasia V. Shindyapina61; Melanie Simmons45; Kavita Singh123; Ishani Sinha22; Jesse Slone67; Russel G. Snell62; Elham Soltanmohammadi76; Matthew L. Spangler108; Maria Spriggs21; Lydia Staggs43; Nancy Stedman21; Karen J. Steinman124; Donald T Stewart125; Victoria J. Sugrue66; Balazs Szladovits126; Joseph S. Takahashi7, 127; Masaki Takasugi6; Emma C. Teeling128; Michael J. Thompson106; Bill Van Bonn129; Sonja C. Vernes130, 131; Diego Villar132; Harry V. Vinters133; Ha Vu15, 16; Mary C. Wallingford72; Nan Wang37, 38; Gerald S. Wilkinson8; Robert W. Williams134; Qi Yan3, 2; Mingjia Yao3; Brent G. Young54; Bohan Zhang61; Zhihui Zhang6; Yang Zhao6; Peng Zhao14, 135; Wanding Zhou136, 137; Joseph A. Zoller3; Jason Ernst15, 16; Andrei Seluanov138; Vera Gorbunova138; X. William Yang37, 38; Ken Raj139; Steve Horvath1, 2 *" Date: "07-24-2023" output: html_document --- ```{r libraries, message=FALSE} library(easypackages) libraries("readxl", "psych", "tidyr", "dplyr","gplots", "RColorBrewer","limma", "ggplot2", "metap", "anRichment", "clusterProfiler", "RColorBrewer", "ggstatsplot", "gridExtra", "ggupset", "psych", "corrplot", "limma", "edgeR", "gridExtra", "ggupset", "ggrepel", "data.table", "stringr", "ComplexHeatmap", "extrafont", "Kmisc", "ggrastr", "WGCNA") source("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Human unique methylation project/summarizeFunctions.R") source("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Codes, and protocols/Manhattan plot function, Amin.R") # I created subsets of sample sheet for different analysis samples <- readRDS("samplesAll.RDS") trainSamples <- readRDS("samplesNoMars.Set1.RDS")%>% filter(Basename%in%samplesWithMonotremes$Basename) trainSamples <- readRDS("samplesNoMars.Set2.RDS") # table S1 of the paper sumSample <- read.csv("summary samples.csv") ``` ```{r} qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] %>% filter(colorblind==TRUE) col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) set.seed(2020) orCols <- c(sample(col_vector, length(levels(samples$Order)), replace = FALSE)) names(orCols) <- levels(samples$Order) orColsData <- data.frame(colors = orCols, Order = levels(samples$Order)) orColsData$Order <- factor(orColsData$Order, levels = levels(samples$Order)) orColsData$colors <- with(orColsData, reorder(colors, order(orColsData$Order))) marsupials <- c("10.Didelphimorphia", "18.Dasyuromorphia", "11.Diprotodontia", "23.Microbiotheria", "26.Paucituberculata") monotremes <- c("21.Monotremata") samplesNoMars <- samples %>% filter(!Order%in% c(marsupials, monotremes)) samplesWithMonotremes <- samples %>% dplyr::select(-SpeciesCommonName) %>% left_join(dplyr::select(.data=sumSample, SpeciesLatinName,SpeciesCommonName)) samples <- samples %>%filter(Order!=monotremes) %>% droplevels() %>% dplyr::select(-SpeciesCommonName) %>% left_join(dplyr::select(.data=sumSample, SpeciesLatinName,SpeciesCommonName)) samplesNoMars <- samplesNoMars %>% dplyr::select(-SpeciesCommonName) %>% left_join(dplyr::select(.data=sumSample, SpeciesLatinName,SpeciesCommonName)) ``` ```{r load data} mappability <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mappability file. Eutherians and Marsupials.RDS") geneMap <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS")%>%filter(CGid%in%mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes")]) geneMap$seqnames <- as.factor(geneMap$seqnames) chr <- levels(geneMap$seqnames)[c(1,12,16:22,2:11,13:15, 23:24)] geneMap$seqnames <- factor(geneMap$seqnames, levels = chr, labels = gsub("chr", "", chr)) load("~/Steve Horvath Lab Dropbox/Amin Haghani/MammalianMethCombined/StuffCaesar/NormalizedData/all_probes_all_samples_sesame.RData") bValsAll <- dat0sesame %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianAndMarsupialCor0.8=="yes")]) %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(samples$Basename) bValsNoMars <- dat0sesame %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes")]) %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(samplesNoMars$Basename) dataEpic <- dat0sesame %>% filter(CGid%in%mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes"& mappability$probesEPIC=="yes")]) %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(samplesNoMars$Basename) data450K <- dat0sesame %>% filter(CGid%in%mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes"& mappability$probes450=="yes")]) %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(samplesNoMars$Basename) bValsWithMonotremes <- dat0sesame %>% filter(CGid %in% mappability$CGid[which(mappability$mapToAllMammalsCor0.8=="yes")]) %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(samplesWithMonotremes$Basename) testData <- bValsNoMars[,which(!colnames(bValsNoMars)%in%trainSamples$Basename)] ``` ## Load results and match colors ```{r load results} MEs <- readRDS("WGCNA results/No marsupials/Module MEs merged net 1 May 2022.RDS") KMEs <- readRDS("WGCNA results/No marsupials/Merged modules KME net 1.RDS") geneTree <- readRDS("WGCNA results/No marsupials/geneTree net 1.RDS") mergedColors <- readRDS("WGCNA results/No marsupials/mergedColors net 1.RDS") samplesNoMars <- samplesNoMars %>% filter(Basename %in% rownames(MEs)) MEs <- MEs[samplesNoMars$Basename,] ``` ```{r KME test set} datExpr <- t(bValsNoMars) KMEtotal=signedKME(datExpr, MEs, outputColumnName="MM.") saveRDS(KMEtotal,"WGCNA results/No marsupials/Merged modules KME net 1, May2022.RDS") datExpr <- t(testData) testME=moduleEigengenes(datExpr,mergedColors, softPower = 12)$eigengenes KMEtest=signedKME(datExpr, testME, outputColumnName="MM.") saveRDS(KMEtotal,"WGCNA results/No marsupials/Merged modules KME net 1, testSet May2022.RDS") # KMEstrain <- KMEs %>% dplyr::select(names(KMEtest)) correlations <- rbindlist(lapply(1:ncol(KMEstrain), function(x){ a <- KMEstrain[,x, drop=F] b<- KMEtest[,x, drop=F] d <- cbind(a,b) c <- as.data.frame(corAndPvalue(a,b))[,1:3] %>% setnames(new = c("cor", "p", "z")) })) %>% mutate(modules = gsub("MM.", "",names(KMEtest))) gdat <- correlations %>% arrange(cor) %>% mutate(modules = factor(modules, levels = unique(modules)[order(cor)])) p1 <- gdat%>% ggplot(aes(y=cor, x=modules, fill=modules))+geom_bar(stat = "identity")+theme_bw(base_size = 15)+ylab("correlation")+theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")+scale_fill_manual(values = levels(gdat$modules))+ggtitle("Correlation of module connectivity, train vs test sets") gdat <- correlations %>% arrange(z) %>% mutate(modules = factor(modules, levels = unique(modules)[order(z)])) p2 <- gdat%>% ggplot(aes(y=z, x=modules, fill=modules))+geom_bar(stat = "identity")+theme_bw(base_size = 15)+ylab("zscore")+theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")+scale_fill_manual(values = levels(gdat$modules))+ggtitle("Zscore of connectivity correlations, train vs test sets") p <- ggpubr::ggarrange(p1, p2, ncol = 1) ggsave("preservation in test.jpeg", p, width = 12, height = 8, units = "in", dpi = 300) ``` ```{r proportion of variance} # datExpr <- t(bValsNoMars) allEigenGenes=moduleEigengenes(datExpr,mergedColors, softPower = 12) variance <- data.frame(modules = colnames(allEigenGenes$eigengenes), varianceExplainedAll = t(allEigenGenes$varExplained)) sum2 <- sum2 %>% left_join(variance) ## datExpr <- t(testData) allEigenGenes=moduleEigengenes(datExpr,mergedColors, softPower = 12) variance <- data.frame(modules = colnames(allEigenGenes$eigengenes), varianceExplainedTest = t(allEigenGenes$varExplained)) sum2 <- sum2 %>% left_join(variance) write.csv(sum2, "summaryVarianceMay2022.csv") ``` ```{r load results, consensus network} human <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS") %>% dplyr::select(CGid, CGstart, CGend, seqnames, strand, SYMBOL, annotation, distanceToTSS, flank_geneIds, flank_gene_distances, CpGisland) %>% setnames(new = c("CGid", paste("human.hg19.", names(.)[-1]))) mouse <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mouse.Mus_musculus.GRCm38.100.Amin.V8.RDS") %>% dplyr::select(CGid, CGstart, CGend, seqnames, strand, SYMBOL, annotation, distanceToTSS, flank_geneIds, flank_gene_distances, CpGisland) %>% setnames(new = c("CGid", paste("Mouse.mm10.", names(.)[-1]))) consNetworks <- readRDS("all networks with matched colors.RDS") %>% left_join(human) %>% left_join(mouse) #write.csv(consNetworks, "networks.csv") ``` ```{r load results, Netwrok 2} MEs2 <- readRDS("WGCNA results/All, mappability filter/net2MEs_matched May 2022.RDS") KMEs2 <- readRDS("WGCNA results/All, mappability filter/net2KMEs_matched.RDS") geneTree2 <- readRDS("WGCNA results/All, mappability filter/geneTree net 2.RDS") mergedColors2 <- readRDS("WGCNA results/All, mappability filter/net2mergedColors_matched.RDS") samples <- samples %>% filter(Basename %in% rownames(MEs2)) MEs2 <- MEs2[samples$Basename,] ``` ```{r results including Monotremes} netsWithMonotremes <- readRDS("all networks with matched colors.RDS") %>% left_join(mappability) %>% filter(mapToAllMammalsCor0.8=="yes") identical(netsWithMonotremes$CGid, rownames(bValsWithMonotremes)) MEs3 <- readRDS("WGCNA results/All, mappability filter/net1 with Monotremes May 2022.RDS") KMEs3 <- readRDS("WGCNA results/All, mappability filter/net1KMEs with Monotremes May 2022.RDS") samplesWithMonotremes <- samplesWithMonotremes %>% filter(Basename %in% rownames(MEs3)) MEs3 <- MEs3[samplesWithMonotremes$Basename,] ``` ```{r results including Marsupials} netsWithMarsupials <- readRDS("all networks with matched colors.RDS") %>% left_join(mappability) %>% filter(mapToEutherianAndMarsupialCor0.8=="yes") identical(netsWithMarsupials$CGid, rownames(bValsAll)) MEs4 <- readRDS("WGCNA results/All, mappability filter/net1 with Marsupials May 2022.RDS") KMEs4 <- readRDS("WGCNA results/All, mappability filter/net1KMEs with Marsupials May 2022.RDS") samples <- samples %>% filter(Basename %in% rownames(MEs4)) MEs4 <- MEs4[samples$Basename,] ``` ## All DNAm no Marsupials ```{r relate to traits} datTraits <- samplesNoMars %>% dplyr::select(Basename, Female,Age,log.Age,relativeAge, maximum_age, average_weight, Gestation.days, Age.SexualMaturity, Hibernation, Birth.weight)%>% mutate(Female = factor(Female, levels = c(0,1))) %>% droplevels() %>% mutate(log_max_age = log(maximum_age), log_ave_weight = log(average_weight)) %>% dplyr::select(Basename, Female, Age,log.Age,relativeAge,log_max_age,log_ave_weight, Gestation.days, Age.SexualMaturity,Birth.weight) datTraits <- samplesNoMars %>% filter(!is.na(relativeAge)&!is.na(Female))%>% mutate(log_max_age = log(maximum_age), log_ave_weight = log(average_weight)) %>% mutate(adjustedRelativeAge = residuals(lm(relativeAge~Tissue+SpeciesLatinName+Female))) %>% mutate(adjustedMaxAge = residuals(lm(log_max_age~relativeAge+Tissue+Female))) %>% dplyr::select(Basename, adjustedRelativeAge, adjustedMaxAge) %>% right_join(datTraits) %>% tibble::column_to_rownames(var = "Basename") %>% relocate(adjustedRelativeAge, .after=relativeAge) %>% relocate(adjustedMaxAge, .after=log_max_age) datTraits <- datTraits[rownames(MEs),] moduleTraitCor=cor(MEs,datTraits,use="p") moduleTraitPvalue=corPvalueStudent(moduleTraitCor,nrow(datTraits)) summaryResults <- as.data.frame(moduleTraitPvalue) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2)," (", formatC(moduleTraitPvalue, format = "e", digits = 0),")", sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(10, -pval)%>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) summaryResults4 <- summaryResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), NA, text)) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryResults4 <- summaryResults4[rownames(moduleTraitCor), colnames(moduleTraitCor)] identical(rownames(summaryResults4), rownames(moduleTraitCor)) textMatrix = paste(signif(moduleTraitCor, 2), "\n(", formatC(moduleTraitPvalue, format = "e", digits = 0), ")", sep = ""); dim(textMatrix) = dim(moduleTraitCor) sexModule <- summaryResults$modules[which(summaryResults$var=="Female"&summaryResults$pval==0)] association <- data.frame(correlation = moduleTraitCor, pval = moduleTraitPvalue) association$moduleColors <- substring(rownames(association), 3) association <- KMEs %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) %>% right_join(association) my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231] jpeg(file = "Association Heatmap no Marsupials.jpeg", width = 14, height = 12, family = "Helvetica", units = "in", res = 300) labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = gsub("ME", "",names(MEs)), ySymbols = names(MEs), colorLabels = FALSE, colors = my_palette, textMatrix = summaryResults4, setStdMargins = TRUE, cex.text = 0.8, cex.lab = 1, zlim = c(-1,1), main = paste("Network 1, no Marsupials")) dev.off() ``` ## Analysis with Marsupials ```{r relate to traits, Network2} datTraits <- samples %>% dplyr::select(Basename, Tissue, Female, Order,Age,log.Age, relativeAge, maximum_age, average_weight, Gestation.days, Age.SexualMaturity, Hibernation, Birth.weight) %>% mutate(Female = factor(Female, levels = c(0,1))) %>% droplevels() %>% mutate(Tissue = factor(Tissue, levels = levels(Tissue), labels = 1:length(levels(Tissue)))) %>% mutate(Order = factor(Order, levels = levels(Order), labels = 1:length(levels(Order)))) %>% mutate(log_max_age = log(maximum_age), log_ave_weight = log(average_weight)) %>% mutate(Hibernation = ifelse(Hibernation=="yes", 1, ifelse(Hibernation=="no", 0, NA))) %>% dplyr::select(Basename, Tissue, Female, Order,Age, log.Age, relativeAge,log_max_age,log_ave_weight, Gestation.days, Age.SexualMaturity, Hibernation, Birth.weight) %>% tibble::column_to_rownames(var = "Basename") datTraits <- samples %>% dplyr::select(Basename, Female,Age,log.Age,relativeAge, maximum_age, average_weight, Gestation.days, Age.SexualMaturity, Hibernation, Birth.weight)%>% mutate(Female = factor(Female, levels = c(0,1))) %>% droplevels() %>% mutate(log_max_age = log(maximum_age), log_ave_weight = log(average_weight)) %>% dplyr::select(Basename, Female, Age,log.Age,relativeAge,log_max_age,log_ave_weight, Gestation.days, Age.SexualMaturity,Birth.weight) datTraits <- samples %>% filter(!is.na(relativeAge)&!is.na(Female))%>% mutate(log_max_age = log(maximum_age), log_ave_weight = log(average_weight)) %>% mutate(adjustedRelativeAge = residuals(lm(relativeAge~Tissue+SpeciesLatinName+Female))) %>% mutate(adjustedMaxAge = residuals(lm(log_max_age~relativeAge+Tissue+Female))) %>% dplyr::select(Basename, adjustedRelativeAge, adjustedMaxAge) %>% right_join(datTraits) %>% tibble::column_to_rownames(var = "Basename") %>% relocate(adjustedRelativeAge, .after=relativeAge) %>% relocate(adjustedMaxAge, .after=log_max_age) datTraits <- datTraits[rownames(MEs2),] moduleTraitCor=cor(MEs2,datTraits,use="p") moduleTraitPvalue=corPvalueStudent(moduleTraitCor,nrow(datTraits)) summaryResults <- as.data.frame(moduleTraitPvalue) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2)," (", formatC(moduleTraitPvalue, format = "e", digits = 0),")", sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(10, -pval)%>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) summaryResults4 <- summaryResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), NA, text)) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryResults4 <- summaryResults4[rownames(moduleTraitCor), colnames(moduleTraitCor)] identical(rownames(summaryResults4), rownames(moduleTraitCor)) textMatrix = paste(signif(moduleTraitCor, 2), "\n(", formatC(moduleTraitPvalue, format = "e", digits = 0), ")", sep = ""); dim(textMatrix) = dim(moduleTraitCor) association2 <- data.frame(correlation = moduleTraitCor, pval = moduleTraitPvalue) association2$moduleColors <- substring(rownames(association2), 3) association2 <- KMEs2 %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors=modules,Freq = n) %>% right_join(association) my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231] pdf(file = "Association Heatmap with Marsupials, Network2.pdf", width = 14, height = 8) labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = gsub("ME", "",names(MEs2)), ySymbols = names(MEs2), colorLabels = FALSE, colors = my_palette, textMatrix = summaryResults4, setStdMargins = TRUE, cex.text = 0.8, cex.lab = 1, zlim = c(-1,1), main = paste("Network 2, with Marsupials")) dev.off() ``` ## Phylogenetic Order analysis based on both Networks ```{r preparing samples} orders1 <- levels(samplesNoMars$Order) Order1 <- as.data.frame(sapply(orders1, function(x){ a <- samplesNoMars %>% mutate(x = ifelse(Order==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Order1) <- orders1 Order1 <- cbind(Basename = samplesNoMars$Basename, Order1) ## Marsupials orders2 <- levels(samples$Order) Order2 <- as.data.frame(sapply(orders2, function(x){ a <- samples %>% mutate(x = ifelse(Order==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Order2) <- orders2 Order2 <- cbind(Basename = samples$Basename, Order2) ## Monotremes orders3 <- levels(samplesWithMonotremes$Order) Order3 <- as.data.frame(sapply(orders3, function(x){ a <- samplesWithMonotremes %>% mutate(x = ifelse(Order==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Order3) <- orders3 Order3 <- cbind(Basename = samplesWithMonotremes$Basename, Order3) ``` ```{r relate to Orders} # no Marsupials datTraits1 <- Order1 %>% filter(Basename %in% rownames(MEs)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(MEs,datTraits1,use="p") moduleTraitPvalue1=WGCNA::corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% mutate(dir = as.factor(sign(r))) %>% group_by(var, dir) %>% top_n(1, abs(r))%>% mutate(ID = paste(modules, var, sep = "_"))%>% filter(pval<0.05&abs(r)>0.3) %>% dplyr::select(-dir) summaryResults4 <- summaryResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", text)) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryResults4 <- summaryResults4[rownames(moduleTraitCor1), colnames(moduleTraitCor1)] identical(rownames(summaryResults4), rownames(moduleTraitCor1)) orderModules1 <- rownames(summaryResults4)[rowSums(summaryResults4!="")>0] ``` ```{r relate to Marsupials} # Marsupials datTraits2 <- Order2 %>% filter(Basename %in% rownames(MEs4)) %>% tibble::column_to_rownames("Basename") moduleTraitCor2=cor(MEs4,datTraits2,use="p") moduleTraitPvalue2=WGCNA::corPvalueStudent(moduleTraitCor2,nrow(datTraits2)) summaryMarResults <- as.data.frame(moduleTraitPvalue2) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryMarResults2 <- as.data.frame(moduleTraitCor2) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryMarResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue2, format = "e", digits = 0), sep = "")) summaryMarResults3 <- as.data.frame(moduleTraitCor2) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryMarResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(5, -pval)%>%top_n(2, abs(r)) %>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) %>% filter(pval<0.05&abs(r)>0.21) summaryMarResults4 <- summaryMarResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryMarResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", text)) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryMarResults4 <- summaryMarResults4[rownames(moduleTraitCor2), colnames(moduleTraitCor2)] identical(rownames(summaryMarResults4), rownames(moduleTraitCor2)) marsupialModules <- unique(summaryMarResults3$modules[which(summaryMarResults3$var %in% marsupials)]) summaryMarResults4 <- summaryMarResults4[marsupialModules, order(match(colnames(summaryMarResults4), levels(samples$Order)))] moduleTraitCor2 <- moduleTraitCor2[marsupialModules,order(match(colnames(moduleTraitCor2), levels(samples$Order))), drop=F] ``` ```{r Monotremes} # Monotremes datTraits3 <- Order3 %>% filter(Basename %in% rownames(MEs3)) %>% tibble::column_to_rownames("Basename") moduleTraitCor3=cor(MEs3,datTraits3,use="p") moduleTraitPvalue3=WGCNA::corPvalueStudent(moduleTraitCor3,nrow(datTraits3)) summaryMonoResults <- as.data.frame(moduleTraitPvalue3) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryMonoResults2 <- as.data.frame(moduleTraitCor3) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryMonoResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue3, format = "e", digits = 0), sep = "")) summaryMonoResults3 <- as.data.frame(moduleTraitCor3) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryMonoResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(5, -pval)%>%top_n(2, abs(r)) %>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) %>% filter(pval<0.05&abs(r)>0.2) summaryMonoResults4 <- summaryMonoResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryMonoResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", text)) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryMonoResults4 <- summaryMonoResults4[rownames(moduleTraitCor3), colnames(moduleTraitCor3), drop=F] identical(rownames(summaryMonoResults4), rownames(moduleTraitCor3)) monotremeModules <- unique(summaryMonoResults3$modules[which(summaryMonoResults3$var %in% monotremes)]) summaryMonoResults4 <- summaryMonoResults4[monotremeModules, order(match(colnames(summaryMonoResults4), levels(samplesWithMonotremes$Order))), drop=F] moduleTraitCor3 <- moduleTraitCor3[monotremeModules,order(match(colnames(moduleTraitCor3), levels(samplesWithMonotremes$Order))), drop=F] ## Merge with all samples summRes <- summaryResults4[,order(match(colnames(summaryResults4), levels(samples$Order)))] summRes <- summRes[which(!rownames(summRes)%in%c(marsupialModules, monotremeModules)),] summRes <- bind_rows(summRes, summaryMarResults4) summRes <- bind_rows(summRes, summaryMonoResults4) summRes[is.na(summRes)] <- "" corSum <- as.data.frame(moduleTraitCor1[,order(match(colnames(moduleTraitCor1), levels(samples$Order)))]) corSum <- corSum[which(!rownames(corSum)%in%c(marsupialModules, monotremeModules)),] corSum <- bind_rows(corSum, as.data.frame(moduleTraitCor2)) corSum <- bind_rows(corSum, as.data.frame(moduleTraitCor3)) ``` ```{r summ} sumTable <- summaryResults3 %>% dplyr::select(modules,r, var, pval)%>% #mutate(r = ifelse(r>0, "+", ifelse(r<0, "-", NA))) %>% group_by(modules) %>% summarize(order = paste(var, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) sumTable <-summaryMarResults3%>% dplyr::select(modules,r, var,pval) %>% filter(var%in% marsupials)%>% #mutate(r = ifelse(r>0, "+", ifelse(r<0, "-", NA))) %>% group_by(modules) %>% summarize(order = paste(var, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) %>% bind_rows(sumTable) sumTable <-summaryMonoResults3%>% dplyr::select(modules,r, var,pval) %>% filter(var%in% monotremes)%>% #mutate(r = ifelse(r>0, "+", ifelse(r<0, "-", NA))) %>% group_by(modules) %>% summarize(order = paste(var, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) %>% bind_rows(sumTable) orderModules <- summaryResults3 %>% ungroup()%>% dplyr::select(modules,var) orderModules <-summaryMarResults3%>% dplyr::select(modules,var) %>% filter(var%in% marsupials) %>% distinct()%>% bind_rows(orderModules) orderModules <-summaryMonoResults3%>% dplyr::select(modules,var) %>% filter(var%in% monotremes) %>% distinct()%>% bind_rows(orderModules) ``` ```{r heatmap for order} corSum <- t(corSum) summRes <- t(summRes) summRes[summRes!=""] <- "*" i <- which(rowSums(summRes=="*")>0) corSum <- corSum[i,] summRes <-summRes[i,] colnames(corSum) <- gsub("ME", "", colnames(corSum)) my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231] library(ComplexHeatmap) cols <- gsub("(ME)", "", colnames(corSum)) names(cols) <- colnames(corSum) association <- KMEs %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) %>% mutate(modules = paste("ME", moduleColors, sep = "")) association <- association[order(match(association$modules, colnames(corSum))),] # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(corSum), col = list(Modules = cols), CpG_Frequency = anno_barplot(association$Freq), show_legend = FALSE) Heatmap <- Heatmap(corSum, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows =F, row_dend_reorder=TRUE, row_title="Phylogenetic orders", cell_fun = function(j, i, x, y, w, h, col) { grid.text(summRes[i, j],vjust = 0.8, x, y, gp=gpar(fontsize=20))}, bottom_annotation = ha, clustering_method_rows ="complete", row_dend_side = "right", column_names_gp = gpar(fontsize = 10)) pdf(file = "Heatmap modules Order combined.pdf", width = 12, height = 4) draw(Heatmap, heatmap_legend_side = "left", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ```{r upset plot of overlap} top100 <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(100, abs(KME))) }) top100 <- bind_rows(top100)%>% filter(modules %in% c(gsub("ME", "", marsupialModules), gsub("ME", "", orderModules1))) geneMap <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V4.RDS") sumResults <- summaryMarResults3 %>% filter(modules %in% marsupialModules) %>% bind_rows(summaryResults3) input <- top100 %>% left_join(geneMap) %>% dplyr::select(CGid, SYMBOL,main_Categories, modules, KME) %>% mutate(modules = paste("ME", modules, sep = "")) %>% left_join(sumResults[,c("modules", "var")]) %>% dplyr::rename(Order = var) upsetCpG <- input %>% group_by(CGid) %>% summarise(Order = list(as.character(Order))) %>% left_join(input[,c(1:5)], by="CGid") %>% mutate(modules = factor(modules)) upsetCpG$overlap <- apply(upsetCpG,1, function(x){length(unlist(x[2]))}) upsetCpG$ov <- apply(upsetCpG, 1, function(x){ paste(unlist(x[2]), collapse = " ; ") }) p1 <- upsetCpG %>% distinct(CGid, Order, .keep_all=TRUE) %>% ggplot(aes(x=Order, fill = modules)) + geom_bar() + scale_x_upset(n_intersections = 50) + theme_bw()+ ggtitle("Upset plot modules (upto 100 hub CpGs) related to order")+ ylab("CpG count")+ theme_classic(base_size = 12)+ theme_combmatrix(combmatrix.label.text = element_text(color = "black", size=15))+ scale_fill_manual(values = gsub("ME", "", levels(upsetCpG$modules)))+ theme(plot.title = element_text(hjust = 0.5, size=20), plot.margin = margin(0, 0, 0, 1, "cm"), legend.position = "none") pdf(file = "Upset plot modules related to Order, CpGs.pdf", width = 12, height = 5) p1 dev.off() ## upset plot based on gene input <- top100 %>% left_join(geneMap) %>% dplyr::select(SYMBOL,modules) %>% distinct(.) %>% mutate(modules = paste("ME", modules, sep = "")) %>% left_join(sumResults[,c("modules", "var")]) %>% dplyr::rename(Order = var) upsetCpG <- input %>% group_by(SYMBOL) %>% summarise(Order = list(as.character(Order))) %>% left_join(input[,c(1:2)], by="SYMBOL") %>% mutate(modules = factor(modules)) upsetCpG$overlap <- apply(upsetCpG,1, function(x){length(unlist(x[2]))}) upsetCpG$ov <- apply(upsetCpG, 1, function(x){ paste(unlist(x[2]), collapse = " ; ") }) p2 <- upsetCpG %>% distinct(SYMBOL, Order, .keep_all=TRUE) %>% ggplot(aes(x=Order, fill = modules)) + geom_bar() + scale_x_upset(n_intersections = 50) + theme_bw()+ ggtitle("Upset plot modules (upto 100 hub CpGs) related to order")+ ylab("Unique gene count")+ theme_classic(base_size = 12)+ theme_combmatrix(combmatrix.label.text = element_text(color = "black", size=15))+ scale_fill_manual(values = gsub("ME", "", levels(upsetCpG$modules)))+ theme(plot.title = element_text(hjust = 0.5, size=12), plot.margin = margin(0, 0, 0, 1, "cm"), legend.position = "right") pdf(file = "Upset plot modules related to Order, Genes.pdf", width = 14, height = 9) p2 dev.off() ``` ```{r GREAT analysis} # library("rGREAT") # human = readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V4.RDS") # # # ## Select the CpGs in the mappability file # background1 <- human %>% dplyr::select(seqnames, CGstart, CGend, CGid) %>% filter(!is.na(CGstart)) %>% dplyr::rename(CHR = seqnames) %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes")]) # # background2 <- human %>% dplyr::select(seqnames, CGstart, CGend, CGid) %>% filter(!is.na(CGstart)) %>% dplyr::rename(CHR = seqnames) %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianAndMarsupialCor0.8=="yes")]) # # # top500 <- lapply(1:(ncol(KMEs)-1), function(x){ # n <- colnames(KMEs)[x] # nModule <- gsub("MM.", "", n) # assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(500, abs(KME))) # }) # # top500 <- bind_rows(top500)%>% filter(modules %in% gsub("ME", "", orderModules1)) # input1 <- top500 %>% dplyr::select(-KME) %>% left_join(background1) %>% filter(!is.na(CGstart)) %>% mutate(modules = paste("ME", modules, sep = "")) %>% left_join(summar) %>% dplyr::rename(Order = var) %>% group_split(modules) # names(input1) <- sapply(input1, function(x){x$modules[1]}) # # top_2_500 <- lapply(1:(ncol(KMEs2)-1), function(x){ # n <- colnames(KMEs2)[x] # nModule <- gsub("MM.", "", n) # assign(nModule, KMEs2 %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(500, abs(KME))) # }) # # top_2_500 <- bind_rows(top_2_500) %>% filter(modules %in% gsub("ME", "", marsupialModules)) # input2 <- top_2_500 %>% dplyr::select(-KME) %>% left_join(background2) %>% filter(!is.na(CGstart)) %>% mutate(modules = paste("ME", modules, sep = "")) %>% left_join(summar) %>% dplyr::rename(Order = var) %>% group_split(modules) # names(input2) <- sapply(input2, function(x){x$modules[1]}) # # # summar <- sumResults %>% group_by(modules) %>% summarise(var = paste(var, collapse = " \n ")) # # # input1 <- plyr::llply(input1, function(x){ # x <- x %>% dplyr::select(-modules) %>% group_split(Order) # names(x) <- sapply(x, function(y){y$Order[1]}) # x <- lapply(x, function(y){y = y %>% dplyr::select(-Order) %>% setnames(new = c("CGid", "chr", "start", "end")) %>% # relocate(CGid, .after=end) # }) # return(x) # }) # # input2 <- plyr::llply(input2, function(x){ # x <- x %>% dplyr::select(-modules) %>% group_split(Order) # names(x) <- sapply(x, function(y){y$Order[1]}) # x <- lapply(x, function(y){y = y %>% dplyr::select(-Order) %>% setnames(new = c("CGid", "chr", "start", "end")) %>% # relocate(CGid, .after=end) # }) # return(x) # }) # # results.combined <- rbindlist(lapply(1:2, function(j){ # input <- get(paste("input", j, sep = "")) # background <- get(paste("background", j, sep = "")) # results <- plyr::llply(input, function(y){ # result <- plyr::llply(y, function(x){ # job <- submitGreatJob(x, bg = background, # species = "hg19", # includeCuratedRegDoms = TRUE, # rule = c("basalPlusExt"), # adv_upstream = 5.0, # adv_downstream = 1.0, # adv_span = 50, # request_interval = 0, # version="3.0.0", # max_tries = 10) # ontology.all=availableOntologies(job) # output.all <- plyr::llply(ontology.all, function(j){ # cat(paste(j,"\n", sep = " ")) # out0.list = tryCatch(getEnrichmentTables(job, download_by="tsv",ontology=j),error=function(e){NULL}) # if(!is.null(out0.list)){ # out0.list <- out0.list[[1]] %>% dplyr::select(-FgRegionNames, -BgRegionNames, -BgGeneNames) %>% filter(HyperP<1e-3) # } else { out0.list=NULL} # }) # # output.all <- plyr::compact(output.all) %>% rbindlist(., use.names = FALSE, fill = FALSE) # # }) # # result2 <- result %>% rbindlist(idcol = "group") # }) # results.combined <- results %>% rbindlist(idcol = "class") # })) # # write.csv(results.combined, "enrichment results, Modules Order.csv") # # results.combined <- read.csv("enrichment results, Modules Order.csv") results.combined <- read.csv("enrichment results, Network1 all modules.csv") orderSummary <- summaryMarResults3 %>% filter(var%in%marsupials) %>% group_by(modules) %>% summarise(var=paste(var, collapse = "\n"))%>%bind_rows(summaryResults3) %>% dplyr::select(modules, var) results.combined <- results.combined%>%filter(class%in%orderSummary$modules)%>% left_join(orderSummary, by=c("class"="modules")) results.combined2 <- results.combined %>% filter(Ontology%in%c("GO Biological Process", "GO Cellular Component", "GO Molecular Function", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB miRNA Motifs", "MSigDB Perturbation", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>% filter(HyperFdrQ<1e-5&datasets=="Mouse Phenotype") %>% group_by(class,var, datasets) %>% top_n(10, -log10(HyperP)) combinedEnichemt <- results.combined2 %>% dplyr::rename(pValue = HyperP, nCommonGenes = NumFgGenesHit) %>% mutate(name = ifelse(datasets == "Upstream regulators", ID, Desc)) %>% mutate(blank = "") %>% mutate(class= as.factor(class)) #write.csv(combinedEnichemt, "enrichment generic top hits,Liver, NMR Mouse Human.csv") combinedEnichemt$name = with(combinedEnichemt, reorder(name, log10(pValue))) p4 <- combinedEnichemt %>% ggplot(aes(y = name, x= blank, size=nCommonGenes, colour = -log10(pValue), shape=datasets))+ geom_point_rast()+ scale_size_binned()+ scale_color_gradient(high="red", low="blue")+ theme_bw()+ ylab(label = "Mouse phenotypes")+ ggtitle("Enrichment analysis, modules related to Order")+ theme_classic(base_size = 25)+ facet_wrap(.~class+var, nrow = 1)+ theme(axis.text.x = element_blank(), axis.text.y = element_text(size=18), plot.margin = margin(0.5, 0, 0, 1, "cm"), plot.title = element_text(size=24, hjust = 0.5), axis.title.x = element_blank(), axis.ticks.x = element_blank(), strip.text = element_text(size=13))+ scale_shape_manual(values=c(17, 15, 3,7, 8,1))+ guides(shape = guide_legend(override.aes = list(size=5)))+ ggpubr::grids(linetype = "dotted", axis = "y") g <- ggplot_gtable(ggplot_build(p4)) strip_both <- which(grepl('strip-', g$layout$name)) fills <- gsub("ME","",levels(combinedEnichemt$class)) k <- 1 for (i in strip_both) { j <- which(grepl('rect', g$grobs[[i]]$grobs[[1]]$childrenOrder)) g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k] k <- k+1 } grid::grid.draw(g) pdf(file = "enrichment modules Order.pdf", width = 25, height = 16) grid::grid.draw(g) dev.off() ``` ```{r scatter plot of the distances} phyloTree <- ape::read.nexus("tree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) corSum <- as.data.frame(moduleTraitCor1[,order(match(colnames(moduleTraitCor1), levels(samples$Order)))]) corSum <- bind_rows(corSum, as.data.frame(moduleTraitCor2)) # from the WGCNA orderDist <- cluster::daisy(1-t(corSum), metric = "euclidean") orderTree <- hclust(orderDist, method = "complete") OrderDistances <- cophenetic(orderTree) OrderDistances <- as.data.frame(as.matrix(OrderDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200)) p1 <- OrderDistances %>% ggplot(aes(x = x2, y=y2))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor()+theme_classic(base_size = 12)+ylab("Phlogenetic tree distance (million years)")+xlab("DNAm tree distance")+ggtitle("Comparison of the WGCNA DNAm and DNA sequence phylogenetic trees") pdf(file = "Comparison of the WGCNA DNAm and DNA sequence phylogenetic trees.pdf", width = 8, height = 4) p1 dev.off() ``` ## Order per tissue ```{r Order per tissue} ## no marsupials samples_tissue <- samplesNoMars %>% dplyr::select(Order, Tissue2) %>% distinct(.) %>% group_by(Tissue2) %>% tally() %>% right_join(samplesNoMars) %>% filter(n>=8) %>%group_split(Tissue2) names(samples_tissue) <- sapply(samples_tissue, function(x){x$Tissue2[1]}) samples_tissue$all <- samplesNoMars samples_tissue <- plyr::llply(samples_tissue, function(y){ orders2 <- unique(y$Order) Order2 <- as.data.frame(sapply(orders2, function(x){ a <- y %>% mutate(x = ifelse(Order==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Order2) <- orders2 Order2 <- cbind(Basename = y$Basename, Order2) }) results <- rbindlist(plyr::llply(samples_tissue, function(x){ ME <- MEs[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% mutate(dir = as.factor(sign(r))) %>% group_by(var, dir) %>% top_n(1, abs(r))%>% mutate(ID = paste(modules, var, sep = "_"))%>% filter(pval<0.05&abs(r)>0.5) %>% dplyr::select(-dir) }), use.names = T, idcol = "Tissue") ## marsupials samples_tissue_marsupials <- samples %>% dplyr::select(Order, Tissue) %>% distinct(.) %>% group_by(Tissue) %>% tally() %>% right_join(samples) %>% filter(n>=8) %>%group_split(Tissue) names(samples_tissue_marsupials) <- sapply(samples_tissue_marsupials, function(x){x$Tissue[1]}) samples_tissue_marsupials$all <- samples samples_tissue_marsupials <- plyr::llply(samples_tissue_marsupials, function(y){ orders2 <- unique(y$Order) Order2 <- as.data.frame(sapply(orders2, function(x){ a <- y %>% mutate(x = ifelse(Order==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Order2) <- orders2 Order2 <- cbind(Basename = y$Basename, Order2) }) results2 <- rbindlist(plyr::llply(samples_tissue_marsupials, function(x){ ME <- MEs4[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(5, -pval)%>%top_n(2, abs(r)) %>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) %>% filter(pval<0.05&abs(r)>0.2) }), use.names = T, idcol = "Tissue") %>% filter(var%in%marsupials) results <- dplyr::bind_rows(results, results2) ## marsupials samples_tissue_monotremes <- samplesWithMonotremes %>% dplyr::select(Order, Tissue) %>% distinct(.) %>% group_by(Tissue) %>% tally() %>% right_join(samplesWithMonotremes) %>% filter(n>=8) %>%group_split(Tissue) names(samples_tissue_monotremes) <- sapply(samples_tissue_monotremes, function(x){x$Tissue[1]}) samples_tissue_monotremes$all <- samples samples_tissue_monotremes <- plyr::llply(samples_tissue_monotremes, function(y){ orders2 <- unique(y$Order) Order2 <- as.data.frame(sapply(orders2, function(x){ a <- y %>% mutate(x = ifelse(Order==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Order2) <- orders2 Order2 <- cbind(Basename = y$Basename, Order2) }) results3 <- rbindlist(plyr::llply(samples_tissue_monotremes, function(x){ ME <- MEs3[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(5, -pval)%>%top_n(2, abs(r)) %>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) %>% filter(pval<0.05&abs(r)>0.2) }), use.names = T, idcol = "Tissue") %>% filter(var%in%monotremes) results <- dplyr::bind_rows(results, results2) results <- dplyr::bind_rows(results, results3) sumTable <- results %>% dplyr::select(modules,r, var,pval, Tissue) %>% group_by(modules) %>% summarize(OrderPerTissue = paste(var,"_",Tissue, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) sum2 <- sum2%>% left_join(sumTable) ``` ## Species level analysis combined models ```{r preparing samples} species1 <- unique(samplesNoMars$SpeciesCommonName) Species1 <- as.data.frame(sapply(species1, function(x){ a <- samplesNoMars %>% mutate() %>% mutate(x = ifelse(SpeciesCommonName==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Species1) <- species1 Species1 <- cbind(Basename = samplesNoMars$Basename, Species1) ## Marsupials species2 <- unique(samples$SpeciesCommonName) Species2 <- as.data.frame(sapply(species2, function(x){ a <- samples %>% mutate(x = ifelse(SpeciesCommonName==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Species2) <- species2 Species2 <- cbind(Basename = samples$Basename, Species2) ## monotremes species3 <- unique(samplesWithMonotremes$SpeciesCommonName) Species3 <- as.data.frame(sapply(species3, function(x){ a <- samplesWithMonotremes %>% mutate(x = ifelse(SpeciesCommonName==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Species3) <- species3 Species3 <- cbind(Basename = samplesWithMonotremes$Basename, Species3) ``` ```{r relate to species} # no Marsupials datTraits1 <- Species1 %>% filter(Basename %in% rownames(MEs)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(MEs,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% mutate(dir = as.factor(sign(r))) %>% group_by(var, dir) %>% top_n(1, abs(r))%>% mutate(ID = paste(modules, var, sep = "_"))%>% filter(pval<0.05&abs(r)>0.4) %>% dplyr::select(-dir) summaryResults4 <- summaryResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", text)) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryResults4 <- summaryResults4[rownames(moduleTraitCor1),] %>% dplyr::select(colnames(moduleTraitCor1)) identical(rownames(summaryResults4), rownames(moduleTraitCor1)) speciesModules1 <- rownames(summaryResults4)[rowSums(summaryResults4!="")>0] #moduleTraitCor <- moduleTraitCor[orderModules,] #summaryResults4 <- summaryResults4[orderModules,] ``` ```{r relate to Marsupials} # Marsupials datTraits2 <- Species2 %>% filter(Basename %in% rownames(MEs2)) %>% tibble::column_to_rownames("Basename") moduleTraitCor2=cor(MEs4,datTraits2,use="p") moduleTraitPvalue2=corPvalueStudent(moduleTraitCor2,nrow(datTraits2)) summaryMarResults <- as.data.frame(moduleTraitPvalue2) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryMarResults2 <- as.data.frame(moduleTraitCor2) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryMarResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue2, format = "e", digits = 0), sep = "")) summaryMarResults3 <- as.data.frame(moduleTraitCor2) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryMarResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(5, -pval)%>%top_n(2, abs(r)) %>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) %>% filter(pval<0.05&abs(r)>0.2) summaryMarResults4 <- summaryMarResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryMarResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", text)) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryMarResults4 <- summaryMarResults4[rownames(moduleTraitCor2), colnames(moduleTraitCor2)] identical(rownames(summaryMarResults4), rownames(moduleTraitCor2)) marsupialsSpecies <- unique(samples$SpeciesCommonName[which(samples$Order%in%marsupials)]) marsupialSpecModules <- unique(summaryMarResults3$modules[which(summaryMarResults3$var %in% marsupialsSpecies)]) summaryMarResults4 <- summaryMarResults4[marsupialSpecModules, order(match(colnames(summaryMarResults4), colnames(summaryResults4)))] moduleTraitCor2 <- moduleTraitCor2[marsupialSpecModules, order(match(colnames(moduleTraitCor2), colnames(moduleTraitCor1))), drop=F] ``` ```{r Monotremes} # Monotremes datTraits3 <- Species3 %>% filter(Basename %in% rownames(MEs3)) %>% tibble::column_to_rownames("Basename") moduleTraitCor3=cor(MEs3,datTraits3,use="p") moduleTraitPvalue3=corPvalueStudent(moduleTraitCor3,nrow(datTraits3)) summaryMonoResults <- as.data.frame(moduleTraitPvalue3) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryMonoResults2 <- as.data.frame(moduleTraitCor3) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryMonoResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue3, format = "e", digits = 0), sep = "")) summaryMonoResults3 <- as.data.frame(moduleTraitCor3) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryMonoResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(5, -pval)%>%top_n(2, abs(r)) %>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) %>% filter(pval<0.05&abs(r)>0.2) summaryMonoResults4 <- summaryMonoResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryMonoResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", text)) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryMonoResults4 <- summaryMonoResults4[rownames(moduleTraitCor3), colnames(moduleTraitCor3), drop=F] identical(rownames(summaryMonoResults4), rownames(moduleTraitCor3)) monotremeSpecies <- unique(samplesWithMonotremes$SpeciesCommonName[which(samplesWithMonotremes$Order%in%monotremes)]) monotremSpecModules <- unique(summaryMonoResults3$modules[which(summaryMonoResults3$var %in% monotremeSpecies)]) summaryMonoResults4 <- summaryMonoResults4[monotremSpecModules, order(match(colnames(summaryMonoResults4), colnames(summaryResults4))), drop=F] moduleTraitCor3 <- moduleTraitCor3[monotremeModules,order(match(colnames(moduleTraitCor3), colnames(moduleTraitCor1))), drop=F] ## Merge with all samples summRes <- summaryResults4[,order(match(colnames(summaryResults4), colnames(moduleTraitCor1)))] summRes <- summRes[which(!rownames(summRes)%in%c(marsupialSpecModules, monotremSpecModules)),] summRes <- bind_rows(summRes, summaryMarResults4) summRes <- bind_rows(summRes, summaryMonoResults4) summRes[is.na(summRes)] <- "" corSum <- as.data.frame(moduleTraitCor1[,order(match(colnames(moduleTraitCor1), colnames(moduleTraitCor1)))]) corSum <- corSum[which(!rownames(corSum)%in%c(marsupialSpecModules, monotremSpecModules)),] corSum <- bind_rows(corSum, as.data.frame(moduleTraitCor2)) corSum <- bind_rows(corSum, as.data.frame(moduleTraitCor3)) ``` ```{r summ} sumTable <- summaryResults3 %>% dplyr::select(modules,r, var,pval) %>% group_by(modules) %>% summarize(species = paste(var, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) sumTable <-summaryMarResults3%>% dplyr::select(modules, r,var,pval) %>% filter(modules%in% marsupialSpecModules) %>% group_by(modules) %>% summarize(species = paste(var, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) %>% bind_rows(sumTable) %>% group_by(modules) %>% summarize(species = paste(species, collapse = " ; ")) sumTable <-summaryMonoResults3%>% dplyr::select(modules,r, var,pval) %>% filter(var%in% monotremeSpecies)%>% #mutate(r = ifelse(r>0, "+", ifelse(r<0, "-", NA))) %>% group_by(modules) %>% summarize(species = paste(var, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) %>% bind_rows(sumTable) ``` ```{r heatmap for species} corSum <- t(corSum) summRes <- t(summRes) summRes[summRes!=""] <- "*" i <- which(rowSums(summRes=="*")>0) corSum <- corSum[i,] summRes <-summRes[i,] my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231] library(ComplexHeatmap) cols <- gsub("(ME)", "", colnames(corSum)) names(cols) <- colnames(corSum) association <- KMEs %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) %>% mutate(modules = paste("ME", moduleColors, sep = "")) #%>% filter(!modules%in%marsupialSpecModules) # association2 <- KMEs2 %>% dplyr::select(modules) %>% filter(modules%in%gsub("(ME)", "", marsupialModules)) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) %>% mutate(modules = paste("ME", moduleColors, sep = "")) %>% bind_rows(association)%>% right_join(data.frame(modules = colnames(corSum))) # # association2 <- association2[order(match(association2$modules, colnames(corSum))),] # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(corSum), col = list(Modules = cols), CpG_Frequency = anno_barplot(association$Freq), show_legend =F) # row annotation df <- samples %>% dplyr::select(SpeciesCommonName, Order)%>% filter(SpeciesCommonName%in%rownames(corSum)) %>%filter(!duplicated(SpeciesCommonName)) %>% distinct(.) %>%left_join(orColsData) %>% mutate() df <- df[order(match(df$SpeciesCommonName,rownames(corSum))),] identical(df$SpeciesCommonName, rownames(corSum)) orCols3 <- df$colors names(orCols3) <- df$Order ra =rowAnnotation(Order = df$Order, col = list(Order = orCols3)) # Heatmap <- Heatmap(corSum, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = TRUE, row_dend_reorder=TRUE, column_title = "Modules", # cell_fun = function(j, i, x, y, w, h, col) { # grid.text(summRes[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=20))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 20), row_dend_side = "right") colnames(corSum) <- gsub("ME", "", colnames(corSum)) Heatmap <- Heatmap(corSum, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = TRUE, row_dend_reorder=TRUE, row_title="Phylogenetic orders", cell_fun = function(j, i, x, y, w, h, col) { grid.text(summRes[i, j],vjust = 0.8, x, y, gp=gpar(fontsize=17))}, bottom_annotation = ha, clustering_method_rows ="complete", row_dend_side = "right", column_names_gp = gpar(fontsize = 8)) jpeg(file = "Heatmap modules Species.jpeg", width = 12, height = 5, units = "in", res = 300) draw(Heatmap, heatmap_legend_side = "left",annotation_legend_side="left", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() Heatmap <- Heatmap(corSum, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = TRUE, row_dend_reorder=TRUE, row_title="Species", cell_fun = function(j, i, x, y, w, h, col) { grid.text(summRes[i, j],vjust = 0.8, x, y, gp=gpar(fontsize=20))}, bottom_annotation = ha, clustering_method_rows ="complete", row_dend_side = "right", column_names_gp = gpar(fontsize = 10)) pdf(file = "Heatmap modules Order combined.pdf", width = 12, height = 5) draw(Heatmap, heatmap_legend_side = "left", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ```{r heatMap with arranged modules} # take the final summary table corSum <- corSum[,sum2$modules] summRes <- summRes[,sum2$modules] # Take sum2 from network analysis, I wanted to get the grouping from Netwrok analysis trCols <- as.character(sum2$color) names(trCols) <- sum2$group # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(corSum), CpG_Frequency = anno_barplot(sum2$Freq), Traits = sum2$group, col = list(Modules = cols, Traits = trCols)) Heatmap <- Heatmap(corSum, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = TRUE, row_dend_reorder=TRUE, column_title = "Modules", cell_fun = function(j, i, x, y, w, h, col) { grid.text(summRes[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=25))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 12) , row_dend_side = "right") jpeg(file = "Heatmap modules Species.jpeg", width = 20, height = 16, units = "in", res = 300) draw(ra+Heatmap, heatmap_legend_side = "left",annotation_legend_side="left", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` # Phyloepigenetic tree ```{r phylogenetic analysis of all species} # library(ggtree) # # samp <- samples %>% mutate(SpeciesCommonName = ifelse(SpeciesCommonName=="Eastern mole ", "Eastern mole", SpeciesCommonName)) %>% filter(!is.na(SpeciesCommonName)) %>% filter(!is.na(Order)) # # # samp <- samples %>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order)) # # #allData <- bValsAll %>% tibble::rownames_to_column(var="CGid") %>% gather(key = "Basename", value = "bval", -CGid) %>% left_join(samp[,c("Basename", "SpeciesCommonName")]) %>% group_by(SpeciesCommonName, CGid) %>% summarise(bval = mean(bval)) # # allData <- bValsAll %>% tibble::rownames_to_column(var="CGid") %>% gather(key = "Basename", value = "bval", -CGid) %>% left_join(samp[,c("Basename", "SpeciesCommonName", "Tissue")]) %>% group_by(SpeciesCommonName, CGid, Tissue) %>% summarise(bval = mean(bval)) # # # adjust for tissue difference # allData$res <- residuals(lm(bval~Tissue, data = allData)) # # # # allData <- allData %>% dplyr::select(-bval) %>% group_by(SpeciesCommonName, CGid) %>% summarise(res = mean(res))%>%spread(key = SpeciesCommonName, value = res) %>% tibble::column_to_rownames(var = "CGid") # allData$`` <- NULL # # corSpecies <- corAndPvalue(allData) # # # # datColors <- samp %>% dplyr::select(SpeciesCommonName, Order) %>% mutate(SpeciesCommonName = ifelse(SpeciesCommonName=="Eastern mole ", "Eastern mole", SpeciesCommonName))%>% filter(!duplicated(SpeciesCommonName)) %>% mutate(SpeciesCommonName = make.unique(SpeciesCommonName)) %>% tibble::column_to_rownames(var = "SpeciesCommonName") # # # Euclidean # corDist <- cluster::daisy(corSpecies$cor, metric = "euclidean") # speciesTree <- hclust(corDist, method = "complete") # tree <- ggtree(speciesTree, layout = "circular") +geom_tiplab(size=2)+theme_tree() # # p1 <- gheatmap(tree, datColors, offset=.1, width=.2, # colnames_angle=95, colnames_offset_y = .2) + # scale_fill_manual(values = orColsData$colors)+labs(fill="Order")+ggtitle("Clustering Euclidean distance") # # pdf("DNAm clustering, Euclidean.pdf", width = 12, height = 12) # p1 # dev.off() # # # 1-cor # corDist2 <- as.dist(1-corSpecies$cor) # speciesTree2 <- hclust(corDist2, method = "complete") # # # Circular # tree2 <- ggtree(speciesTree2, layout = "circular") +geom_tiplab(size=2.5)+theme_tree() # p2 <- gheatmap(tree2, datColors, offset=.05, width=.05, # colnames_angle=95, colnames_offset_y = .05) + # scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+theme(text = element_text(size = 15)) # # # slanted # tree2 <- ggtree(speciesTree2, layout = "slanted") +geom_tiplab(size=4.5,aes(angle=90))+theme_tree() # p2 <- gheatmap(tree2, datColors, offset=.3, width=.083, # colnames_angle=90, colnames_offset_y = .1) + # scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+theme(text = element_text(size = 20))+coord_flip() # # # pdf("DNAm clustering, 1-cor.pdf", width = 30, height = 6) # p2 # dev.off() # # # ## clustering without Marsupials # samp <- samplesNoMars %>% mutate(SpeciesCommonName = ifelse(SpeciesCommonName=="Eastern mole ", "Eastern mole", SpeciesCommonName)) %>% filter(!is.na(SpeciesCommonName)) %>% filter(!is.na(Order)) # # allData <- bValsNoMars %>% tibble::rownames_to_column(var="CGid") %>% gather(key = "Basename", value = "bval", -CGid) %>% left_join(samp[,c("Basename", "SpeciesCommonName")]) %>% group_by(SpeciesCommonName, CGid) %>% summarise(bval = mean(bval)) # # allData <- allData %>% spread(key = SpeciesCommonName, value = bval) %>% tibble::column_to_rownames(var = "CGid") # allData$`` <- NULL # # corSpecies <- corAndPvalue(allData) # # # # datColors <- samp %>% dplyr::select(SpeciesCommonName, Order) %>% mutate(SpeciesCommonName = ifelse(SpeciesCommonName=="Eastern mole ", "Eastern mole", SpeciesCommonName))%>% filter(!duplicated(SpeciesCommonName)) %>% mutate(SpeciesCommonName = make.unique(SpeciesCommonName)) %>% tibble::column_to_rownames(var = "SpeciesCommonName") # # # Euclidean # corDist <- cluster::daisy(corSpecies$cor, metric = "euclidean") # speciesTree <- hclust(corDist, method = "complete") # tree <- ggtree(speciesTree, layout = "circular") +geom_tiplab(size=2)+theme_tree() # # p1 <- gheatmap(tree, datColors, offset=.1, width=.2, # colnames_angle=95, colnames_offset_y = .2) + # scale_fill_manual(values = orCols[c(-7:-9)])+labs(fill="Order")+ggtitle("Clustering Euclidean distance") # # pdf("DNAm clustering, Euclidean, no Marsupials.pdf", width = 12, height = 12) # p1 # dev.off() # # # # # 1-cor # corDist2 <- as.dist(1-corSpecies$cor) # speciesTree <- hclust(corDist2, method = "complete") # tree <- ggtree(speciesTree, layout = "circular") +geom_tiplab(size=2)+theme_tree() # # p1 <- gheatmap(tree, datColors, offset=.1, width=.2, # colnames_angle=95, colnames_offset_y = .2) + # scale_fill_manual(values = orCols[c(-7:-9)])+labs(fill="Order")+ggtitle("Clustering Euclidean distance") # # pdf("DNAm clustering, 1-cor, no Marsupials.pdf", width = 12, height = 12) # p1 # dev.off() ``` ```{r phylogenetic analysis of all species} library(ggtree) require(treeio) library("WGCNA") #bValsAll <- readRDS("DNAmDataAll.RDS") samp <- samplesWithMonotremes %>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order))%>%mutate(Tissue2 = ifelse(Tissue%in%c("Cortex", "Neocortex", "Striatum", "SVZ", "FrontalCortex", "Hippocampus", "OccipitalCortex", "Substantia nigra", "WholeBrain", "Brain", "Cerebellum", "Hypothalamus", "TemporalCortex"), "Brain", as.character(Tissue))) %>% mutate(orCol = as.character(factor(Order, levels= levels(Order), labels = orCols))) tissueCount <- samp%>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order))%>%group_by(Tissue2,SpeciesLatinName)%>%tally()%>%filter(n>2)%>% group_by(Tissue2) %>%tally()%>%filter(n>40) samp <- samp %>% filter(Tissue%in%tissueCount$Tissue2) allData <- bValsWithMonotremes %>% dplyr::select(samp$Basename) %>% tibble::rownames_to_column(var="CGid") %>% gather(key = "Basename", value = "bval", -CGid) %>% left_join(samp[,c("Basename", "SpeciesLatinName", "Tissue", "spec")]) %>% group_split(Tissue) names(allData) <- sapply(allData, function(x){x$Tissue[1]}) corDat <- plyr::llply(allData, function(x){ tis <- x$Tissue[1] datColors <- samp %>% filter(SpeciesLatinName%in%x$SpeciesLatinName)%>% dplyr::select(SpeciesLatinName, Order)%>% filter(!duplicated(SpeciesLatinName)) %>% mutate(SpeciesLatinName = make.unique(SpeciesLatinName)) %>% tibble::column_to_rownames(var = "SpeciesLatinName") x <- x %>% group_by(SpeciesLatinName, CGid) %>% summarise(bval = mean(bval))%>% spread(key = SpeciesLatinName, value = bval) %>% tibble::column_to_rownames(var = "CGid") corSpecies <- corAndPvalue(x) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) #corDist2 <- dist(corSpecies$cor, method = "minkowski") speciesTree2 <- hclust(corDist2, method = "average") tree2 <- ggtree(speciesTree2, layout = "slanted")+ #geom_tiplab(size=7.5,aes(angle=0))+ theme_tree() d <- data.frame(label = tree2$data$label) %>% left_join(dplyr::select(.data=sumSample, SpeciesLatinName, Number, Order), by=c("label"="SpeciesLatinName")) plot <- tree2 %<+% d + geom_tiplab(aes(label=Number, fill=Order),size=6,aes(angle=0), geom = "label", label.padding = unit(0.01, "lines"), align = T)+ geom_hilight(mapping=aes(subset=node%in%node[which(!is.na(Order))],fill=Order))+ scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ theme(text = element_text(size = 30), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ guides(fill = guide_legend(ncol = 1))+ #coord_flip()+ ggtitle(paste(tis))+ geom_treescale(linesize = 1, fontsize = 12, width = 0.05)+ xlim(-0.5, NA) # plot <- gheatmap(tree2, datColors, offset=.2, width=.083, # colnames_angle=0, colnames_offset_y = .1) + # scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ # theme(text = element_text(size = 30), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ # guides(fill = guide_legend(ncol = 1))+ # #coord_flip()+ # ggtitle(paste(tis))+ geom_treescale(linesize = 1, fontsize = 6, width = 0.05) return(list("tree"=speciesTree2, "plot"=plot)) }) plots <- lapply(corDat, function(x){x[[2]]}) pdf("DNAm clustering blood, 1-cor.pdf", width = 18, height = 28) plots[[1]]+theme(text = element_text(size = 20))+ xlim(-0.3, NA) dev.off() plots <- ggpubr::ggarrange(plotlist = plots, ncol = 3, common.legend = TRUE, legend = "right") pdf("DNAm clustering, 1-cor.pdf", width = 45, height = 30) plots dev.off() ``` ```{r circular plot} x <- allData$Blood tis <- x$Tissue[1] datColors <- samp %>% filter(SpeciesLatinName%in%x$SpeciesLatinName)%>% dplyr::select(SpeciesLatinName, Order)%>% filter(!duplicated(SpeciesLatinName)) %>% mutate(SpeciesLatinName = make.unique(SpeciesLatinName)) %>% tibble::column_to_rownames(var = "SpeciesLatinName") x <- x %>% group_by(SpeciesLatinName, CGid) %>% summarise(bval = mean(bval))%>% spread(key = SpeciesLatinName, value = bval) %>% tibble::column_to_rownames(var = "CGid") corSpecies <- corAndPvalue(x) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) #corDist2 <- dist(corSpecies$cor, method = "minkowski") speciesTree2 <- hclust(corDist2, method = "average") tree2 <- ggtree(speciesTree2, layout = "circular")+ #geom_tiplab(size=7.5,aes(angle=0))+ theme_tree() d <- data.frame(label = tree2$data$label) %>% left_join(dplyr::select(.data=sumSample, SpeciesLatinName, Number, Order), by=c("label"="SpeciesLatinName")) %>% mutate(Order = factor(Order, levels = names(orCols))) plot <- tree2 %<+% d + geom_tippoint(aes(color=Order),size=6)+ scale_color_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ theme(text = element_text(size = 30), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ guides(fill = guide_legend(ncol = 1))+ #coord_flip()+ ggtitle("Blood phyloepigenetic tree")+ geom_treescale(linesize = 1, fontsize = 4, width = 0.05)+ xlim(-0.5, NA) pdf("blood phyloepi tree, circular.pdf", width = 12, height = 12) plot dev.off() ``` ```{r create timetree phylotree} set.seed(2021) images <- rbindlist(readRDS("~/Google drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Caesar analysis/Figure for the paper/images of animals all.RDS")) %>% filter(complete.cases(.)&uid!="") %>% dplyr::rename(SpeciesLatinName=name) %>% left_join(sumSample) %>% mutate(maximum_age=log(as.numeric(maximum_age))) %>% mutate(average_weight=log(as.numeric(average_weight)))%>% mutate(ratio=scales::rescale(maximum_age, mean = 1, sd = 0.05)[[1]]*max(maximum_age)/100000000)%>% mutate(life= Hmisc::cut2(as.numeric(maximum_age), g = 4))%>% group_by(Order, life) %>% filter(SpeciesLatinName%in%sample(SpeciesLatinName, 5, replace = T)| SpeciesLatinName%in%c("Homo sapiens", "Canis lupus familiaris", "Balaena mysticetus", "Heterocephalus glaber")) %>% filter(!duplicated(SpeciesLatinName)& !SpeciesLatinName%in%c("Elephas maximus", "Crocuta crocuta", "Sarcophilus harrisii", "Eumetopias jubatus", "Chinchilla lanigera", "Oryctolagus cuniculus", "Rhinolophus alcyone", "Atelerix albiventris", "Mus musculus")& !is.na(Order)) %>% ungroup() phyloTree <- ape::read.nexus("Mammalia_species") phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" phyloTree <- castor::get_subtree_with_tips(phyloTree, only_tips = gsub(" ", "_",sumSample$SpeciesLatinName)) tree2 <- ggtree(phyloTree$subtree, layout = "slanted") tree2$data$label <- gsub("_", " ", tree2$data$label) d <- data.frame(label = tree2$data$label) %>% left_join(dplyr::select(.data=sumSample, SpeciesLatinName, Number, Order, maximum_age), by=c("label"="SpeciesLatinName")) %>% left_join(dplyr::select(.data=images, SpeciesLatinName, uid, ratio), by=c("label"="SpeciesLatinName")) plot <- tree2 %<+% d + geom_tiplab(aes(label=Number, fill=Order,angle=90),size=5, geom = "label", label.padding = unit(0.01, "lines"), offset = 0.1)+ geom_tiplab(aes(image=uid, x = branch, color=as.numeric(maximum_age)), size=0.05,offset = -10, geom="phylopic", align=F)+ geom_hilight(mapping=aes(subset=node%in%node[which(!is.na(Order))],fill=Order))+ scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Phylogenetic tree of\nspecies in our study")+ theme(text = element_text(size = 25), legend.text = element_text(size=20), legend.position = "right", plot.margin = unit(c(14,8,14,8), "mm"))+ guides(fill = guide_legend(ncol = 1))+ geom_treescale(linesize = 1, fontsize = 6, width = 50)+ scale_color_gradient(low="darkblue", high = "maroon")+labs(color="maximum lifespan (years)") pdf("Phylogenetic tree.pdf", width = 18, height = 44) plot dev.off() ``` ```{r scatter plot of the distances} phyloTree <- ape::read.nexus("Mammalia_species") phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" #phyloTree <- ape::read.nexus("speciesTimeTree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # from the DNAm data specDist <- rbindlist(plyr::llply(corDat, function(x){ x <- x[[1]] specDistances <- cophenetic(x) specDistances <- as.data.frame(as.matrix(specDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200)) }), idcol = "Tissue") %>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) specDist <- specDist %>% group_by(Tissue) %>% mutate(residuals = residuals(lm(PhyloDist~dist))) %>% mutate(outlier= ifelse(abs(residuals)>80, "outlier", "goodMatch")) p1 <- specDist %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(size=6, color="blue")+theme_classic(base_size = 20)+ylab("Phlogenetic tree distance\n(million years)")+xlab("DNAm tree distance")+facet_wrap(.~Tissue, nrow = 1)+labs(size="Number of overlapped\npoints")+theme(axis.text.y = element_text(size=20))+scale_color_manual(values=c("black","red")) pdf(file = "Comparison of the DNAm and DNA sequence phylogenetic trees.pdf", width = 13, height = 4) p1 dev.off() p2 <- specDist %>% filter(Tissue=="Blood") %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(size=8, color="blue")+theme_classic(base_size = 20)+ylab("Phlogenetic tree distance\n(million years)")+xlab("DNAm tree distance")+facet_wrap(.~Tissue, nrow = 1)+labs(size="Number of overlapped\npoints")+theme(axis.text.y = element_text(size=20))+scale_color_manual(values=c("black","red")) pdf(file = "Comparison of the DNAm and DNA sequence phylogenetic trees, blood.pdf", width = 8, height = 4) p2 dev.off() ``` #EWAS of phylogenetic relationship ```{r EWAS of phylogenetic relationship} phyloTree <- ape::read.nexus("Mammalia_species") phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" #phyloTree <- ape::read.nexus("speciesTimeTree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # meanDatPerTissue <- lapply(c("Blood", "Liver", "Skin"), function(x){ # samp <- samplesWithMonotremes %>% filter(Tissue==x) # i <- phyloTree$tip.label[which(phyloTree$tip.label%in%samp$labelsCaesar)] # phyloTreeSubset <- castor::get_subtree_with_tips(phyloTree, only_tips = i) # # # # dat0 <- bValsWithMonotremes %>% dplyr::select(samp$Basename) %>% t() %>% as.data.frame() %>% tibble::rownames_to_column(var = "Basename") %>% left_join(samp[,c("Basename", "labelsCaesar")]) %>% dplyr::select(-Basename) %>% group_by(labelsCaesar)%>% summarize(across(everything(), ~mean(.,na.rm=T))) %>% tibble::column_to_rownames(var = "labelsCaesar")%>% t()%>% as.data.frame() # # dat0 <- dat0 %>% dplyr::select(phyloTreeSubset$subtree$tip.label) # subTree <- phyloTreeSubset$subtree # # return(list(dat = dat0, samp = samp, tree=subTree)) # }) # # names(meanDatPerTissue) <- c("Blood", "Liver", "Skin") # # saveRDS(meanDatPerTissue, "meanDatPerTissue for phylogenetic analysis of CpGs.RDS") # samp <- samplesWithMonotremes %>% filter(Tissue=="Blood") # i <- phyloTree$tip.label[which(phyloTree$tip.label%in%samp$labelsCaesar)] # phyloTreeSubset <- castor::get_subtree_with_tips(phyloTree, only_tips = i) # # # # dat0 <- bValsWithMonotremes %>% dplyr::select(samp$Basename) %>% t() %>% as.data.frame() %>% tibble::rownames_to_column(var = "Basename") %>% left_join(samp[,c("Basename", "labelsCaesar")]) %>% dplyr::select(-Basename) %>% group_by(labelsCaesar)%>% summarize(across(everything(), ~mean(.,na.rm=T))) %>% tibble::column_to_rownames(var = "labelsCaesar")%>% t()%>% as.data.frame() # # dat0 <- dat0 %>% dplyr::select(phyloTreeSubset$subtree$tip.label) # subTree <- phyloTreeSubset$subtree # # results <- bettermc::mclapply(1:nrow(dat0), function(x){ # d <- unlist(dat0[x,]) # a <- picante::phylosignal(d[subTree$tip.label], subTree, reps = 999) # }, mc.cores = 4) # names(results) <- rownames(dat0) # # results <- rbindlist(results, idcol = "CGid") # # saveRDS(results, "EWAS of phylogenetic tree analysis.RDS") # p1 <- results %>% mutate(phylogenetic.signal = ifelse(PIC.variance.P==0.0001&PIC.variance.Z< -3, "High signal", ifelse(PIC.variance.P>0.05, "Low signal", "Medium signal")))%>% ggplot(aes(x=-log10(PIC.variance.P), y=PIC.variance.Z, color=phylogenetic.signal))+geom_point()+xlab("- log10(Variance Pvalue)")+ylab("Variance Z") # p1 <- results %>% mutate(phylogenetic.signal = ifelse(K>1, "significant", "not significant")) %>% mutate(phylogenetic.signal=factor(phylogenetic.signal, levels = c("significant", "not significant"))) %>% ggplot(aes(x=K, fill=phylogenetic.signal))+geom_histogram(binwidth = 0.08)+geom_vline(xintercept = 1, linetype="dashed")+theme_classic()+ggtitle("CpGs with phylogenetic signal")+xlab("K statistics")+ylab("CpG count")+annotate(label=paste("811 CpGs with K>1"), x=10, y=500, geom="text", color="blue", size=2) # # ggsave("CpGs with phylo signal.pdf", p1, width = 5, height = 3) # manhattan plot meanDatPerTissue <- readRDS("meanDatPerTissue for phylogenetic analysis of CpGs.RDS") phyResults <- readRDS("EWAS of phyloepigenetic tree.RDS") manhattanPlots <- lapply(1:3, function(t){ results <- as.data.frame(phyResults[[t]]) dat0 <- meanDatPerTissue[[t]]$dat samp <- meanDatPerTissue[[t]]$samp subTree <- meanDatPerTissue[[t]]$tree selectedCpGs <- results %>% filter(PIC.variance.P==0.001) %>% top_n(500, -PIC.variance.Z) selectedCpGs <- selectedCpGs$CGid tis <- names(meanDatPerTissue)[t] nCpGs <- length(selectedCpGs) geneMapSubset <- geneMap %>% filter(CGid%in%results$CGid) min <- min(results$PIC.variance.Z)-2 max <- max(results$PIC.variance.Z) p <- manhattanForZ(results, geneMap, highlightTrNeg = -3 ,highlightTrPos = 5 , annotateTr = 3, title = tis, annotateTop = 15, labelSize = 5, stripSize = 26, maxTextOverlap = 8, ylim = c(min, max))+theme(axis.text.y = element_text(size = 10), axis.title = element_text(size=25)) }) p1 <- ggpubr::ggarrange(plotlist = manhattanPlots, nrow = 1, ncol = 3, align = "v") pdf(file = "Manhattan plot EWAS of phyloepigenetic tree.pdf", width = 10, height = 4) p1 dev.off() # distancePlots <- lapply(1:3, function(t){ results <- as.data.frame(phyResults[[t]]) dat0 <- meanDatPerTissue[[t]]$dat samp <- meanDatPerTissue[[t]]$samp subTree <- meanDatPerTissue[[t]]$tree min <- min(results$PIC.variance.Z)+1 zRange <- seq(-2, min, by = -0.5) tunedZ <- sapply(zRange, function(test.z){ selectedCpGs <- results$CGid[which(results$PIC.variance.Z< test.z)] tis <- names(meanDatPerTissue)[t] nCpGs <- length(selectedCpGs) geneMapSubset <- geneMap %>% filter(CGid%in%results$CGid) # dat.selected <- dat0[selectedCpGs, ] corSpecies <- corAndPvalue(dat.selected) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) #corDist2 <- dist(corSpecies$cor, method = "minkowski") speciesTree2 <- hclust(corDist2, method = "average") distances <- cophenetic(subTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) phyloEpiDistances <- cophenetic(speciesTree2) phyloEpiDistances <- as.data.frame(as.matrix(phyloEpiDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) %>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200))%>% mutate(residuals = residuals(lm(PhyloDist~dist))) %>% mutate(outlier= ifelse(abs(residuals)>80, "outlier", "goodMatch")) cor(phyloEpiDistances$dist, phyloEpiDistances$PhyloDist)[[1]] }) tunedZ <- zRange[which(tunedZ==max(tunedZ))] selectedCpGs <- results$CGid[which(results$PIC.variance.Z< tunedZ)] tis <- names(meanDatPerTissue)[t] nCpGs <- length(selectedCpGs) tis <- paste(tis, "\n(n selected CpGs = ", nCpGs,"; z threshold = ",tunedZ, ")" ,sep = "") geneMapSubset <- geneMap %>% filter(CGid%in%results$CGid) # dat.selected <- dat0[selectedCpGs, ] corSpecies <- corAndPvalue(dat.selected) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) #corDist2 <- dist(corSpecies$cor, method = "minkowski") speciesTree2 <- hclust(corDist2, method = "average") distances <- cophenetic(subTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) phyloEpiDistances <- cophenetic(speciesTree2) phyloEpiDistances <- as.data.frame(as.matrix(phyloEpiDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) %>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200))%>% mutate(residuals = residuals(lm(PhyloDist~dist))) %>% mutate(outlier= ifelse(abs(residuals)>80, "outlier", "goodMatch")) p1 <- phyloEpiDistances %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(size=6, color="blue")+theme_classic(base_size = 16)+ylab("Phlogenetic tree distance\n(million years)")+xlab("DNAm tree distance")+labs(size="Number of overlapped\npoints")+theme(axis.text.y = element_text(size=20))+scale_color_manual(values=c("black","red"))+ggtitle(tis) }) p2 <- ggpubr::ggarrange(plotlist = distancePlots, nrow = 1, ncol = 3, align = "v", common.legend = T, legend = "right") pdf(file = "Distance plots EWAS of phyloepigenetic tree.pdf", width = 25, height = 5) p2 dev.off() # example tree dat.selected <- dat0[selectedCpGs, ] corSpecies <- corAndPvalue(dat.selected) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) #corDist2 <- dist(corSpecies$cor, method = "minkowski") speciesTree2 <- hclust(corDist2, method = "average") distances <- cophenetic(subTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) phyloEpiDistances <- cophenetic(speciesTree2) phyloEpiDistances <- as.data.frame(as.matrix(phyloEpiDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) %>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200))%>% mutate(residuals = residuals(lm(PhyloDist~dist))) %>% mutate(outlier= ifelse(abs(residuals)>80, "outlier", "goodMatch")) p1 <- phyloEpiDistances %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(size=6, color="blue")+theme_classic(base_size = 20)+ylab("Phlogenetic tree distance\n(million years)")+xlab("DNAm tree distance")+labs(size="Number of overlapped\npoints")+theme(axis.text.y = element_text(size=20))+scale_color_manual(values=c("black","red")) ggsave("Distance vs Phylogenetic top CpGs sig.pdf", p1, width = 12, height = 5) # datColors <- samp %>% filter(labelsCaesar%in%subTree$tip.label)%>% dplyr::select(labelsCaesar, Order) %>% filter(!is.na(Order))%>% filter(!duplicated(labelsCaesar)) %>% mutate(labelsCaesar = make.unique(labelsCaesar)) %>% tibble::column_to_rownames(var = "labelsCaesar") sum <- sumSample%>% left_join(species[,c("SpeciesLatinName", "labelsCaesar")]) tree2 <- ggtree(speciesTree2, layout = "slanted")+ #geom_tiplab(size=7.5,aes(angle=0))+ theme_tree() d <- data.frame(label = tree2$data$label) %>% left_join(dplyr::select(.data=sum, labelsCaesar, Number, Order), by=c("label"="labelsCaesar")) plot <- tree2 %<+% d + geom_tiplab(aes(label=Number, fill=Order),size=6,aes(angle=0), geom = "label", label.padding = unit(0.01, "lines"))+ geom_hilight(mapping=aes(subset=node%in%node[which(!is.na(Order))],fill=Order))+ scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ theme(text = element_text(size = 30), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ guides(fill = guide_legend(ncol = 1))+ #coord_flip()+ ggtitle("Phyloepigenetic tree from selected CpGs")+ geom_treescale(linesize = 1, fontsize = 4, width = 0.05) + xlim(-2, NA) pdf("DNAm clustering blood, 1-cor, subset CpGs.pdf", width = 18, height = 28) plot+theme(text = element_text(size = 20))+ xlim(-0.1, NA) dev.off() ``` ```{r enrichment for EWAS of phylogenetic tree} source("~/Google drive/My Drive/Amin documents/Steve projects/Research projects/Codes, and protocols/ChromHMM enrichment/ChromHMM enrichment Source Code.R") meanDatPerTissue <- readRDS("meanDatPerTissue for phylogenetic analysis of CpGs.RDS") phyResults <- readRDS("EWAS of phyloepigenetic tree.RDS") phyResultsTop <- plyr::llply(phyResults, function(x){ x <- x %>% top_n(500, -PIC.variance.Z) %>% left_join(geneMap) %>% dplyr::rename(EWAS_z=PIC.variance.Z) }) geneMap2 <- geneMap %>% filter(CGid%in%rownames(bValsWithMonotremes)) # proportion test total <- geneMap2 %>% group_by(main_Categories) %>% tally()%>% dplyr::rename(total=n) plots <- lapply(1:3, function(x){ propTest <- phyResultsTop[[x]]%>% mutate(dir= sign(EWAS_z)) %>% mutate(dir=factor(dir, levels=c(-1,1),labels = c("decrease", "increase"))) %>% group_by(dir,main_Categories)%>% tally() %>% dplyr::rename(sigs=n)%>% group_by(dir)%>% mutate(allSig = sum(sigs)) %>% left_join(total) %>% mutate(allProbes = sum(total)) %>% ungroup() propTest$fisher.exact.pval <- apply(propTest, 1, function(j){ x <- as.numeric(unlist(c(j[c(3,4)], j[c(5,6)]))) x <- matrix(x, nrow = 2, ncol = 2, byrow = F) a <- epitools::oddsratio.wald(x)$p.value[2,2] }) propTest$OR <- apply(propTest, 1, function(j){ x <- as.numeric(unlist(c(j[c(3,4)], j[c(5,6)]))) x <- matrix(x, nrow = 2, ncol = 2, byrow = F) epitools::oddsratio.wald(x)$measure[2,1] }) propTest <- propTest %>% mutate(OR2 =round(OR, 2)) %>% mutate(p = ifelse(fisher.exact.pval<1e-4&OR<1, "****", ifelse(fisher.exact.pval<1e-3&OR<1, "***", ifelse(fisher.exact.pval<1e-2&OR<1, "**",ifelse(fisher.exact.pval<0.05&OR<1, "*","")))))%>% mutate(p = ifelse(fisher.exact.pval<1e-4&OR>1, "####", ifelse(fisher.exact.pval<1e-3&OR>1, "###", ifelse(fisher.exact.pval<1e-2&OR>1, "##",ifelse(fisher.exact.pval<0.05&OR>1, "#",p))))) %>% mutate(OR2= ifelse(p!="", OR2, "")) %>% mutate(text = paste(OR2,p, sep = ""))%>% mutate(region = factor(main_Categories, levels = rev(c("Intergenic_upstream", "Promoter", "fiveUTR", "Exon", "Intron", "threeUTR", "Intergenic_downstream")))) %>% mutate(sig2=max(sigs)) ## }) names(plots) <- names(phyResultsTop) input <- rbindlist(plots, use.names = T, idcol = "model") p <- input %>%ggplot( aes(x=sigs, y=region, fill=dir))+geom_bar(stat = "identity",position=position_dodge())+geom_text(aes(label=text, x=sigs), position = position_dodge(0.9), size=10)+facet_wrap(~model, nrow = 1, scales = "free_x")+scale_fill_manual(values = c("#FF9999"))+ ylab("CpG location")+ xlab("CpG count")+ theme_classic(base_size = 35)+ theme(strip.text = element_text(size = 30), axis.text.x = element_text(size=30, angle = 45, hjust = 1), legend.position = "none", legend.text = element_text(size = 26), legend.title = element_text(size=32))+labs(fill="Direction of association")+scale_x_continuous(expand = expansion(mult = c(0.1, .3))) ggsave("Gene region EWAS of phylogenetic tree.pdf", p, width = 25, height = 6) # distance to tss tssPlots <- plyr::llply(phyResults, function(x){ top <- x %>% top_n(500, -PIC.variance.Z) x <- x %>% mutate(strong.phylogenetic.signal = ifelse(CGid%in%top$CGid, "top.hits", "remaning.CpGs"))%>% dplyr::rename(EWAS_z=PIC.variance.Z) %>% left_join(geneMap) %>% mutate(strong.phylogenetic.signal=factor(strong.phylogenetic.signal, levels = c( "top.hits", "remaning.CpGs"))) p <- x %>% mutate(region = factor(main_Categories, levels = rev(c("Intergenic_upstream", "fiveUTR", "Promoter", "Exon", "Intron", "threeUTR", "Intergenic_downstream")))) %>% ggplot(aes(x=distanceToTSS, fill=strong.phylogenetic.signal))+geom_density(alpha=0.5)+facet_wrap(~region, scales = "free", nrow = 1)+ggtitle(top$Tissue[1]) }) p2 <- ggpubr::ggarrange(plotlist = tssPlots, nrow = 3, ncol = 1, align = "v", common.legend = T, legend = "right") pdf(file = "Distance to TSS EWAS of phyloepigenetic tree.pdf", width = 25, height = 10) p2 dev.off() # promoter pattern distance to tss tssPlots <- plyr::llply(phyResults, function(x){ top <- x %>% top_n(500, -PIC.variance.Z) x <- x %>% mutate(strong.phylogenetic.signal = ifelse(CGid%in%top$CGid, "top.hits", "remaning.CpGs"))%>% dplyr::rename(EWAS_z=PIC.variance.Z) %>% left_join(geneMap) %>% mutate(strong.phylogenetic.signal=factor(strong.phylogenetic.signal, levels = c( "top.hits", "remaning.CpGs")))%>% mutate(distanceToTSS2=sign(distanceToTSS)*log10(abs(distanceToTSS))) %>% mutate(distanceToTSS2=ifelse(is.na(distanceToTSS2),0, distanceToTSS2)) p <- x %>% mutate(region = ifelse(distanceToTSS> -1000& distanceToTSS <1000, "Promoter [-1000,1000]",ifelse(distanceToTSS> -10000& distanceToTSS < -1000, "Promoter [-10000,-1000]",ifelse(distanceToTSS> 1000& distanceToTSS < 10000, "Promoter [1000,10000]",main_Categories)))) %>% mutate(region = factor(region, levels = c("Intergenic_upstream", "Promoter [-10000,-1000]","Promoter [-1000,1000]","Promoter [1000,10000]", "fiveUTR", "Exon", "Intron", "threeUTR", "Intergenic_downstream"))) %>% ggplot(aes(x=distanceToTSS2, fill=strong.phylogenetic.signal))+geom_density(alpha=0.5)+ggtitle(top$Tissue[1])+xlab("signed log10 of the distance to TSS")+geom_vline(xintercept = c(-3.3, 3.3, 6.36), color="blue", linetype="dashed")+annotate(geom = "text", x=0, y=0.4, label="Promoter\n(-2000 to 2000 of TSS)", size=2.5)+annotate(geom = "text", x=4.5, y=0.4, label="Gene bodies\nor downstream\n regions", size=2.5)+annotate(geom = "text", x= -5, y=0.4, label="Distal\nupstream", size=2.5)+ylab("density") #+facet_wrap(~region, scales = "free", nrow = 1) }) p2 <- ggpubr::ggarrange(plotlist = tssPlots, nrow = 1, ncol = 3, align = "v", common.legend = T, legend = "right") pdf(file = "Log Distance to TSS EWAS of phyloepigenetic tree.pdf", width = 15, height = 3) p2 dev.off() # distance to tss promoter tssPlots <- plyr::llply(phyResults, function(x){ top <- x %>% top_n(500, -PIC.variance.Z) x <- x %>% mutate(strong.phylogenetic.signal = ifelse(CGid%in%top$CGid, "top.hits", "remaning.CpGs"))%>% dplyr::rename(EWAS_z=PIC.variance.Z) %>% left_join(geneMap) %>% mutate(strong.phylogenetic.signal=factor(strong.phylogenetic.signal, levels = c( "top.hits", "remaning.CpGs"))) p <- x %>% filter(distanceToTSS> -10000, distanceToTSS<10000) %>% mutate(region = "Promoter") %>% ggplot(aes(x=distanceToTSS, fill=strong.phylogenetic.signal))+geom_density(alpha=0.5)+facet_wrap(~region, scales = "free", nrow = 1)+ggtitle(top$Tissue[1]) }) p2 <- ggpubr::ggarrange(plotlist = tssPlots, nrow = 1, ncol = 3, align = "v", common.legend = T, legend = "right") pdf(file = "Distance to TSS EWAS of phyloepigenetic tree promoter.pdf", width = 15, height = 3) p2 dev.off() # CpG island tssPlots <- plyr::llply(phyResults, function(x){ top <- x %>% top_n(500, -PIC.variance.Z) x <- x %>% mutate(strong.phylogenetic.signal = ifelse(CGid%in%top$CGid, "top.hits", "remaning.CpGs"))%>% dplyr::rename(EWAS_z=PIC.variance.Z) %>% left_join(geneMap) %>% mutate(strong.phylogenetic.signal=factor(strong.phylogenetic.signal, levels = c( "top.hits", "remaning.CpGs")))%>% mutate(distanceToTSS2=sign(distanceToTSS)*log10(abs(distanceToTSS))) %>% mutate(distanceToTSS2=ifelse(is.na(distanceToTSS2),0, distanceToTSS2)) %>% mutate(CpGisland=ifelse(is.na(CpGisland), "No.CpGisland",CpGisland)) p <- x %>% mutate(region = ifelse(distanceToTSS> -1000& distanceToTSS <1000, "Promoter [-1000,1000]",ifelse(distanceToTSS> -10000& distanceToTSS < -1000, "Promoter [-10000,-1000]",ifelse(distanceToTSS> 1000& distanceToTSS < 10000, "Promoter [1000,10000]",main_Categories)))) %>% mutate(region = factor(region, levels = c("Intergenic_upstream", "Promoter [-10000,-1000]","Promoter [-1000,1000]","Promoter [1000,10000]", "fiveUTR", "Exon", "Intron", "threeUTR", "Intergenic_downstream"))) %>% ggplot(aes(x=distanceToTSS2, fill=strong.phylogenetic.signal))+geom_density(alpha=0.5)+ggtitle(top$Tissue[1])+xlab("signed log10 of the distance to TSS")+geom_vline(xintercept = c(-3.3, 3.3, 6.36), color="blue", linetype="dashed")+annotate(geom = "text", x=0, y=0.4, label="Promoter\n(-2000 to 2000 of TSS)", size=2.5)+annotate(geom = "text", x=4.5, y=0.4, label="Gene bodies\nor downstream\n regions", size=2.5)+annotate(geom = "text", x= -5, y=0.4, label="Distal\nupstream", size=2.5)+ylab("density")+facet_wrap(~CpGisland, nrow = 1) }) p2 <- ggpubr::ggarrange(plotlist = tssPlots, nrow = 1, ncol = 3, align = "v", common.legend = T, legend = "right") pdf(file = "Log Distance to TSS EWAS of phyloepigenetic tree CpG islands.pdf", width = 25, height = 3) p2 dev.off() # proportion test by CpG island geneMap2 <- geneMap %>% filter(CGid%in%results$CGid)%>% mutate(CpGisland=ifelse(is.na(CpGisland), "No.CpGisland",CpGisland)) %>% mutate(main_Categories=paste(CpGisland, main_Categories, sep = ".")) total <- geneMap2%>% group_by(main_Categories) %>% tally()%>% dplyr::rename(total=n) plots <- lapply(1:3, function(x){ propTest <- phyResultsTop[[x]]%>% mutate(CpGisland=ifelse(is.na(CpGisland), "No.CpGisland",CpGisland)) %>% mutate(main_Categories=paste(CpGisland, main_Categories, sep = "."))%>% mutate(dir= sign(EWAS_z)) %>% mutate(dir=factor(dir, levels=c(-1,1),labels = c("decrease", "increase"))) %>% group_by(dir,main_Categories)%>% tally() %>% dplyr::rename(sigs=n)%>% group_by(dir)%>% mutate(allSig = sum(sigs)) %>% left_join(total, by="main_Categories")%>% mutate(allProbes = sum(total)) %>% ungroup() propTest$fisher.exact.pval <- apply(propTest, 1, function(j){ x <- as.numeric(unlist(c(j[c(3,4)], j[c(5,6)]))) x <- matrix(x, nrow = 2, ncol = 2, byrow = F) a <- epitools::oddsratio.wald(x)$p.value[2,2] }) propTest$OR <- apply(propTest, 1, function(j){ x <- as.numeric(unlist(c(j[c(3,4)], j[c(5,6)]))) x <- matrix(x, nrow = 2, ncol = 2, byrow = F) epitools::oddsratio.wald(x)$measure[2,1] }) propTest <- propTest %>% mutate(OR2 =round(OR, 2)) %>% mutate(p = ifelse(fisher.exact.pval<1e-4&OR<1, "****", ifelse(fisher.exact.pval<1e-3&OR<1, "***", ifelse(fisher.exact.pval<1e-2&OR<1, "**",ifelse(fisher.exact.pval<0.05&OR<1, "*","")))))%>% mutate(p = ifelse(fisher.exact.pval<1e-4&OR>1, "####", ifelse(fisher.exact.pval<1e-3&OR>1, "###", ifelse(fisher.exact.pval<1e-2&OR>1, "##",ifelse(fisher.exact.pval<0.05&OR>1, "#",p))))) %>% mutate(OR2= ifelse(p!="", OR2, "")) %>% mutate(text = paste(OR2,p, sep = ""))%>% mutate(region = factor(main_Categories, levels = rev(c("CpGisland.Intergenic_upstream", "CpGisland.Promoter", "CpGisland.fiveUTR", "CpGisland.Exon", "CpGisland.Intron", "CpGisland.threeUTR", "CpGisland.Intergenic_downstream","No.CpGisland.Intergenic_upstream", "No.CpGisland.Promoter", "No.CpGisland.fiveUTR", "No.CpGisland.Exon", "No.CpGisland.Intron", "No.CpGisland.threeUTR", "No.CpGisland.Intergenic_downstream")))) %>% mutate(sig2=max(sigs)) ## }) names(plots) <- names(phyResultsTop) input <- rbindlist(plots, use.names = T, idcol = "model") p <- input %>%ggplot( aes(x=sigs, y=region, fill=dir))+geom_bar(stat = "identity",position=position_dodge())+geom_text(aes(label=text, x=sigs), position = position_dodge(0.9), size=10)+facet_wrap(~model, nrow = 1, scales = "free_x")+scale_fill_manual(values = c("#FF9999"))+ ylab("CpG location")+ xlab("CpG count")+ theme_classic(base_size = 35)+ theme(strip.text = element_text(size = 30), axis.text.x = element_text(size=30, angle = 45, hjust = 1), legend.position = "none", legend.text = element_text(size = 26), legend.title = element_text(size=32))+labs(fill="Direction of association")+scale_x_continuous(expand = expansion(mult = c(0.1, .3))) ggsave("Gene region by Island EWAS of phylogenetic tree.pdf", p, width = 28, height = 9) # # proportion test per distnace to TSS geneMap2 <- geneMap %>% filter(CGid%in%rownames(bValsWithMonotremes)) geneMap2 <- geneMap2 %>% mutate(distanceGroup = Hmisc::cut2(distanceToTSS,m = 10))%>% #mutate(distanceGroup = cut(distanceToTSS, breaks = seq(min(distanceToTSS), max(distanceToTSS), by = 1000))) #mutate(distanceGroup=ifelse(distanceToTSS> -2000& distanceToTSS< -1000, "promoter1", ifelse(distanceToTSS> -1000& distanceToTSS< 0, "promoter2",ifelse(distanceToTSS> 0& distanceToTSS< 1000, "promoter3",ifelse(distanceToTSS> 1000& distanceToTSS< 2000, "promoter4", distanceGroup)))))%>% filter(!is.na(distanceGroup)) total <- geneMap2 %>% group_by(distanceGroup) %>% add_count(name = "total") %>% group_by(distanceGroup) %>% summarize(total=unique(total), distanceToTSS=mean(distanceToTSS, na.rm = T)) geneMap2 <- geneMap2 %>% filter(distanceGroup%in%total$distanceGroup) plots <- lapply(1:3, function(y){ propTest <- phyResultsTop[[y]]%>% mutate(dir= sign(EWAS_z)) %>% mutate(dir=factor(dir, levels=c(-1,1),labels = c("decrease", "increase"))) %>% left_join((geneMap2[c("CGid", "distanceGroup")]))%>% filter(!is.na(distanceGroup)) %>% group_by(dir,distanceGroup)%>% tally() %>% dplyr::rename(sigs=n)%>% group_by(dir)%>% mutate(allSig = sum(sigs)) %>% left_join(total) %>% mutate(allProbes = sum(total)) %>% ungroup() %>%relocate(distanceToTSS,.after="allProbes") propTest$fisher.exact.pval <- apply(propTest, 1, function(j){ x <- as.numeric(unlist(c(j[c(3,4)], j[c(5,6)]))) x <- matrix(x, nrow = 2, ncol = 2, byrow = F) a <- epitools::oddsratio.wald(x)$p.value[2,2] }) propTest$OR <- apply(propTest, 1, function(j){ x <- as.numeric(unlist(c(j[c(3,4)], j[c(5,6)]))) x <- matrix(x, nrow = 2, ncol = 2, byrow = F) epitools::oddsratio.wald(x)$measure[2,1] }) p <- propTest %>% mutate(distanceToTSS2=sign(distanceToTSS)*log10(abs(distanceToTSS))) %>% mutate(distanceToTSS2=ifelse(is.na(distanceToTSS2),0, distanceToTSS2))%>% ggplot(aes(x=distanceToTSS2,y=log(OR)))+geom_point(data = .%>%filter(fisher.exact.pval<0.05),alpha=0.5, color="red", size=4)+geom_point(data = .%>%filter(fisher.exact.pval<0.05),alpha=0.5, color="red", size=4)+geom_smooth()+ggtitle(names(phyResultsTop)[y])+xlab("signed log10 of the distance to TSS")+geom_vline(xintercept = c(-3.3, 3.3, 6.36), color="blue", linetype="dashed")+annotate(geom = "text", x=0, y=0.4, label="Promoter\n(-2000 to 2000 of TSS)", size=2.5)+annotate(geom = "text", x=4.5, y=0.4, label="Gene bodies\nor downstream\n regions", size=2.5)+annotate(geom = "text", x= -5, y=0.4, label="Distal\nupstream", size=2.5)+ylab("Log(odds ratio for phylogenetic signal)")+theme_bw()+geom_hline(yintercept = 0) }) p2 <- ggpubr::ggarrange(plotlist = plots, nrow = 1, ncol = 3, align = "v", common.legend = T, legend = "right") pdf(file = "Distance to TSS EWAS of phyloepigenetic tree, Odds ratio.pdf", width = 14, height = 3) p2 dev.off() ``` ```{r GREAT enrichment} source("~/Google drive/My Drive/Amin documents/Steve projects/Research projects/Codes, and protocols/ChromHMM enrichment/ChromHMM enrichment Source Code.R") meanDatPerTissue <- readRDS("meanDatPerTissue for phylogenetic analysis of CpGs.RDS") phyResults <- readRDS("EWAS of phyloepigenetic tree.RDS") phyResultsTop <- plyr::llply(phyResults, function(x){ x <- x %>% top_n(500, -PIC.variance.Z) %>% left_join(geneMap) %>% dplyr::rename(EWAS_z=PIC.variance.Z) }) geneMap2 <- geneMap %>% filter(CGid%in%rownames(bValsWithMonotremes)) library("rGREAT") human = readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS") %>% filter(CGid%in%geneMap2$CGid) background <- human %>% dplyr::select(seqnames, CGstart, CGend, CGid) %>% filter(!is.na(CGstart)) %>% dplyr::rename(CHR = seqnames) # prepare input for enrichment analysis input <- plyr::llply(phyResultsTop, function(x){ d <- x %>% dplyr::select(CGid) %>% left_join(background) %>% relocate(CGid, .after = CGend) %>% filter(!is.na(CGstart)) %>% setnames(new = c("chr", "start", "end", "CGid")) }) result <- plyr::llply(input, function(x){ job <- submitGreatJob(x, bg = background, species = "hg19", includeCuratedRegDoms = TRUE, rule = c("basalPlusExt"), adv_upstream = 5.0, adv_downstream = 1.0, adv_span = 50, request_interval = 0, version="3.0.0", max_tries = 10) ontology.all=availableOntologies(job) output.all <- plyr::llply(ontology.all, function(j){ cat(paste(j,"\n", sep = " ")) out0.list = tryCatch(getEnrichmentTables(job, download_by="tsv",ontology=j),error=function(e){NULL}) if(!is.null(out0.list)){ out0.list <- out0.list[[1]] %>% dplyr::select(-FgRegionNames, -BgRegionNames, -BgGeneNames) %>% filter(HyperP<0.05) } else { out0.list=NULL} }) output.all <- plyr::compact(output.all) %>% rbindlist(., use.names = FALSE, fill = FALSE) }) result2 <- result %>% rbindlist(idcol = "Tissue") write.csv(result2, "PhyloSignal CpGs enrichment results.csv") results.combined <- read.csv("PhyloSignal CpGs enrichment results.csv") GOterms <- results.combined %>% filter(Ontology%in% "GO Biological Process") scores <- setNames(-log10(GOterms$HyperP), GOterms$ID) library(rrvgo) simMatrix <- calculateSimMatrix(GOterms$ID, orgdb="org.Hs.eg.db", ont="BP", method="Rel") reducedTerms <- reduceSimMatrix(simMatrix, scores, threshold=0.7, orgdb="org.Hs.eg.db") reducedTerms <- reducedTerms %>% dplyr::select(go, parentTerm) %>% dplyr::rename(ID=go) %>% right_join(GOterms) reducedTerms <- reducedTerms %>% filter(!is.na(parentTerm)) %>% group_by(Tissue, parentTerm) %>% top_n(1, -HyperP) write.csv(reducedTerms, "PhyloSignal CpGs reduced terms.csv") ## results.combined <- read.csv("PhyloSignal CpGs enrichment results.csv") reducedTerms <- read.csv("PhyloSignal CpGs reduced terms.csv") results.combined <- results.combined %>%filter(!Ontology%in% "GO Biological Process") %>% bind_rows(reducedTerms) %>% mutate(Desc = ifelse(is.na(parentTerm), Desc, parentTerm)) results.combined2 <- results.combined %>% filter(Ontology%in%c("GO Biological Process", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway","HGNC Gene Families", "MSigDB Oncogenic Signatures", "MSigDB Predicted Promoter Motifs")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Promoter Motifs", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>% group_by(Tissue, datasets) %>% filter(HyperP<0.005)%>% filter(NumFgGenesHit>3) %>% top_n(3, -log10(HyperP)) results.combined3 <- results.combined[grep("(mortality)|(aging)|(survival)|(perinatal lethality)|(Motif AWAATTRG matches HOXA4)|(regulation of RNA metabolic process)|(HOX)|(HOXL)", results.combined$Desc),] %>% filter(Ontology%in%c("GO Biological Process", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway", "HGNC Gene Families", "MSigDB Oncogenic Signatures", "MSigDB Predicted Promoter Motifs" )) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Promoter Motifs", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>%filter(HyperFdrQ<0.05) %>% filter(NumFgGenesHit>5) results.combined4 <- results.combined%>% filter(Ontology%in%c("GO Biological Process", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway", "HGNC Gene Families", "MSigDB Predicted Promoter Motifs")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Promoter Motifs", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology)))))%>%filter(Desc %in% results.combined3$Desc)%>% filter(HyperP<0.005)%>% filter(NumFgGenesHit>5) results.combined5 <- rbind(results.combined2, results.combined4) combinedEnichemt <- results.combined5 %>% dplyr::rename(pValue = HyperP, nCommonGenes = NumFgGenesHit) %>% mutate(class = Tissue) %>% mutate(blank = "") %>% mutate(name = ifelse(datasets == "Promoter Motifs", ID, Desc)) %>% mutate(datasets = factor(datasets, levels=c("Gene ontology", "Canonical pathways", "Diseases", "Mouse Phenotype", "Human Phenotype", "Promoter Motifs", "HGNC Gene Families", "MSigDB Predicted Promoter Motifs"))) %>% droplevels() %>% dplyr::select(class, pValue, name, datasets)%>% filter(!datasets%in%c("Diseases", "Human Phenotype")&!name%in%c("abnormal appendicular skeleton morphology", "complete perinatal lethality", "Abnormality of the digits")) %>% mutate(name=ifelse(name =="partial perinatal lethality", "perinatal lethality", name))%>% mutate(name=ifelse(name =="modulation of process of other organism involved in symbiotic interaction", "symbiotic interaction", name)) %>% filter(!is.na(datasets)) # enrichTable <- combinedEnichemt %>% droplevels() %>% filter(!is.na(datasets)) enrichTable$name = with(enrichTable, reorder(name, -pValue)) cols <- brewer.pal(n = 8, name = 'Set2') max <- max(-log10(enrichTable$pValue)) p4 <- enrichTable %>% ggplot(aes(y = name, x=-log10(pValue), color = datasets))+ geom_point(size=8)+ #geom_bar(stat = "identity")+ geom_segment(aes(xend=-log10(pValue), x=0, yend=name), color="darkgrey")+ theme_bw()+ ylab(label = "top datasets")+ theme_classic(base_size = 25)+ facet_wrap(.~class, nrow = 1)+ guides(shape = guide_legend(color = guide_legend(override.aes = list(size = 0.2))))+ theme(axis.text.x = element_text(size=25, angle =45, hjust = 0.5), axis.text.y = element_text(size=40), plot.margin = margin(0.5, 1, 0, 1, "cm"), plot.title = element_text(size=35, hjust = 0.5, face="bold"), axis.title.x = element_text(size=30), axis.ticks.x = element_blank(), strip.text = element_text(size=23), axis.title.y = element_blank(), legend.text = element_text(size=17), legend.title = element_text(size=12), panel.spacing = unit(0.1, "lines"), legend.key = element_rect(fill = "grey", color = NA), legend.position=c(0.99,0.94),legend.justification=c(1,1), legend.direction="vertical", legend.box="horizontal", legend.box.just = c("top"), )+ ggpubr::grids(axis = "xy", size = 1)+labs(color="Datasets")+xlab("-log10(Pvalue)") #+scale_color_manual(values = cols[1:3]) pdf(file = "enrichment great, PhyloSignal CpGs.pdf", width = 26, height = 14) p4 dev.off() ``` ```{r EWAS of phylogeny based on Order} meanDatPerTissue <- readRDS("meanDatPerTissue for phylogenetic analysis of CpGs.RDS") phylogenyPerOrder <- readRDS("EWAS of phyloepigenetic tree per Order.RDS") distancePlots <- lapply(1:3, function(t){ results <- as.data.frame(phylogenyPerOrder[[t]]) dat0 <- meanDatPerTissue[[t]]$dat samp <- meanDatPerTissue[[x]]$samp sum <- samp %>% filter(!duplicated(SpeciesLatinName))%>% group_by(Order) %>% tally()%>% filter(n>20) orders <- as.vector(sum$Order) res <- lapply(1:length(orders), function(o){ samp2 <- samp %>% filter(Order%in%orders[o]) selectedCpGs <- results$CGid[which(results$Order==orders[x]&results$PIC.variance.P>0.05)] nCpGs <- length(selectedCpGs) tis <- names(meanDatPerTissue)[t] tis <- paste(tis, "\n(n selected CpGs = ", nCpGs,"; z threshold = ",tunedZ, ")" ,sep = "") dat.selected <- dat0[selectedCpGs, which(names(dat0)%in%samp2$labelsCaesar)] i <- phyloTree$tip.label[which(phyloTree$tip.label%in%names(dat.selected))] phyloTreeSubset <- castor::get_subtree_with_tips(phyloTree, only_tips = i) subTree <- phyloTreeSubset$subtree dat.selected <- dat.selected[,subTree$tip.label] corSpecies <- corAndPvalue(dat.selected) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) #corDist2 <- dist(corSpecies$cor, method = "minkowski") speciesTree2 <- hclust(corDist2, method = "average") # distances <- cophenetic(subTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) phyloEpiDistances <- cophenetic(speciesTree2) phyloEpiDistances <- as.data.frame(as.matrix(phyloEpiDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) %>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200))%>% mutate(residuals = residuals(lm(PhyloDist~dist))) %>% mutate(outlier= ifelse(abs(residuals)>80, "outlier", "goodMatch")) p1 <- phyloEpiDistances %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(size=6, color="blue")+theme_classic(base_size = 16)+ylab("Phlogenetic tree distance\n(million years)")+xlab("DNAm tree distance")+labs(size="Number of overlapped\npoints")+theme(axis.text.y = element_text(size=20))+scale_color_manual(values=c("black","red"))+ggtitle(tis) }) }) p2 <- ggpubr::ggarrange(plotlist = distancePlots, nrow = 1, ncol = 3, align = "v", common.legend = T, legend = "right") pdf(file = "Distance plots EWAS of phyloepigenetic tree per Order.pdf", width = 25, height = 5) p2 dev.off() ``` ```{r EWAS of phylogeny based on mappability} phyloTree <- ape::read.nexus("Mammalia_species") phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" #phyloTree <- ape::read.nexus("speciesTimeTree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # mappability <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mappability file. new annotations.Amin.V2.RDS") detectableProbesInAllSamples <- readRDS("detectable probes in all species p<0.01.RDS") detectableProbesInAllSamples2 <- readRDS("detectable probes in all species p<0.05.RDS") nonDetectableProbes <- readRDS("nondetectable probes in all species p>0.3.RDS") targets <- colnames(mappability)[-1] targets <- gsub("(Bat\\.)|(Dog\\.)|(Cat\\.)|(Cattle\\.)|(Elephant\\.)|(Horse\\.)|(Human\\.)|(Killer whale\\.)|(Marmot\\.alpine\\.)|(Marmot\\.yellow-bellied\\.)|(Mouse\\.)|(Olive baboon\\.)|(Opossum\\.)|(Pig\\.)|(Prairie_vole\\.)|(Rat\\.)|(Rhesus_Macaque\\.)|(Sheep\\.)|(Tasmanian_devil\\.)|(White tailed deer\\.)|(Beluga_whale\\.)|(Asian_elephant\\.)|(Marmoset\\.)|(Vervet_monkey\\.)", "", targets) targets <- str_to_sentence(targets) speciesLatinName <- str_extract("(^\\w+\\.)|(^\\w+v4)", string = targets) speciesLatinName <- gsub("\\.", "", speciesLatinName) speciesLatinName <- gsub("_v+[1-9]", "", speciesLatinName) speciesLatinName <- gsub("Canfam3", "Canis_familiaris", speciesLatinName) speciesLatinName <- gsub("_9$", "", speciesLatinName) speciesLatinName <- gsub("_", " ", speciesLatinName) names(mappability) <- c("CGid", speciesLatinName) samp <- data.frame("SpeciesLatinName"=speciesLatinName) %>% filter(SpeciesLatinName%in%species$SpeciesLatinName&!duplicated(SpeciesLatinName)) %>% left_join(species) %>% filter(samp$labelsCaesar%in%phyloTree$tip.label) mappability <- mappability%>% dplyr::select(CGid, samp$SpeciesLatinName) %>% tibble::column_to_rownames(var = "CGid") mappability[mappability=="yes"] <- 1 mappability[mappability=="no"] <- 0 mappability <- mappability %>%mutate_all(as.numeric) %>% filter(rowSums(.)>0) names(mappability) <- samp$labelsCaesar i <- phyloTree$tip.label[which(phyloTree$tip.label%in%samp$labelsCaesar)] phyloTreeSubset <- castor::get_subtree_with_tips(phyloTree, only_tips = i) # mappability <- mappability %>% dplyr::select(phyloTreeSubset$subtree$tip.label) subTree <- phyloTreeSubset$subtree i <- which(rowSums(mappability)==111) mappability <- mappability[-i,] resultsForMappability <- bettermc::mclapply(1:nrow(mappability), function(x){ d <- unlist(mappability[x,]) a <- picante::phylosignal(d[subTree$tip.label], subTree, reps = 999) }, mc.cores = 4) names(resultsForMappability) <- rownames(mappability) resultsForMappability <- rbindlist(resultsForMappability, idcol = "CGid") ``` ```{r } meanDatPerTissue <- readRDS("meanDatPerTissue for phylogenetic analysis of CpGs.RDS") results <- lapply(1:3, function(x){ dat0 <- meanDatPerTissue[[x]]$dat corSpecies <- corAndPvalue(dat0) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) speciesTree2 <- makeNodeLabel(as.phylo(hclust(corDist2, method = "average")), method = "number") samp <- meanDatPerTissue[[x]]$samp a <-getMRCA(speciesTree2, c("Homo_sapiens", "Procavia_capensis")) }) ``` ```{r} d1 <- specDist %>% filter(Tissue=="Blood"&outlier=="outlier") %>% mutate(spec1 = str_extract(pattern = "([A-Z][a-z]+)_([a-z]+)", ID))%>% mutate(spec1=gsub("_", " ", spec1))%>%droplevels() %>% group_by(spec1) %>% tally() %>% filter(n>10)%>% left_join(sumSample, by=c(spec1="SpeciesLatinName")) d2 <- specDist %>% filter(Tissue=="Liver"&outlier=="outlier") %>% mutate(spec1 = str_extract(pattern = "([A-Z][a-z]+)_([a-z]+)", ID))%>% mutate(spec1=gsub("_", " ", spec1))%>%droplevels() %>% group_by(spec1) %>% tally() %>% filter(n>20)%>% left_join(sumSample, by=c(spec1="SpeciesLatinName")) d3 <- specDist %>% filter(Tissue=="Skin"&outlier=="outlier") %>% mutate(spec1 = str_extract(pattern = "([A-Z][a-z]+)_([a-z]+)", ID))%>% mutate(spec1=gsub("_", " ", spec1))%>%droplevels() %>% group_by(spec1) %>% tally() %>% filter(n>10)%>% left_join(sumSample, by=c(spec1="SpeciesLatinName")) ``` ```{r save the corrlation matrices} samp <- samplesWithMonotremes %>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order))%>%mutate(Tissue2 = ifelse(Tissue%in%c("Cortex", "Neocortex", "Striatum", "SVZ", "FrontalCortex", "Hippocampus", "OccipitalCortex", "Substantia nigra", "WholeBrain", "Brain", "Cerebellum", "Hypothalamus", "TemporalCortex"), "Brain", as.character(Tissue))) %>% mutate(orCol = as.character(factor(Order, levels= levels(Order), labels = orCols))) tissueCount <- samp%>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order))%>%group_by(Tissue2,SpeciesLatinName)%>%tally()%>%filter(n>2)%>% group_by(Tissue2) %>%tally()%>%filter(n>40) samp <- samp %>% filter(Tissue%in%tissueCount$Tissue2) allData <- bValsWithMonotremes %>% dplyr::select(samp$Basename) %>% tibble::rownames_to_column(var="CGid") %>% gather(key = "Basename", value = "bval", -CGid) %>% left_join(samp[,c("Basename", "SpeciesLatinName", "Tissue", "spec")]) %>% group_split(Tissue) names(allData) <- sapply(allData, function(x){x$Tissue[1]}) # save mean values per species and tissues plyr::llply(1:length(allData), function(x){ tis <- allData[[x]]$Tissue[1] datColors <- samp %>% filter(SpeciesLatinName%in%allData[[x]]$SpeciesLatinName)%>% dplyr::select(SpeciesLatinName, Order)%>% filter(!duplicated(SpeciesLatinName)) %>% mutate(SpeciesLatinName = make.unique(SpeciesLatinName)) %>% tibble::column_to_rownames(var = "SpeciesLatinName") dat <- allData[[x]] %>% group_by(SpeciesLatinName, CGid) %>% summarise(bval = mean(bval))%>% spread(key = SpeciesLatinName, value = bval) %>% tibble::column_to_rownames(var = "CGid") write.csv(dat, paste("Mean methylation values per species in ", tis, ".csv", sep = "")) }) plyr::llply(1:length(allData), function(x){ tis <- allData[[x]]$Tissue[1] datColors <- samp %>% filter(SpeciesLatinName%in%allData[[x]]$SpeciesLatinName)%>% dplyr::select(SpeciesLatinName, Order)%>% filter(!duplicated(SpeciesLatinName)) %>% mutate(SpeciesLatinName = make.unique(SpeciesLatinName)) %>% tibble::column_to_rownames(var = "SpeciesLatinName") dat <- allData[[x]] %>% group_by(SpeciesLatinName, CGid) %>% summarise(bval = mean(bval))%>% spread(key = SpeciesLatinName, value = bval) %>% tibble::column_to_rownames(var = "CGid") corSpecies <- corAndPvalue(dat)$cor write.csv(dat, paste("Pearson correlation matrices for mammals in ", tis, ".csv", sep = "")) }) plyr::llply(1:length(allData), function(x){ tis <- allData[[x]]$Tissue[1] datColors <- samp %>% filter(SpeciesLatinName%in%allData[[x]]$SpeciesLatinName)%>% dplyr::select(SpeciesLatinName, Order)%>% filter(!duplicated(SpeciesLatinName)) %>% mutate(SpeciesLatinName = make.unique(SpeciesLatinName)) %>% tibble::column_to_rownames(var = "SpeciesLatinName") dat <- allData[[x]] %>% group_by(SpeciesLatinName, CGid) %>% summarise(bval = mean(bval))%>% spread(key = SpeciesLatinName, value = bval) %>% tibble::column_to_rownames(var = "CGid") corSpecies <- corAndPvalue(dat, method="spearman")$cor write.csv(dat, paste("Spearman correlation matrices for mammals in ", tis, ".csv", sep = "")) }) plyr::llply(1:length(allData), function(x){ tis <- allData[[x]]$Tissue[1] datColors <- samp %>% filter(SpeciesLatinName%in%allData[[x]]$SpeciesLatinName)%>% dplyr::select(SpeciesLatinName, Order)%>% filter(!duplicated(SpeciesLatinName)) %>% mutate(SpeciesLatinName = make.unique(SpeciesLatinName)) %>% tibble::column_to_rownames(var = "SpeciesLatinName") dat <- allData[[x]] %>% group_by(SpeciesLatinName, CGid) %>% summarise(bval = mean(bval))%>% spread(key = SpeciesLatinName, value = bval) %>% tibble::column_to_rownames(var = "CGid") corSpecies <- bicorAndPvalue(dat)$bicor write.csv(dat, paste("Bicor correlation matrices for mammals in ", tis, ".csv", sep = "")) }) ``` ## Sensetivity analysis, phyloepigenetic tree based on detection pvalue ```{r figure for detection pvalue} miceadds::load.Rdata("~/Steve Horvath Lab Dropbox/Amin Haghani/MammalianMethCombined/StuffCaesar/NormalizedData/detectionP_combined_sesame.RData", objname = "detectionPval") i <- which(samplesWithMonotremes$Basename%in%names(detectionPval)) sampsForDetPval <- samplesWithMonotremes[i,] detectionPval <- detectionPval %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes")]) %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(sampsForDetPval$Basename) %>% tibble::rownames_to_column(var = "CGid") targets <- unique(sampsForDetPval$SpeciesLatinName) sumDetectionPval <- lapply(targets, function(x){ i <- which(sampsForDetPval$SpeciesLatinName==x) dat <- detectionPval %>% dplyr::select(sampsForDetPval[i,]$Basename) dat <- -log10(apply(dat, 1, median)) }) sumDetectionPval <- bind_cols(sumDetectionPval) %>% setnames(new = targets) %>% mutate(CGid=detectionPval$CGid) %>% relocate(CGid, 1)%>% tibble::column_to_rownames(var = "CGid") sumSampsForDetPval <- sampsForDetPval %>% filter(!duplicated(SpeciesLatinName)) %>% arrange(Order)%>% mutate(Clade =ifelse(Order%in%marsupials, "marsupials", ifelse(Order%in%monotremes, "monotremes", "eutherians"))) sumDetectionPval <- sumDetectionPval %>% dplyr::select(sumSampsForDetPval$SpeciesLatinName) annot1 <- sumSampsForDetPval %>% dplyr::select(SpeciesLatinName, Order) %>% left_join(orColsData) annot1 <- annot1$colors names(annot1) <- sumSampsForDetPval$Order annot2 <- sumSampsForDetPval %>% dplyr::select(SpeciesLatinName, Clade) %>% mutate(Clade=as.factor(Clade)) %>% mutate(colors=as.character(factor(Clade, labels = c("turquoise", "blue", "brown")))) annot2 <- annot2$colors names(annot2) <- sumSampsForDetPval$Clade col_fun = circlize::colorRamp2(breaks = c(min(log(sumSampsForDetPval$maximum_age)), max(log(sumSampsForDetPval$maximum_age))), colors = c("blue", "red")) ha = HeatmapAnnotation(Clade = names(annot2), Order = names(annot1), log_maximumAge=log(sumSampsForDetPval$maximum_age), col = list(Clade=annot2,Order=annot1, log_maximumAge=col_fun), show_legend = F) colPalette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256)) sumDetectionPval[sumDetectionPval=="Inf"] <- NA m <- max(sumDetectionPval, na.rm = T) sumDetectionPval[is.na(sumDetectionPval)] <- m detPHeatmap <-ComplexHeatmap::Heatmap(sumDetectionPval, col = colPalette, show_column_names = TRUE, name = "-log10 Median\ndetection pvalue", show_row_names = F, column_dend_reorder = T, cluster_columns = T, cluster_rows = T, row_dend_reorder=T, clustering_distance_columns="pearson",clustering_method_columns = "average", border = TRUE, bottom_annotation = ha, show_row_dend = F, show_column_dend = T, column_names_gp = grid::gpar(fontsize = 8)) lgd_order = Legend(labels = names(orCols), title = "Order", legend_gp = gpar(fill = orCols)) lgd_clade = Legend(labels = c("eutherians", "marsupials", "monotremes"), title = "Clades", legend_gp = gpar(fill = c("turquoise", "blue", "brown"))) lgd_maxAge <- Legend(col_fun = col_fun, title = "log maximumAge") jpeg(file = "Heatmap detection pvalue 14705 probes.jpeg", width = 30, height = 20, units = "in", res = 300) draw(detPHeatmap, heatmap_legend_side = "left", annotation_legend_side="left", padding = unit(c(15, 10, 5, 15), "mm"), column_title="14705 conserved probes in Eutherians", column_title_gp=gpar(fontsize=17, fontface="bold"),heatmap_legend_list = list(lgd_clade,lgd_order,lgd_maxAge)) dev.off() ``` ```{r selecting probes that does are detectable in all} miceadds::load.Rdata("~/Steve Horvath Lab Dropbox/Amin Haghani/MammalianMethCombined/StuffCaesar/NormalizedData/detectionP_combined_sesame.RData", objname = "detectionPval") i <- which(samplesWithMonotremes$Basename%in%names(detectionPval)) sampsForDetPval <- samplesWithMonotremes[i,] detectionPval <- detectionPval %>% filter(CGid %in% mappability$CGid[which(mappability$mapToAllMammalsCor0.8=="yes")]) %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(sampsForDetPval$Basename) %>% tibble::rownames_to_column(var = "CGid") targets <- unique(sampsForDetPval$SpeciesLatinName) sumDetectionPval <- lapply(targets, function(x){ i <- which(sampsForDetPval$SpeciesLatinName==x) dat <- detectionPval %>% dplyr::select(sampsForDetPval[i,]$Basename) dat <- apply(dat, 1, median) }) sumDetectionPval <- bind_cols(sumDetectionPval) %>% setnames(new = targets) %>% mutate(CGid=detectionPval$CGid) %>% relocate(CGid, 1)%>% tibble::column_to_rownames(var = "CGid") sumSampsForDetPval <- sampsForDetPval %>% filter(!duplicated(SpeciesLatinName)) %>% arrange(Order)%>% mutate(Clade =ifelse(Order%in%marsupials, "marsupials", ifelse(Order%in%monotremes, "monotremes", "eutherians"))) sumDetectionPval <- sumDetectionPval %>% dplyr::select(sumSampsForDetPval$SpeciesLatinName) i <- which(rowSums(sumDetectionPval<0.01)==348) j <- which(rowSums(sumDetectionPval<0.05)==348) detectableProbesInAllSamples <- rownames(sumDetectionPval)[i] saveRDS(detectableProbesInAllSamples, "detectable probes in all species p<0.01.RDS") detectableProbesInAllSamples <- rownames(sumDetectionPval)[j] saveRDS(detectableProbesInAllSamples, "detectable probes in all species p<0.05.RDS") j <- which(rowSums(sumDetectionPval>0.3)>0) detectableProbesInAllSamples <- rownames(sumDetectionPval)[j] saveRDS(detectableProbesInAllSamples, "nondetectable probes in all species p>0.3.RDS") ``` ```{r phylogenetic analysis of all species} library(ggtree) require(treeio) library("WGCNA") detectableProbesInAllSamples <- readRDS("detectable probes in all species p<0.01.RDS") detectableProbesInAllSamples2 <- readRDS("detectable probes in all species p<0.05.RDS") nonDetectableProbes <- readRDS("nondetectable probes in all species p>0.3.RDS") miceadds::load.Rdata("~/Steve Horvath Lab Dropbox/Amin Haghani/MammalianMethCombined/StuffCaesar/NormalizedData/detectionP_combined_sesame.RData", objname = "detectionPval") i <- which(samplesWithMonotremes$Basename%in%names(detectionPval)) sampsForDetPval <- samplesWithMonotremes[i,] detectionPval <- detectionPval %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(sampsForDetPval$Basename) %>% tibble::rownames_to_column(var = "CGid") targets <- unique(sampsForDetPval$SpeciesLatinName) # sumDetectionPval <- lapply(targets, function(x){ # i <- which(sampsForDetPval$SpeciesLatinName==x) # dat <- detectionPval %>% dplyr::select(sampsForDetPval[i,]$Basename) # dat <- apply(dat, 1, median) # }) # sumDetectionPval <- bind_cols(sumDetectionPval) %>% setnames(new = targets) %>% mutate(CGid=detectionPval$CGid) %>% relocate(CGid, 1)%>% tibble::column_to_rownames(var = "CGid") # saveRDS(sumDetectionPval, "Median Detection Pvalue per species.RDS") # median beta values # dat <- bValsWithMonotremes %>% dplyr::select(sampsForDetPval$Basename) %>% tibble::rownames_to_column(var = "CGid") # sumBeta <- lapply(targets, function(x){ # i <- which(sampsForDetPval$SpeciesLatinName==x) # dat <- dat %>% dplyr::select(sampsForDetPval[i,]$Basename) # dat <- apply(dat, 1, median) # }) # sumBeta <- bind_cols(sumBeta) %>% setnames(new = targets) %>% mutate(CGid=dat$CGid) %>% relocate(CGid, 1)%>% tibble::column_to_rownames(var = "CGid") # saveRDS(sumBeta, "Median Beta per species.RDS") sumBeta <- readRDS("Median Beta per species.RDS") %>% mutate(across(everything(), ~abs(. -0.5))) sumDetectionPval <- readRDS("Median Detection Pvalue per species.RDS") #correlation of detectPval and Beta i <- sample(size = 100, x = 1:nrow(sumBeta), replace = F) dat <- sumDetectionPval[rownames(sumBeta)[i],]%>% mutate(across(everything(), ~-log10(.))) %>% tibble::rownames_to_column(var = "CGid")%>% gather(-CGid, key = "SpeciesLatinName", value = "DetPval") dat <- sumBeta[i,]%>% tibble::rownames_to_column(var = "CGid") %>% gather(-CGid, key = "SpeciesLatinName", value = "bval") %>% left_join(dat) p2 <- dat %>% ggplot(aes(x= bval, y=DetPval)) +geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor()+theme_classic(base_size = 12)+ggtitle("Beta vs Detection pvalue (-log10) in 5656 conserved probes")+xlab("abs(bval-0.5)") ggsave("Beta vs Detection pvalue in 5656 conserved probes.pdf", p2, width = 6, height = 6) # sumSampsForDetPval <- sampsForDetPval %>% filter(!duplicated(SpeciesLatinName)) %>% arrange(Order)%>% mutate(Clade =ifelse(Order%in%marsupials, "marsupials", ifelse(Order%in%monotremes, "monotremes", "eutherians"))) sumDetectionPvalMatrix <- sumDetectionPval %>% dplyr::select(sumSampsForDetPval$SpeciesLatinName) %>% mutate(across(everything(), ~ifelse(.<0.05, 1, 0))) plots <- lapply(1:4, function(x){ if(x==1){ map <- sumDetectionPvalMatrix } else if(x==2){ map <- sumDetectionPvalMatrix[rownames(bValsNoMars),] }else if(x==3){ map <- sumDetectionPvalMatrix[rownames(bValsAll),] }else if(x==4){ map <- sumDetectionPvalMatrix[rownames(bValsWithMonotremes),] } cat(paste(x)) corSpecies <- corAndPvalue(map) # 1-cor cor <- corSpecies$cor[,which(colSums(corSpecies$cor, na.rm = T)>0)] cor <- cor[complete.cases(cor),] cor <- cor[colnames(cor),] corDist2 <- as.dist(1-cor) speciesTree2 <- hclust(corDist2, method = "average") # species distances phyloTree <- ape::read.nexus("speciesTimeTree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # from the mappability data specDistances <- cophenetic(speciesTree2) specDistances <- as.data.frame(as.matrix(specDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200))%>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) }) targets <- c("all probes (37452)", "eutherian\n(14705 probes)", "eutherian and marsupial\n(7956 probes)", "mammals (+monotremes)\n(5656 probes)") names(plots) <- targets plots <- rbindlist(plots, idcol = "probes") p1 <- plots %>% distinct() %>% mutate(probes=factor(probes, levels = targets)) %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor()+theme_classic(base_size = 18)+facet_wrap(.~probes, nrow = 2)+ylab("Phlogenetic tree distance (million years)")+xlab("Trees based on binarized detection pvalue (p<0.05)")+labs(size="Number of overlapped points")+ expand_limits(x = 0, y = 0) jpeg("Det Pvalue tree.jpeg", width = 13, height = 7, units = "in", res = 300) p1 dev.off() # plots <- lapply(1:4, function(x){ if(x==1){ map <- sumDetectionPvalMatrix } else if(x==2){ map <- sumDetectionPvalMatrix[rownames(bValsNoMars),] }else if(x==3){ map <- sumDetectionPvalMatrix[rownames(bValsAll),] }else if(x==4){ map <- sumDetectionPvalMatrix[rownames(bValsWithMonotremes),] } corSpecies <- corAndPvalue(map) # 1-cor cor <- corSpecies$cor[,which(colSums(corSpecies$cor, na.rm = T)>0)] cor <- cor[complete.cases(cor),] cor <- cor[colnames(cor),] corDist2 <- as.dist(1-cor) speciesTree2 <- hclust(corDist2, method = "average") tree2 <- ggtree(speciesTree2, layout = "slanted")+ #geom_tiplab(size=7.5,aes(angle=0))+ theme_tree() s <- species %>% mutate(Order=paste(OrderNumberHorvath, Order, sep = ".")) d <- data.frame(label = tree2$data$label) %>% left_join(dplyr::select(.data=s, SpeciesLatinName, Order), by=c("label"="SpeciesLatinName")) plot <- tree2 %<+% d + geom_tiplab(aes(label=label, fill=Order),size=6,aes(angle=0), geom = "label", label.padding = unit(0.01, "lines"))+ #geom_hilight(mapping=aes(subset=node%in%node[which(!is.na(Order))],fill=Order))+ scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ theme(text = element_text(size = 15), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ guides(fill = guide_legend(ncol = 1))+ #coord_flip()+ ggtitle(paste(targets[x]))+ geom_treescale(linesize = 1, fontsize = 4, width = 0.05) #+ xlim(-0.5, NA) }) plots <- ggpubr::ggarrange(plotlist = plots, ncol = 2, nrow = 2, common.legend = TRUE, legend = "right") jpeg("Det pvalue tree actual trees.jpeg", width = 25, height = 25, units = "in", res = 300) plots dev.off() ``` ```{r Trees filtered by detection pvalue} library(ggtree) require(treeio) library("WGCNA") #bValsAll <- readRDS("DNAmDataAll.RDS") samp <- samplesWithMonotremes %>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order))%>%mutate(Tissue2 = ifelse(Tissue%in%c("Cortex", "Neocortex", "Striatum", "SVZ", "FrontalCortex", "Hippocampus", "OccipitalCortex", "Substantia nigra", "WholeBrain", "Brain", "Cerebellum", "Hypothalamus", "TemporalCortex"), "Brain", as.character(Tissue))) %>% mutate(orCol = as.character(factor(Order, levels= levels(Order), labels = orCols))) tissueCount <- samp%>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order))%>%group_by(Tissue2,SpeciesLatinName)%>%tally()%>%filter(n>2)%>% group_by(Tissue2) %>%tally()%>%filter(n>40) samp <- samp %>% filter(Tissue%in%tissueCount$Tissue2) detectableProbesInAllSamples <- readRDS("detectable probes in all species p<0.01.RDS") allData <- bValsWithMonotremes %>% dplyr::select(samp$Basename) %>% tibble::rownames_to_column(var="CGid") %>% filter(CGid%in%detectableProbesInAllSamples) %>% gather(key = "Basename", value = "bval", -CGid) %>% left_join(samp[,c("Basename", "SpeciesLatinName", "Tissue", "spec")]) %>% group_split(Tissue) names(allData) <- sapply(allData, function(x){x$Tissue[1]}) corDat <- plyr::llply(allData, function(x){ tis <- x$Tissue[1] datColors <- samp %>% filter(SpeciesLatinName%in%x$SpeciesLatinName)%>% dplyr::select(SpeciesLatinName, Order)%>% filter(!duplicated(SpeciesLatinName)) %>% mutate(SpeciesLatinName = make.unique(SpeciesLatinName)) %>% tibble::column_to_rownames(var = "SpeciesLatinName") x <- x %>% group_by(SpeciesLatinName, CGid) %>% summarise(bval = mean(bval))%>% spread(key = SpeciesLatinName, value = bval) %>% tibble::column_to_rownames(var = "CGid") corSpecies <- corAndPvalue(x) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) #corDist2 <- dist(corSpecies$cor, method = "minkowski") speciesTree2 <- hclust(corDist2, method = "average") tree2 <- ggtree(speciesTree2, layout = "slanted")+ #geom_tiplab(size=7.5,aes(angle=0))+ theme_tree() d <- data.frame(label = tree2$data$label) %>% left_join(dplyr::select(.data=sumSample, SpeciesLatinName, Number, Order), by=c("label"="SpeciesLatinName")) plot <- tree2 %<+% d + geom_tiplab(aes(label=Number, fill=Order),size=6,aes(angle=0), geom = "label", label.padding = unit(0.01, "lines"))+ geom_hilight(mapping=aes(subset=node%in%node[which(!is.na(Order))],fill=Order))+ scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ theme(text = element_text(size = 30), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ guides(fill = guide_legend(ncol = 1))+ #coord_flip()+ ggtitle(paste(tis))+ geom_treescale(linesize = 1, fontsize = 4, width = 0.05)+ xlim(-0.5, NA) # plot <- gheatmap(tree2, datColors, offset=.2, width=.083, # colnames_angle=0, colnames_offset_y = .1) + # scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ # theme(text = element_text(size = 30), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ # guides(fill = guide_legend(ncol = 1))+ # #coord_flip()+ # ggtitle(paste(tis))+ geom_treescale(linesize = 1, fontsize = 6, width = 0.05) return(list("tree"=speciesTree2, "plot"=plot)) }) plots <- lapply(corDat, function(x){x$plot}) plots <- ggpubr::ggarrange(plotlist = plots, ncol = 3, common.legend = TRUE, legend = "right") pdf("Filtered DNAm clustering, 1-cor.pdf", width = 45, height = 30) plots dev.off() phyloTree <- ape::read.nexus("Mammalia_species") phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" #phyloTree <- ape::read.nexus("speciesTimeTree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # from the DNAm data specDist <- rbindlist(plyr::llply(corDat, function(x){ x <- x[[1]] specDistances <- cophenetic(x) specDistances <- as.data.frame(as.matrix(specDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200)) }), idcol = "Tissue") %>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) specDist <- specDist %>% group_by(Tissue) %>% mutate(residuals = residuals(lm(PhyloDist~dist))) %>% mutate(outlier= ifelse(abs(residuals)>80, "outlier", "goodMatch")) p1 <- specDist %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(size=6, color="blue")+theme_classic(base_size = 20)+ylab("Phlogenetic tree distance\n(million years)")+xlab("DNAm tree (180 CpGs with medianDetPval<0.01) distance")+facet_wrap(.~Tissue, nrow = 1)+labs(size="Number of overlapped\npoints")+theme(axis.text.y = element_text(size=20))+scale_color_manual(values=c("black","red")) pdf(file = "Comparison of the DNAm and DNA sequence Filtered phylogenetic trees.pdf", width = 15, height = 4) p1 dev.off() ``` ## Sensetivity analysis of trees using mappability ```{r phylogenetic analysis of all species} library(ggtree) library("WGCNA") mappability <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mappability file. new annotations.Amin.V2.RDS") detectableProbesInAllSamples <- readRDS("detectable probes in all species p<0.01.RDS") detectableProbesInAllSamples2 <- readRDS("detectable probes in all species p<0.05.RDS") nonDetectableProbes <- readRDS("nondetectable probes in all species p>0.3.RDS") targets <- colnames(mappability)[-1] targets <- gsub("(Bat\\.)|(Dog\\.)|(Cat\\.)|(Cattle\\.)|(Elephant\\.)|(Horse\\.)|(Human\\.)|(Killer whale\\.)|(Marmot\\.alpine\\.)|(Marmot\\.yellow-bellied\\.)|(Mouse\\.)|(Olive baboon\\.)|(Opossum\\.)|(Pig\\.)|(Prairie_vole\\.)|(Rat\\.)|(Rhesus_Macaque\\.)|(Sheep\\.)|(Tasmanian_devil\\.)|(White tailed deer\\.)|(Beluga_whale\\.)|(Asian_elephant\\.)|(Marmoset\\.)|(Vervet_monkey\\.)", "", targets) targets <- str_to_sentence(targets) speciesLatinName <- str_extract("(^\\w+\\.)|(^\\w+v4)", string = targets) speciesLatinName <- gsub("\\.", "", speciesLatinName) speciesLatinName <- gsub("_v+[1-9]", "", speciesLatinName) speciesLatinName <- gsub("Canfam3", "Canis_familiaris", speciesLatinName) speciesLatinName <- gsub("_9$", "", speciesLatinName) speciesLatinName <- gsub("_", " ", speciesLatinName) names(mappability) <- c("CGid", speciesLatinName) samp <- data.frame("SpeciesLatinName"=speciesLatinName) %>% filter(SpeciesLatinName%in%species$SpeciesLatinName&!duplicated(SpeciesLatinName)) mappability <- mappability%>% dplyr::select(CGid, samp$SpeciesLatinName) %>% tibble::column_to_rownames(var = "CGid") mappability[mappability=="yes"] <- 1 mappability[mappability=="no"] <- 0 mappability <- mappability %>%mutate_all(as.numeric) %>% filter(rowSums(.)>0) plots <- lapply(1:6, function(x){ if(x==1){ mappability <- mappability } else if(x==2){ mappability <- mappability[rownames(bValsNoMars),] }else if(x==3){ mappability <- mappability[rownames(bValsAll),] }else if(x==4){ mappability <- mappability[rownames(bValsWithMonotremes),] }else if(x==5){ mappability <- mappability[detectableProbesInAllSamples2,] }else if(x==6){ mappability <- mappability[detectableProbesInAllSamples,] } corSpecies <- corAndPvalue(mappability) # 1-cor cor <- corSpecies$cor[,which(colSums(corSpecies$cor, na.rm = T)>0)] cor <- cor[complete.cases(cor),] cor <- cor[colnames(cor),] corDist2 <- as.dist(1-cor) speciesTree2 <- hclust(corDist2, method = "average") # species distances phyloTree <- ape::read.nexus("speciesTimeTree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # from the mappability data specDistances <- cophenetic(speciesTree2) specDistances <- as.data.frame(as.matrix(specDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200))%>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) }) targets <- c("all probes (37452)", "eutherian\n(14705 probes)", "eutherian and marsupial\n(7956 probes)", "mammals (+monotremes)\n(5656 probes)", "medianDetectP<0.05\nin all (620 probes)","medianDetectP<0.01\nin all (180 probes)") names(plots) <- targets plots <- rbindlist(plots, idcol = "probes") p1 <- plots %>% mutate(probes=factor(probes, levels = targets)) %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor()+theme_classic(base_size = 18)+facet_wrap(.~probes, nrow = 2)+ylab("Phlogenetic tree distance (million years)")+xlab("Distances in Mappability Trees")+labs(size="Number of overlapped points") jpeg("mappability tree.jpeg", width = 13, height = 7, units = "in", res = 300) p1 dev.off() p2 <- plots %>% filter(probes=="medianDetectP<0.01\nin all (180 probes)") %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(color="blue", label.y.npc = 0.1)+theme_classic(base_size = 15)+ylab("Phlogenetic tree distance\n(million years)")+xlab("Distances in Mappability Trees")+labs(size="Number of overlapped points")+theme(legend.position = "none") ggsave("mappability tree 180 probes.jpeg",p2, width = 4, height = 3, units = "in", dpi = 300) # save the trees targets <- c("all probes (37452)", "eutherian\n(14705 probes)", "eutherian and marsupial\n(7956 probes)", "mammals (+monotremes)\n(5656 probes)", "medianDetectP<0.05\nin all (620 probes)","medianDetectP<0.01\nin all (180 probes)") plots <- lapply(1:6, function(x){ if(x==1){ map <- mappability } else if(x==2){ map <- mappability[rownames(bValsNoMars),] }else if(x==3){ map <- mappability[rownames(bValsAll),] }else if(x==4){ map <- mappability[rownames(bValsWithMonotremes),] }else if(x==5){ map <- mappability[detectableProbesInAllSamples2,] }else if(x==6){ map <- mappability[detectableProbesInAllSamples,] } corSpecies <- corAndPvalue(map) # 1-cor cor <- corSpecies$cor[,which(colSums(corSpecies$cor, na.rm = T)>0)] cor <- cor[complete.cases(cor),] cor <- cor[colnames(cor),] corDist2 <- as.dist(1-cor) speciesTree2 <- hclust(corDist2, method = "average") tree2 <- ggtree(speciesTree2, layout = "slanted")+ #geom_tiplab(size=7.5,aes(angle=0))+ theme_tree() s <- species %>% mutate(Order=paste(OrderNumberHorvath, Order, sep = ".")) d <- data.frame(label = tree2$data$label) %>% left_join(dplyr::select(.data=s, SpeciesLatinName, Order), by=c("label"="SpeciesLatinName")) plot <- tree2 %<+% d + geom_tiplab(aes(label=label, fill=Order),size=6,aes(angle=0), geom = "label", label.padding = unit(0.01, "lines"))+ #geom_hilight(mapping=aes(subset=node%in%node[which(!is.na(Order))],fill=Order))+ scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ theme(text = element_text(size = 15), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ guides(fill = guide_legend(ncol = 1))+ #coord_flip()+ ggtitle(paste(targets[x]))+ geom_treescale(linesize = 1, fontsize = 4, width = 0.05) #+ xlim(-0.5, NA) }) plots <- ggpubr::ggarrange(plotlist = plots, ncol = 3, nrow = 2, common.legend = TRUE, legend = "right") jpeg("mappability tree actual trees.jpeg", width = 25, height = 25, units = "in", res = 300) plots dev.off() ``` ```{r Heatmap based on mappability} library(ggtree) library("WGCNA") map <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mappability file. new annotations.Amin.V2.RDS") targets <- colnames(map)[-1] detectableProbesInAllSamples <- readRDS("detectable probes in all species p<0.01.RDS") targets <- gsub("(Bat\\.)|(Dog\\.)|(Cat\\.)|(Cattle\\.)|(Elephant\\.)|(Horse\\.)|(Human\\.)|(Killer whale\\.)|(Marmot\\.alpine\\.)|(Marmot\\.yellow-bellied\\.)|(Mouse\\.)|(Olive baboon\\.)|(Opossum\\.)|(Pig\\.)|(Prairie_vole\\.)|(Rat\\.)|(Rhesus_Macaque\\.)|(Sheep\\.)|(Tasmanian_devil\\.)|(White tailed deer\\.)|(Beluga_whale\\.)|(Asian_elephant\\.)|(Marmoset\\.)|(Vervet_monkey\\.)", "", targets) targets <- str_to_sentence(targets) speciesLatinName <- str_extract("(^\\w+\\.)|(^\\w+v4)", string = targets) speciesLatinName <- gsub("\\.", "", speciesLatinName) speciesLatinName <- gsub("_v+[1-9]", "", speciesLatinName) speciesLatinName <- gsub("Canfam3", "Canis_familiaris", speciesLatinName) speciesLatinName <- gsub("_9$", "", speciesLatinName) speciesLatinName <- gsub("_", " ", speciesLatinName) names(map) <- c("CGid", speciesLatinName) samp <- data.frame("SpeciesLatinName"=speciesLatinName) %>% filter(SpeciesLatinName%in%species$SpeciesLatinName&!duplicated(SpeciesLatinName)) map <- map%>% dplyr::select(CGid, samp$SpeciesLatinName) %>% tibble::column_to_rownames(var = "CGid") map[map=="yes"] <- 1 map[map=="no"] <- 0 map <- map %>%mutate_all(as.numeric) %>% filter(rowSums(.)>0) sampsMappability <- samplesWithMonotremes %>% filter(SpeciesLatinName %in%names(map)) map <- map[rownames(bValsNoMars),] targets <- unique(sampsMappability$SpeciesLatinName) sumSampsForMap <- sampsMappability %>% filter(!duplicated(SpeciesLatinName)) %>% mutate(Clade =ifelse(Order%in%marsupials, "marsupials", ifelse(Order%in%monotremes, "monotremes", "eutherians")))%>% arrange(Order)%>% arrange(Clade) map <- map %>% dplyr::select(sumSampsForMap$SpeciesLatinName) sumSampsForMap <- sumSampsForMap %>% mutate(sumMap =colSums(map==0)) i <- which(sumSampsForMap$sumMap>2000&sumSampsForMap$Clade=="eutherians") sumSampsForMap <- sumSampsForMap[-i,] map <- map[,-i] # column annotation annot1 <- sumSampsForMap %>% dplyr::select(SpeciesLatinName, Order) %>% left_join(orColsData) annot1 <- annot1$colors names(annot1) <- sumSampsForMap$Order annot2 <- sumSampsForMap %>% dplyr::select(SpeciesLatinName, Clade) %>% mutate(Clade=as.factor(Clade)) %>% mutate(colors=as.character(factor(Clade, labels = c("turquoise", "blue", "brown")))) annot2 <- annot2$colors names(annot2) <- sumSampsForMap$Clade col_fun = circlize::colorRamp2(breaks = c(min(log(sumSampsForMap$maximum_age)), max(log(sumSampsForMap$maximum_age))), colors = c("blue", "red")) ha = HeatmapAnnotation(Clade = names(annot2), Order = names(annot1), log_maximumAge=log(sumSampsForMap$maximum_age), col = list(Clade=annot2,Order=annot1, log_maximumAge=col_fun), show_legend = F, annotation_name_side = "left") colPalette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256)) # row annotation rowData <- data.frame(CGid = rownames(map)) %>% mutate(eutherianProbes = ifelse(CGid%in%rownames(bValsNoMars), "grey", NA))%>% mutate(marsupialSubset = ifelse(CGid%in%rownames(bValsAll), "grey", NA))%>% mutate(monotremeSubset = ifelse(CGid%in%rownames(bValsWithMonotremes), "grey", NA))%>% mutate(highDetectableSubset = ifelse(CGid%in%detectableProbesInAllSamples, "grey", NA))%>% mutate(modules=mergedColors) mod1 <- rowData$eutherianProbes names(mod1) <- rowData$CGid mod2 <- rowData$marsupialSubset names(mod2) <- rowData$CGid mod3 <- rowData$monotremeSubset names(mod3) <- rowData$CGid mod4 <- rowData$highDetectableSubset names(mod4) <- rowData$CGid mod5 <- rowData$modules names(mod5) <- rowData$CGid # row annotation ra =rowAnnotation(eutherianProbes = rowData$CGid, marsupialSubset = rowData$CGid, monotremeSubset = rowData$CGid,`180highDetectableSubset` = rowData$CGid, Net1Modules = rowData$CGid , col = list(eutherianProbes =mod1, marsupialSubset = mod2, monotremeSubset= mod3, `180highDetectableSubset` =mod4, Net1Modules=mod5), show_annotation_name = T, show_legend = FALSE) map[map=="Inf"] <- NA m <- max(map, na.rm = T) map[is.na(map)] <- m detPHeatmap <-ComplexHeatmap::Heatmap(map, col = my_palette13, show_column_names = TRUE, name = "Binarize mappability", show_row_names = F, column_dend_reorder = F, cluster_columns = F, cluster_rows = T, row_dend_reorder=T, clustering_distance_columns="pearson",clustering_method_columns = "average", border = TRUE, bottom_annotation = ha, show_row_dend = F, show_column_dend = F, column_names_gp = grid::gpar(fontsize = 8), show_heatmap_legend = F) lgd_order = Legend(labels = names(orCols), title = "Order", legend_gp = gpar(fill = orCols)) lgd_clade = Legend(labels = c("eutherians", "marsupials", "monotremes"), title = "Clades", legend_gp = gpar(fill = c("turquoise", "blue", "brown"))) lgd_maxAge <- Legend(col_fun = col_fun, title = "log maximumAge") lgd_Mappability <- Legend(labels = c("not mapped", "mapped"), title = "Binarized mappability", legend_gp = gpar(fill = c(my_palette13[1], my_palette13[9]))) jpeg(file = "Heatmap mappability 14705 probes.jpeg", width = 15, height = 12, units = "in", res = 300) draw(detPHeatmap+ra, heatmap_legend_side = "left", annotation_legend_side="left", padding = unit(c(15, 10, 5, 15), "mm"), column_title="14705 conserved probes in Eutherians", column_title_gp=gpar(fontsize=17, fontface="bold"),heatmap_legend_list = list(lgd_Mappability, lgd_clade,lgd_order,lgd_maxAge)) dev.off() ``` ```{r scatter plot phyloepigenetic tree vs mappability trees} phyloTree <- ape::read.nexus("Mammalia_species") phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" #phyloTree <- ape::read.nexus("speciesTimeTree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # from the DNAm data specDist <- rbindlist(plyr::llply(corDat, function(x){ x <- x[[1]] specDistances <- cophenetic(x) specDistances <- as.data.frame(as.matrix(specDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200)) }), idcol = "Tissue") %>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) specDist <- specDist %>% group_by(Tissue) %>% mutate(residuals = residuals(lm(PhyloDist~dist))) %>% mutate(outlier= ifelse(abs(residuals)>80, "outlier", "goodMatch")) # from mappability data mapDis <- lapply(1:6, function(x){ if(x==1){ mappability <- mappability } else if(x==2){ mappability <- mappability[rownames(bValsNoMars),] }else if(x==3){ mappability <- mappability[rownames(bValsAll),] }else if(x==4){ mappability <- mappability[rownames(bValsWithMonotremes),] }else if(x==5){ mappability <- mappability[detectableProbesInAllSamples2,] }else if(x==6){ mappability <- mappability[detectableProbesInAllSamples,] } corSpecies <- corAndPvalue(mappability) # 1-cor cor <- corSpecies$cor[,which(colSums(corSpecies$cor, na.rm = T)>0)] cor <- cor[complete.cases(cor),] cor <- cor[colnames(cor),] corDist2 <- as.dist(1-cor) speciesTree2 <- hclust(corDist2, method = "average") # species distances phyloTree <- ape::read.nexus("speciesTimeTree") distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # from the mappability data specDistances <- cophenetic(speciesTree2) specDistances <- as.data.frame(as.matrix(specDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200))%>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) }) targets <- c("all probes (37452)", "eutherian\n(14705 probes)", "eutherian and marsupial\n(7956 probes)", "mammals (+monotremes)\n(5656 probes)", "medianDetectP<0.05\nin all (620 probes)","180 highly detectable probes") names(mapDis) <- targets mapDis <- rbindlist(mapDis, idcol = "probes") # combine combData <- specDist %>% filter(Tissue=="Blood") %>% dplyr::select(Tissue, ID, dist) %>% setnames(new = c("Tissue", "ID", "phyloEpiDis")) combData <- mapDis %>% dplyr::select(probes, ID, dist) %>% setnames(new = c("probes", "ID", "mapDis")) %>% left_join(combData) %>% filter(complete.cases(.)) plot <- combData%>% filter(!probes%in%c("medianDetectP<0.05\nin all (620 probes)", "180 highly detectable probes"))%>% ggplot(aes(x = phyloEpiDis, y=mapDis))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(color="blue", label.y.npc = 0.1)+theme_classic(base_size = 15)+facet_wrap(.~probes, nrow = 1)+ylab("Distances in Blood\nPhloEpigenetic Tree")+xlab("Distances in Mappability Trees") pdf("mappability tree vs phyloepigenetic trees.pdf", width = 10, height = 3) plot dev.off() plot <- combData %>% filter(probes%in%c("180 highly detectable probes")) %>% mutate(probes=factor(probes, levels=c("180 highly detectable probes")))%>% ggplot(aes(x = phyloEpiDis, y=mapDis))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(color="blue", label.y.npc = 0.1)+theme_classic(base_size = 15)+ylab("Distances in Blood\nPhloEpigenetic Tree")+xlab("Distances in Mappability Trees") pdf("mappability tree vs phyloepigenetic trees selected.pdf", width = 4, height = 3) plot dev.off() ``` ```{r scatter plot of the distances} phyloTree <- ape::read.nexus("Mammalia_species") phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # from the DNAm data specDist <- rbindlist(plyr::llply(corDat, function(x){ x <- x[[1]] specDistances <- cophenetic(x) specDistances <- as.data.frame(as.matrix(specDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200)) }), idcol = "Tissue") %>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) p1 <- specDist %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor()+theme_classic(base_size = 12)+ylab("Phlogenetic tree distance (million years)")+xlab("DNAm tree distance")+facet_wrap(.~Tissue, nrow = 1)+labs(size="Number of overlapped points") pdf(file = "Comparison of the DNAm and DNA sequence phylogenetic trees.pdf", width = 12, height = 4) p1 dev.off() ``` ## Sensetivity analysis, phyloepigenetic tree ```{r phyloepigenetic tree sensetivity} load("~/Steve Horvath Lab Dropbox/Amin Haghani/MammalianMethCombined/StuffCaesar/NormalizedData/all_probes_all_samples_sesame.RData") bVals <- dat0sesame %>% tibble::column_to_rownames(var = "CGid") %>% dplyr::select(samplesWithMonotremes$Basename) mappabilityAll <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mappability file. new annotations.Amin.V2.RDS") mappabilityAll <-mappabilityAll %>% mutate(sum = rowSums(mappabilityAll=="yes")) %>% relocate(sum, .after = "CGid") targets <- floor(seq(from = 158, to = 50,length.out = 16)) samp <- samplesWithMonotremes %>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order))%>%mutate(Tissue2 = ifelse(Tissue%in%c("Cortex", "Neocortex", "Striatum", "SVZ", "FrontalCortex", "Hippocampus", "OccipitalCortex", "Substantia nigra", "WholeBrain", "Brain", "Cerebellum", "Hypothalamus", "TemporalCortex"), "Brain", as.character(Tissue))) %>% mutate(orCol = as.character(factor(Order, levels= levels(Order), labels = orCols))) tissueCount <- samp%>% filter(!is.na(SpeciesLatinName)) %>% filter(!is.na(Order))%>%group_by(Tissue2,SpeciesLatinName)%>%tally()%>%filter(n>2)%>% group_by(Tissue2) %>%tally()%>%filter(n>40) samp <- samp %>% filter(Tissue%in%tissueCount$Tissue2) corDat <- plyr::llply(1:length(targets), function(x){ cat(paste(x)) if(x==1){ n1=targets[x] n2 = 160 tis <- paste("Blood, 100 probes map to >", n1, " mammals", sep = "") } else{ n1=targets[x] n2 = targets[x-1] tis <- paste("Blood, 100 probes map to >", n1," but <",n2, " mammals", sep = "") } set.seed(12380) allData <- bVals %>% dplyr::select(samp$Basename) %>% tibble::rownames_to_column(var="CGid") %>% filter(CGid%in%mappabilityAll$CGid[which(mappabilityAll$sum>n1&mappabilityAll$sum% filter(CGid%in%sample(CGid, 100, replace = F))%>% gather(key = "Basename", value = "bval", -CGid) %>% left_join(samp[,c("Basename", "SpeciesLatinName", "Tissue")]) datColors <- samp %>% filter(SpeciesLatinName%in%allData$SpeciesLatinName)%>% dplyr::select(SpeciesLatinName, Order)%>% filter(!duplicated(SpeciesLatinName)) %>% mutate(SpeciesLatinName = make.unique(SpeciesLatinName)) %>% tibble::column_to_rownames(var = "SpeciesLatinName") dat <- allData %>% group_by(SpeciesLatinName, CGid) %>% summarise(bval = mean(bval))%>% spread(key = SpeciesLatinName, value = bval) %>% tibble::column_to_rownames(var = "CGid") corSpecies <- corAndPvalue(dat) # 1-cor corDist2 <- as.dist(1-corSpecies$cor) speciesTree2 <- hclust(corDist2, method = "average") tree2 <- ggtree(speciesTree2, layout = "slanted") +geom_tiplab(size=7.5,aes(angle=0))+theme_tree() plot <- gheatmap(tree2, datColors, offset=.2, width=.083, colnames_angle=0, colnames_offset_y = .1) + scale_fill_manual(values = orCols)+labs(fill="Order")+ggtitle("Clustering 1-cor")+ theme(text = element_text(size = 30), plot.title = element_text(hjust = 0.5), legend.text = element_text(size=20))+ #coord_flip()+ ggtitle(paste(tis)) return(list("tree"=speciesTree2, "plot"=plot)) }) names(corDat) <- sapply(1:length(targets), function(x){ if(x==1){ n1=targets[x] n2 = 160 tis <- paste("100 probes map to >", n1, " mammals", sep = "") } else{ n1=targets[x] n2 = targets[x-1] tis <- paste("100 probes map to >", n1," but <",n2, " mammals", sep = "") } return(tis)}) # plots <- lapply(corDat, function(x){x[[2]]}) # plots <- ggpubr::ggarrange(plotlist = plots, ncol = 4, nrow = 2, common.legend = TRUE, legend = "right") # # pdf("DNAm clustering, conserved probes.pdf", width = 50, height = 50) # plots # dev.off() ``` ```{r scatter plot of the distances} phyloTree <- ape::read.nexus("Mammalia_species") phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" distances <- cophenetic(phyloTree) distances <- as.data.frame(as.matrix(distances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "PhyloDist", -a) %>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, PhyloDist) %>% filter(PhyloDist!=0) # from the DNAm data specDist <- rbindlist(plyr::llply(corDat, function(x){ x <- x[[1]] specDistances <- cophenetic(x) specDistances <- as.data.frame(as.matrix(specDistances))%>% tibble::rownames_to_column(var = "a") %>% gather(key = "b", value = "dist", -a) %>% mutate(a = gsub(" ", "_", a))%>% mutate(b = gsub(" ", "_", b))%>% mutate(ID = paste(a,b, sep = "_")) %>% dplyr::select(ID, dist) %>% filter(dist!=0) %>% left_join(distances) %>% filter(complete.cases(.))%>% mutate(x2 = jitter(as.numeric(dist), factor=200)) %>% mutate(y2 = jitter(as.numeric(PhyloDist), factor=200)) }), idcol = "Tissue") %>% mutate(coordinate = paste(dist, PhyloDist)) %>% group_by(coordinate) %>% add_count(coordinate) p1 <- specDist %>% mutate(Tissue=factor(Tissue, levels = names(corDat))) %>% ggplot(aes(x = dist, y=PhyloDist, size=n))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor()+theme_classic(base_size = 12)+ylab("Phlogenetic tree distance (million years)")+xlab("DNAm tree distance")+facet_wrap(.~Tissue, nrow = 4)+labs(size="Number of overlapped points") jpeg(file = "Sensetivity, phyloepigenetic tree, seq conservation.jpeg", width = 15, height = 15, units = "in", res = 300) p1 dev.off() # scatter plot of correlations correlations <- specDist %>% mutate(Tissue=factor(Tissue, levels = names(corDat))) %>% group_by(Tissue) %>% summarise(r = cor(dist, PhyloDist)) %>% mutate(speciesNumber = as.numeric(as.character(factor(Tissue, levels = names(corDat), labels = targets)))) %>% filter(speciesNumber<158) p2 <- correlations %>% ggplot(aes(y = r, x=speciesNumber))+geom_point_rast()+geom_smooth(method = "lm")+ggpubr::stat_cor(size=8, color="blue")+theme_classic(base_size = 12)+xlab("100 probes map to number of species")+ylab("correlation phyloepigenetic-phylogenetic trees")+ggtitle("Effects of probe conservation \non phyloepigenetic tree") jpeg(file = "Scatter, phyloepigenetic tree, seq conservation.jpeg", width = 5, height = 4, units = "in", res = 300) p2 dev.off() ``` ## Species per tissue ```{r Species per tissue} ## no marsupials samples_tissue <- samplesNoMars %>% filter(!is.na(Tissue2)) %>% dplyr::select(SpeciesCommonName, Tissue2) %>% distinct(.) %>% group_by(Tissue2) %>% tally() %>% right_join(samplesNoMars) %>% filter(n>=8) %>%group_split(Tissue2) names(samples_tissue) <- sapply(samples_tissue, function(x){x$Tissue2[1]}) samples_tissue$all <- samplesNoMars samples_tissue <- plyr::llply(samples_tissue, function(y){ spec2 <- unique(y$SpeciesCommonName) Spec2 <- as.data.frame(sapply(spec2, function(x){ a <- y %>% mutate(x = ifelse(SpeciesCommonName==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Spec2) <- spec2 Spec2 <- cbind(Basename = y$Basename, Spec2) }) results <- rbindlist(plyr::llply(samples_tissue, function(x){ ME <- MEs[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% mutate(dir = as.factor(sign(r))) %>% group_by(var, dir) %>% top_n(1, abs(r))%>% mutate(ID = paste(modules, var, sep = "_"))%>% filter(pval<0.05&abs(r)>0.5) %>% dplyr::select(-dir) }), use.names = T, idcol = "Tissue") ## marsupials samples_tissue_marsupials <- samples%>% filter(!is.na(Tissue2)) %>% dplyr::select(SpeciesCommonName, Tissue2) %>% distinct(.) %>% group_by(Tissue2) %>% tally() %>% right_join(samples) %>% filter(n>=8) %>%group_split(Tissue2) names(samples_tissue_marsupials) <- sapply(samples_tissue_marsupials, function(x){x$Tissue2[1]}) samples_tissue_marsupials$all <- samples samples_tissue_marsupials <- plyr::llply(samples_tissue_marsupials, function(y){ spec2 <- unique(y$SpeciesCommonName) Spec2 <- as.data.frame(sapply(spec2, function(x){ a <- y %>% mutate(x = ifelse(SpeciesCommonName==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Spec2) <- spec2 Spec2 <- cbind(Basename = y$Basename, Spec2) }) results2 <- rbindlist(plyr::llply(samples_tissue_marsupials, function(x){ ME <- MEs4[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(5, -pval)%>%top_n(2, abs(r)) %>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) %>% filter(pval<0.05&abs(r)>0.2) }), use.names = T, idcol = "Tissue") %>% left_join(samples[,c("SpeciesCommonName", "Order")], by = c("var"="SpeciesCommonName"))%>% filter(Order%in%marsupials) %>% dplyr::select(-Order) ## monotremes samples_tissue_monotremes <- samplesWithMonotremes%>% filter(!is.na(Tissue2)) %>% dplyr::select(SpeciesCommonName, Tissue2) %>% distinct(.) %>% group_by(Tissue2) %>% tally() %>% right_join(samplesWithMonotremes) %>% filter(n>=8) %>%group_split(Tissue2) names(samples_tissue_monotremes) <- sapply(samples_tissue_monotremes, function(x){x$Tissue2[1]}) samples_tissue_monotremes$all <- samplesWithMonotremes samples_tissue_monotremes <- plyr::llply(samples_tissue_monotremes, function(y){ spec2 <- unique(y$SpeciesCommonName) Spec2 <- as.data.frame(sapply(spec2, function(x){ a <- y %>% mutate(x = ifelse(SpeciesCommonName==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) colnames(Spec2) <- spec2 Spec2 <- cbind(Basename = y$Basename, Spec2) }) results3 <- rbindlist(plyr::llply(samples_tissue_monotremes, function(x){ ME <- MEs3[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% group_by(var) %>% top_n(5, -pval)%>%top_n(2, abs(r)) %>% group_by(var) %>% mutate(ID = paste(modules, var, sep = "_")) %>% filter(pval<0.05&abs(r)>0.2) }), use.names = T, idcol = "Tissue") %>% left_join(samples[,c("SpeciesCommonName", "Order")], by = c("var"="SpeciesCommonName"))%>% filter(Order%in%monotremes) %>% dplyr::select(-Order) results <- dplyr::bind_rows(results, results2) results <- dplyr::bind_rows(results, results3) sumTable <- results %>% dplyr::select(modules,r, var,pval, Tissue) %>% distinct() %>% group_by(modules) %>% summarize(SpeciesPerTissue = paste(var,"_",Tissue, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) sum2 <- sum2%>% left_join(sumTable) ``` ## Tissue level analysis ```{r preparing samples} tissueNames <- unique(samplesNoMars$Tissue) tis <- as.data.frame(sapply(tissueNames, function(x){ a <- samplesNoMars %>% mutate(x = ifelse(Tissue==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) %>% setnames(new = as.character(tissueNames)) %>% mutate(BrainAllRegions = ifelse(samplesNoMars$Tissue2=="BrainAllRegions", 1, 0)) %>% mutate(Basename=samplesNoMars$Basename) ``` ```{r relate to Tissues} datTraits <- tis %>% filter(Basename %in% rownames(MEs)) %>% tibble::column_to_rownames("Basename") moduleTraitCor=cor(MEs,datTraits,use="p") moduleTraitPvalue=corPvalueStudent(moduleTraitCor,nrow(datTraits)) summaryResults <- as.data.frame(moduleTraitPvalue) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% mutate(dir = as.factor(sign(r))) %>% group_by(var, dir) %>% top_n(1, abs(r))%>% mutate(ID = paste(modules, var, sep = "_"))%>% filter(pval<0.05&abs(r)>0.38) %>% dplyr::select(-dir) summaryResults4 <- summaryResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", "*")) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryResults4 <- summaryResults4[rownames(moduleTraitCor), colnames(moduleTraitCor)] identical(rownames(summaryResults4), rownames(moduleTraitCor)) tissueModules <- rownames(summaryResults4)[rowSums(summaryResults4!="")>0] #moduleTraitCor <- moduleTraitCor[orderModules,] #summaryResults4 <- summaryResults4[orderModules,] moduleTraitCor <- t(moduleTraitCor) summaryResults4 <- t(summaryResults4) i <- which(rowSums(summaryResults4=="*")>0) moduleTraitCor <- moduleTraitCor[i,] summaryResults4 <-summaryResults4[i,] my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231] library(ComplexHeatmap) cols <- gsub("ME", "", colnames(moduleTraitCor)) names(cols) <- colnames(moduleTraitCor) association <- KMEs %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) %>% mutate(modules = paste("ME", moduleColors, sep = "")) %>% right_join(data.frame(modules = colnames(moduleTraitCor))) association <- association[order(match(association$modules, colnames(moduleTraitCor))),] # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(moduleTraitCor), col = list(Modules = cols), CpG_Frequency = anno_barplot(association$Freq), show_legend = FALSE) colnames(moduleTraitCor) <- gsub("ME", "", colnames(moduleTraitCor)) Heatmap <- Heatmap(moduleTraitCor, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = TRUE, row_dend_reorder=TRUE, column_title = "Modules", cell_fun = function(j, i, x, y, w, h, col) { grid.text(summaryResults4[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=25))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15)) pdf(file = "Heatmap modules Tissues.pdf", width = 14, height = 5) draw(Heatmap, heatmap_legend_side = "left",annotation_legend_side="left", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ```{r summ} sumTable <- summaryResults3 %>% dplyr::select(modules,r, var,pval) %>% group_by(modules) %>% summarize(tissues = paste(var, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) tissueModules <- summaryResults3 %>% ungroup() %>% dplyr::select(modules,var) sum2 <- sum2%>% left_join(sumTable) ``` ```{r heatMap with arranged modules} # take the final summary table moduleTraitCor <- moduleTraitCor[,sum2$modules[which(sum2$group!="Marsupials")]] summaryResults4 <- summaryResults4[,sum2$modules[which(sum2$group!="Marsupials")]] # Take sum2 from network analysis, I wanted to get the grouping from Netwrok analysis trCols <- as.character(sum2$color[which(sum2$group!="Marsupials")]) names(trCols) <- sum2$group[which(sum2$group!="Marsupials")] # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(moduleTraitCor), CpG_Frequency = anno_barplot(sum2$Freq[which(sum2$group!="Marsupials")]), Traits = sum2$group[which(sum2$group!="Marsupials")], col = list(Modules = cols, Traits = trCols), show_legend = FALSE) Heatmap <- Heatmap(moduleTraitCor, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = TRUE, row_dend_reorder=TRUE, column_title = "Modules", cell_fun = function(j, i, x, y, w, h, col) { grid.text(summaryResults4[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=25))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15)) jpeg(file = "Heatmap modules Tissues.jpeg", width = 20, height = 15, units = "in", res = 300) draw(Heatmap, heatmap_legend_side = "left",annotation_legend_side="left", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ```{r GREAT analysis} results.combined <- read.csv("enrichment results, Network1 all modules.csv") summar <- summaryResults3 %>% group_by(modules) %>% summarise(tissues = paste(var, collapse = " \n ")) results.combined <- results.combined%>%filter(class%in%summar$modules) %>% left_join(summar, by = c("class"="modules")) # results.combined2 <- results.combined %>% # filter(Ontology%in%c("GO Biological Process", "GO Cellular Component", "GO Molecular Function")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", # ifelse(grepl("(Disease)",Ontology), "Diseases", # ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", # ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>% # filter(HyperP<1e-5&NumFgGenesHit>1) %>% group_by(Desc) %>% add_count() %>% filter(n==1) %>% group_by(class,tissues, datasets) %>% top_n(5, -log10(HyperP)) results.combined2 <- results.combined %>% filter(Ontology%in%c("GO Biological Process", "GO Cellular Component", "GO Molecular Function")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>% filter(HyperP<1e-5&NumFgGenesHit>1) %>% group_by(class,tissues, datasets) %>% top_n(5, -log10(HyperP)) combinedEnichemt <- results.combined2 %>% dplyr::rename(pValue = HyperP, nCommonGenes = NumFgGenesHit) %>% mutate(name = ifelse(datasets == "Upstream regulators", ID, Desc)) %>% mutate(blank = "") %>% mutate(tissues= as.factor(tissues)) %>% mutate(class = gsub("ME", "", class)) %>% mutate(class = factor(class)) combinedEnichemt$name = with(combinedEnichemt, reorder(name, log10(pValue))) p4 <- combinedEnichemt %>% ggplot(aes(y = name, x= blank, size=nCommonGenes, colour = -log10(pValue), shape=datasets))+ geom_point_rast()+ scale_size_binned()+ scale_color_gradient(high="red", low="blue")+ theme_bw()+ ylab(label = "Merged datasets")+ ggtitle("Enrichment analysis, modules related to Tissues")+ theme_classic(base_size = 25)+ facet_wrap(.~class+tissues, nrow = 1)+ theme(axis.text.x = element_blank(), axis.text.y = element_text(size=18), plot.margin = margin(0.5, 0, 0, 1, "cm"), plot.title = element_text(size=24, hjust = 0.5), axis.title.x = element_blank(), axis.ticks.x = element_blank(), strip.text = element_text(size=11))+ scale_shape_manual(values=c(17, 15, 3,7, 8,1))+ guides(shape = guide_legend(override.aes = list(size=5)))+ ggpubr::grids(linetype = "dotted", axis = "y") g <- ggplot_gtable(ggplot_build(p4)) strip_both <- which(grepl('strip-', g$layout$name)) fills <- levels(combinedEnichemt$class) k <- 1 for (i in strip_both) { j <- which(grepl('rect', g$grobs[[i]]$grobs[[1]]$childrenOrder)) g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k] k <- k+1 } grid::grid.draw(g) jpeg(file = "enrichment modules Tissues.jpeg", width = 37, height = 22, units = "in", res = 300) grid::grid.draw(g) dev.off() ``` ## Tissue per order ```{r Tissue per order} ## no marsupials samples_order <- samplesNoMars %>% dplyr::select(Order, Tissue) %>% distinct(.) %>% group_by(Order) %>% tally() %>% right_join(samplesNoMars) %>% filter(n>=8)%>% filter(!is.na(Tissue2)) %>%group_split(Order) names(samples_order) <- sapply(samples_order, function(x){x$Order[1]}) samples_order$all <- samplesNoMars samples_order <- plyr::llply(samples_order, function(y){ tis2 <- unique(as.character(y$Tissue[which(!is.na(y$Tissue))])) Tis2 <- as.data.frame(sapply(tis2, function(x){ a <- y %>% mutate(x = ifelse(Tissue==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) Tis2$BrainAllRegions <- ifelse(y$Tissue2=="BrainAllRegions", 1, 0) colnames(Tis2) <- c(tis2, "BrainAllRegions") Tis2 <- cbind(Basename = y$Basename, Tis2) }) results <- rbindlist(plyr::llply(samples_order, function(x){ ME <- MEs[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% mutate(dir = as.factor(sign(r))) %>% group_by(var, dir) %>% top_n(1, abs(r))%>% mutate(ID = paste(modules, var, sep = "_"))%>% filter(pval<0.05&abs(r)>0.38) %>% dplyr::select(-dir) }), use.names = T, idcol = "Order") ## marsupials samples_order <- samples %>% dplyr::select(Order, Tissue) %>% distinct(.) %>% group_by(Order) %>% tally() %>% right_join(samples) %>% filter(n>=4)%>% filter(!is.na(Tissue2)&Order%in%marsupials) %>%group_split(Order) names(samples_order) <- sapply(samples_order, function(x){x$Order[1]}) samples_order <- plyr::llply(samples_order, function(y){ tis2 <- unique(as.character(y$Tissue[which(!is.na(y$Tissue))])) Tis2 <- as.data.frame(sapply(tis2, function(x){ a <- y %>% mutate(x = ifelse(Tissue==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) Tis2$BrainAllRegions <- ifelse(y$Tissue2=="BrainAllRegions", 1, 0) colnames(Tis2) <- c(tis2, "BrainAllRegions") Tis2 <- cbind(Basename = y$Basename, Tis2) }) results2 <- rbindlist(plyr::llply(samples_order, function(x){ ME <- MEs4[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% mutate(dir = as.factor(sign(r))) %>% group_by(var, dir) %>% top_n(1, abs(r))%>% mutate(ID = paste(modules, var, sep = "_"))%>% filter(pval<0.05&abs(r)>0.38) %>% dplyr::select(-dir) }), use.names = T, idcol = "Order") ## monotremes samples_order <- samplesWithMonotremes %>% dplyr::select(Order, Tissue) %>% distinct(.) %>% group_by(Order) %>% tally() %>% right_join(samplesWithMonotremes) %>% filter(n>=4)%>% filter(!is.na(Tissue2)&Order%in%monotremes) %>%group_split(Order) names(samples_order) <- sapply(samples_order, function(x){x$Order[1]}) samples_order <- plyr::llply(samples_order, function(y){ tis2 <- unique(as.character(y$Tissue[which(!is.na(y$Tissue))])) Tis2 <- as.data.frame(sapply(tis2, function(x){ a <- y %>% mutate(x = ifelse(Tissue==paste(x), 1, 0)) %>% dplyr::select(x) return(a) })) Tis2$BrainAllRegions <- ifelse(y$Tissue2=="BrainAllRegions", 1, 0) colnames(Tis2) <- c(tis2, "BrainAllRegions") Tis2 <- cbind(Basename = y$Basename, Tis2) }) results3 <- rbindlist(plyr::llply(samples_order, function(x){ ME <- MEs3[x$Basename,] datTraits1 <- x %>% filter(Basename %in% rownames(ME)) %>% tibble::column_to_rownames("Basename") moduleTraitCor1=cor(ME,datTraits1,use="p") moduleTraitPvalue1=corPvalueStudent(moduleTraitCor1,nrow(datTraits1)) summaryResults <- as.data.frame(moduleTraitPvalue1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue1, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor1) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% mutate(dir = as.factor(sign(r))) %>% group_by(var, dir) %>% top_n(1, abs(r))%>% mutate(ID = paste(modules, var, sep = "_"))%>% filter(pval<0.05&abs(r)>0.38) %>% dplyr::select(-dir) }), use.names = T, idcol = "Order") results<- bind_rows(results, results2) results<- bind_rows(results, results3) sumTable <- results %>% dplyr::select(modules,r, var,pval, Order) %>% group_by(modules) %>% summarize(TissuePerOrder = paste(var,"_",Order, " (r=", round(r,2)," ,p=",format(pval, digits=3), ")", sep = "", collapse = " ; ")) sum2 <- sum2%>% left_join(sumTable) ``` ## Sex analysis ```{r sex analysis} gdat <- MEs3 %>% tibble::rownames_to_column(var = "Basename") %>% right_join(samplesWithMonotremes, by="Basename") %>% dplyr::select(paste(sexModule), Female, Order, CommonNames) %>% droplevels() %>% mutate(sex = factor(Female, levels = c(0,1), labels = c("Male", "Female"))) %>% filter(!is.na(sex)) %>% gather(paste(sexModule), key = "modules", value = "eigengene") %>% mutate(modules= gsub("ME", "", modules)) gDatSum <- gdat %>% group_by(sex, Order, modules) %>% add_count() %>% summarize(eigengen = mean(eigengene), se = stats::sd(eigengene)/n) %>% distinct() p <- gDatSum %>% ggplot(aes(x=sex, y=eigengen, fill=Order))+ #facet_grid(.~Order)+ theme_classic(base_size = 15)+theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1),strip.text = element_text(size=15), legend.position="right", legend.key.size = unit(0.3, "cm"))+ggpubr::stat_compare_means(method = "t.test")+ylab("Eigengene")+scale_fill_manual(values = orCols)+facet_wrap(.~modules,nrow = 1)+geom_bar(stat = "identity", position = "dodge")+ geom_errorbar(aes(ymin=eigengen-se, ymax=eigengen+se), width=.2, # Width of the error bars position=position_dodge(.9)) #+geom_boxplot(notch = TRUE) pdf(file = "Sex module.pdf", width = 10, height = 4) p dev.off() ``` ```{r distribution of X chromosome} geneMap <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V5.RDS") mouse <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mouse.Mus_musculus.GRCm38.100.Amin.V5.RDS") %>% setnames(new = c("CGid", paste("mouse", names(.)[-1], sep = "_"))) net1_top500 <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n))%>% top_n(500, abs(KME))) }) net1_top500 <- bind_rows(net1_top500) input <- net1_top500 %>% filter(modules %in% paste(gsub("ME","", sexModule))) %>% dplyr::select(CGid, modules) %>% left_join(geneMap)%>% left_join(mouse) %>% dplyr::select(CGid, SYMBOL, seqnames, main_Categories, modules, mouse_seqnames) %>% mutate(seqnames = gsub("chr", "", seqnames))%>% mutate(chromosome = ifelse(seqnames=="X", "X",ifelse(seqnames=="Y", "Y", "Somatic"))) %>% mutate(chromosome = factor(chromosome, levels = c("X", "Y", "Somatic"))) %>% mutate(modules = factor(modules)) p2<-input %>% filter(!is.na(chromosome)) %>% group_by(chromosome) %>% ggplot(aes(y=chromosome, fill=modules))+geom_bar(position = "stack")+theme_bw()+ ylab("Chromosome")+ xlab("CpG count")+ scale_fill_manual(values = levels(input$modules))+ theme_classic(base_size = 30)+ theme(legend.position = "right")+ggtitle("CpGs in sex modules") pdf(file = "sex chromosomes.pdf", width = 10, height = 5) p2 dev.off() ``` ## Age Analyais ```{r Relative Age} AgeModel <- do.call(rbind,apply(MEs, 2, function(x){ samplesNoMars$Eigengene <- x a.lm <- lm(Eigengene ~ relativeAge+Tissue+Female+SpeciesCommonName, data = samplesNoMars) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(AgeModel[,grepl( "P_" , names(AgeModel) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") AgeModel <- cbind(AgeModel, pvals) AgeModel2 <- AgeModel %>% dplyr::select(ends_with("relativeAge"))%>% tibble::rownames_to_column(var = "modules") AgeModel2$modules = with(AgeModel2, reorder(modules, t_relativeAge)) ageModules <- AgeModel2$modules[which(AgeModel2$P_relativeAge<1e-270&AgeModel2$t_relativeAge>20)] yint <- AgeModel2 %>% filter(P_relativeAge<1e-270) %>% top_n(1, -t_relativeAge) %>% dplyr::select(t_relativeAge) p1 <- AgeModel2 %>% ggplot(aes(x= modules, y=t_relativeAge, color = Beta_relativeAge))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1))+geom_hline(yintercept = yint$t_relativeAge, linetype="dashed",color = "red")+ylab("T score relative age")+ggtitle("Modules related to relative age")+labs(color = "Beta") jpeg("modules relatioship to relative Age.jpeg", width = 8, height = 3, units = "in", res = 300) p1 dev.off() # Scatter plot of relative age res <- sapply(ageModules, function(x){ i <- grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs)) samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(relativeAge,Tissue,Female,SpeciesCommonName) %>% mutate(Eigengene = MEs[,i])%>% filter(complete.cases(.)) a.lm <- lm(Eigengene ~ Tissue+Female+SpeciesCommonName, data = samp) x <- a.lm$residuals return(x) }) colnames(res) <- ageModules samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(relativeAge,Tissue,Female,SpeciesCommonName, Order) %>% filter(complete.cases(.))%>% droplevels() samp <- cbind(samp, res) samp <- samp %>% gather(key = "modules", value = "residuals", starts_with("ME")) samp <- samp %>% mutate(modules = as.factor(modules)) %>% mutate(modules= gsub("ME", "", modules)) p2 <- samp %>% ggplot(aes(x= relativeAge, y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=5, aes(label = paste(..r.label.., "p<1e-270", sep = "~`,`~")))+facet_wrap(.~modules, nrow = 2, scales = "free_y")+theme_classic(base_size = 20)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, species")+theme(legend.position = "none", axis.title.y = element_text(size=12)) pdf(file = "RelativeAge modules.pdf", width = 3.7, height = 3) p2 dev.off() p3 <- samp %>% filter(Tissue%in%c("Blood", "Liver", "Skin", "Muscle", "Cortex","Heart")) %>% ggplot(aes(x= relativeAge, y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=5)+facet_wrap(.~modules, nrow = 2, scales = "free_y")+theme_classic(base_size = 20)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, species")+theme(legend.position = "none", axis.title.y = element_text(size=12),plot.title = element_text(face = "bold"))+facet_wrap(~Tissue, nrow = 3, scales = "free_y")+ggtitle("Relative age module\nin different tissues") pdf(file = "RelativeAge modules per tissue.pdf", width = 7, height = 10) p3 dev.off() ``` ```{r Monotreme purple module} res <- sapply(ageModules, function(x){ i <- grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs)) samp <- samplesWithMonotremes %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(relativeAge,Tissue,Female,SpeciesCommonName, Order) %>% mutate(Eigengene = MEs3[,i])%>% filter(Order=="Monotremata"&!is.na(relativeAge)) %>% mutate(res = residuals(lm(Eigengene ~ Tissue+SpeciesCommonName))) a.lm <- lm(Eigengene ~ Tissue+SpeciesCommonName, data = samp) x <- a.lm$residuals return(x) }) colnames(res) <- ageModules samp <- samplesWithMonotremes%>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(relativeAge,Tissue,Female,SpeciesCommonName, Order, OriginalOrderInBatch) %>% droplevels()%>% filter(Order=="Monotremata"&!is.na(relativeAge)) samp <- cbind(samp, res) samp <- samp %>% gather(key = "modules", value = "residuals", starts_with("ME")) samp <- samp %>% mutate(modules = as.factor(modules)) %>% mutate(modules= gsub("ME", "", modules)) p2 <- samp %>% ggplot(aes(x= relativeAge, y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=5, color="blue")+facet_wrap(.~modules+SpeciesCommonName, nrow = 1, scales = "free_y")+theme_classic(base_size = 20)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, species")+theme(legend.position = "none", axis.title.y = element_text(size=12))+geom_text_repel(aes(label=OriginalOrderInBatch), size=3) jpeg(file = "RelativeAge modules monotreme.jpeg", width = 8, height = 4, units = "in", res = 300) p2 dev.off() ``` ```{r GREAT analysis} results.combined <- read.csv("enrichment results, Network1 all modules.csv") results.combined <- results.combined%>%filter(class%in%c(as.character(ageModules), maxLifeModules, weightModels$modules)) %>% mutate(var="age") results.combined2 <- results.combined %>% filter(Ontology%in%c("GO Biological Process", "GO Cellular Component", "GO Molecular Function", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB miRNA Motifs", "MSigDB Perturbation", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>% filter(HyperP<1e-4) %>% group_by(Desc) %>% add_count() %>% filter(n==1) %>% group_by(class,group, datasets) %>% top_n(1, -log10(HyperP)) results.combined3 <- results.combined[-grep("(^disease$)|(^cell$)|(^cell part$)|(^mammalian phenotype$)|(^cellular process$)|(^regulation of biological process$)|(^biological regulation$)|(^regulation of cellular process$)", results.combined$Desc),] %>% filter(Ontology%in%c("GO Biological Process", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB miRNA Motifs", "MSigDB Perturbation", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>% filter(HyperP<1e-4) %>% group_by(Desc) %>% add_count() %>% group_by(class,group, datasets) %>% top_n(1, -log10(HyperP)) results.combined4 <- results.combined %>% filter(Ontology%in%c("GO Biological Process", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB miRNA Motifs", "MSigDB Perturbation", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology)))))%>% filter(Desc %in% results.combined3$Desc) results.combined5 <- rbind(results.combined2, results.combined4) # combinedEnichemt <- results.combined5 %>% dplyr::rename(pValue = HyperP, nCommonGenes = NumFgGenesHit) %>% mutate(name = ifelse(datasets == "Upstream regulators", ID, Desc)) %>% mutate(blank = "") %>% mutate(group = gsub("ME", "", group)) %>% mutate(var= as.factor(var)) %>% mutate(class = factor(class)) #write.csv(combinedEnichemt, "enrichment generic top hits,Liver, NMR Mouse Human.csv") combinedEnichemt$name = with(combinedEnichemt, reorder(name, log10(pValue))) p4 <- combinedEnichemt %>% mutate(group = gsub(";", "\n", group))%>% ggplot(aes(y = name, x= blank, size=nCommonGenes, colour = -log10(pValue), shape=datasets))+ geom_point_rast()+ scale_size_binned()+ scale_color_gradient(high="red", low="blue")+ theme_bw()+ ylab(label = "Merged datasets")+ ggtitle("Enrichment analysis, modules related to Age, max age, and weight")+ theme_classic(base_size = 25)+ facet_wrap(.~class+group, nrow = 1)+ theme(axis.text.x = element_blank(), axis.text.y = element_text(size=24), plot.margin = margin(0.5, 0, 0, 1, "cm"), plot.title = element_text(size=24, hjust = 0.5), axis.title.x = element_blank(), axis.ticks.x = element_blank(), strip.text = element_text(size=22))+ scale_shape_manual(values=c(17, 15, 3,7, 8,1))+ guides(shape = guide_legend(override.aes = list(size=7)))+ ggpubr::grids(linetype = "dotted", axis = "y") g <- ggplot_gtable(ggplot_build(p4)) strip_both <- which(grepl('strip-', g$layout$name)) fills <- gsub("ME", "",levels(combinedEnichemt$class)) k <- 1 for (i in strip_both) { j <- which(grepl('rect', g$grobs[[i]]$grobs[[1]]$childrenOrder)) g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k] k <- k+1 } grid::grid.draw(g) jpeg(file = "enrichment modules Age.jpeg", width = 30, height = 16, units = "in", res = 300) grid::grid.draw(g) dev.off() ``` ```{r chronological Age} AgeModel <- do.call(rbind,apply(MEs, 2, function(x){ samplesNoMars$Eigengene <- x a.lm <- lm(Eigengene ~ Age+Tissue+Female+SpeciesCommonName, data = samplesNoMars) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(AgeModel[,grepl( "P_" , names(AgeModel) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") AgeModel <- cbind(AgeModel, pvals) AgeModel2 <- AgeModel %>% dplyr::select(ends_with("Age"))%>% tibble::rownames_to_column(var = "modules") AgeModel2$modules = with(AgeModel2, reorder(modules, t_Age)) age.chron.Modules <- AgeModel2$modules[which(AgeModel2$P_Age<1e-70)] yint <- AgeModel2 %>% filter(P_Age<1e-70) %>% top_n(1, -t_Age) %>% dplyr::select(t_Age) p1 <- AgeModel2 %>% ggplot(aes(x= modules, y=t_Age, color = Beta_Age))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1))+geom_hline(yintercept = yint$t_Age, linetype="dashed",color = "red")+ylab("T score chronological age")+ggtitle("Modules related to chronological age")+labs(color = "Beta") jpeg("modules relatioship to chronological Age.jpeg", width = 8, height = 3, units = "in", res = 300) p1 dev.off() # Scatter plot of chronological age res <- sapply(age.chron.Modules, function(x){ i <- grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs)) samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(Age,Tissue,Female,SpeciesCommonName) %>% mutate(Eigengene = MEs[,i])%>% filter(complete.cases(.)) a.lm <- lm(Eigengene ~ Tissue+Female+SpeciesCommonName, data = samp) x <- a.lm$residuals return(x) }) colnames(res) <- age.chron.Modules samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(Age,Tissue,Female,SpeciesCommonName, Order) %>% filter(complete.cases(.)) samp <- cbind(samp, res) samp <- samp %>% gather(key = "modules", value = "residuals", starts_with("ME")) samp <- samp %>% mutate(modules = as.factor(modules)) p2 <- samp %>% ggplot(aes(x= Age, y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=4)+facet_wrap(.~modules, nrow = 1, scales = "free_y")+theme_classic(base_size = 14)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, species") pdf(file = "chronAge modules.pdf", width = 8, height = 3) p2 dev.off() ``` ```{r log chronological Age} AgeModel <- do.call(rbind,apply(MEs, 2, function(x){ samp <- samplesNoMars %>% mutate(Eigengene = x) %>% filter(Age>0) %>% mutate(Age = log(Age)) a.lm <- lm(Eigengene ~ Age+Tissue+Female+SpeciesCommonName, data = samp) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(AgeModel[,grepl( "P_" , names(AgeModel) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") AgeModel <- cbind(AgeModel, pvals) AgeModel2 <- AgeModel %>% dplyr::select(ends_with("Age"))%>% tibble::rownames_to_column(var = "modules") AgeModel2$modules = with(AgeModel2, reorder(modules, t_Age)) age.LogChron.Modules <- AgeModel2$modules[which(AgeModel2$P_Age<1e-200)] yint <- AgeModel2 %>% filter(P_Age<1e-200) %>% top_n(1, -t_Age) %>% dplyr::select(t_Age) p1 <- AgeModel2 %>% ggplot(aes(x= modules, y=t_Age, color = Beta_Age))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1))+geom_hline(yintercept = yint$t_Age, linetype="dashed",color = "red")+ylab("T score log chronological age")+ggtitle("Modules related to log chronological age")+labs(color = "Beta") jpeg("modules relatioship to log chronological Age.jpeg", width = 8, height = 3, units = "in", res = 300) p1 dev.off() # Scatter plot of chronological age res <- sapply(age.LogChron.Modules, function(x){ i <- grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs)) samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(Age,Tissue,Female,SpeciesCommonName) %>% mutate(Eigengene = MEs[,i])%>% filter(complete.cases(.)) %>% mutate(Age = log(Age)) a.lm <- lm(Eigengene ~ Tissue+Female+SpeciesCommonName, data = samp) x <- a.lm$residuals return(x) }) colnames(res) <- age.LogChron.Modules samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(Age,Tissue,Female,SpeciesCommonName, Order) %>% filter(complete.cases(.)) %>% mutate(Age = log(Age)) samp <- cbind(samp, res) samp <- samp %>% gather(key = "modules", value = "residuals", starts_with("ME")) samp <- samp %>% mutate(modules = as.factor(modules)) p2 <- samp %>% ggplot(aes(x= Age, y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=4)+facet_wrap(.~modules, nrow = 2, scales = "free_y")+theme_classic(base_size = 14)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, species")+xlab("Log Age") pdf(file = "log chronAge modules.pdf", width = 7, height = 3.5) p2 dev.off() ``` ```{r summ} sumTable <- rbindlist(list("relativeAge" = data.frame(modules = ageModules), "chronologicalAge" = data.frame(modules = age.chron.Modules), "log.chro.Age" = data.frame(modules = age.LogChron.Modules)), idcol = "Age") %>% group_by(modules) %>% summarize(Age = paste(Age, " (+, p=0)",sep = "", collapse = " ; ")) sum2 <- sum2%>% left_join(sumTable) ``` ## Lifespan Weight Analyais ```{r Lifespan multivarate model} lifeModel <- do.call(rbind,apply(MEs, 2, function(x){ samplesNoMars$Eigengene <- x samplesNoMars <- samplesNoMars %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) samp <- samplesNoMars %>% group_by(SpeciesCommonName) %>% summarise(Eigengene = mean(Eigengene), maximum_age=maximum_age) %>% distinct() #a.lm <- lm(Eigengene ~ maximum_age, data = samp) a.lm <- lm(Eigengene ~ maximum_age+relativeAge+Tissue+Female, data = samplesNoMars) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(lifeModel[,grepl( "P_" , names(lifeModel) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") lifeModel <- cbind(lifeModel, pvals) lifeModel2 <- lifeModel %>% dplyr::select(ends_with("maximum_age"))%>% tibble::rownames_to_column(var = "modules") lifeModel2$modules = with(lifeModel2, reorder(modules, t_maximum_age)) yint <- lifeModel2 %>% mutate(direction = ifelse(t_maximum_age>0,"+", "-")) %>% group_by(direction) %>% filter(P_maximum_age==0) %>% top_n(1, -abs(t_maximum_age))%>% ungroup()%>% dplyr::select(t_maximum_age) p1 <- lifeModel2 %>% ggplot(aes(x= modules, y=t_maximum_age, color = Beta_maximum_age))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1))+geom_hline(yintercept = yint$t_maximum_age, linetype="dashed",color = "red")+ylab("T score log(max age)")+ggtitle("Modules related to log(max age), average eigengene")+labs(color = "Beta") jpeg("modules relatioship to max age.jpeg", width = 8, height = 3, units = "in", res = 300) p1 dev.off() maxLifeModules <- lifeModel2 %>% filter(P_maximum_age==0&abs(t_maximum_age)>70) maxLifeModules <- as.character(maxLifeModules$modules) # Scatter plot of relative age res <- sapply(maxLifeModules, function(x){ i <- grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs)) samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,relativeAge,Tissue, Female) %>% mutate(Eigengene = MEs[,i])%>% filter(complete.cases(.)) a.lm <- lm(Eigengene ~ relativeAge+Tissue+Female, data = samp) x <- a.lm$residuals return(x) }) colnames(res) <- maxLifeModules samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,average_weight,relativeAge,Tissue, Order,Female, SpeciesCommonName, Species, spec) %>% filter(complete.cases(.)) samp <- cbind(samp, res) samp <- samp %>% gather(key = "modules", value = "residuals", starts_with("ME")) p2 <- samp %>% mutate(modules= gsub("ME", "", modules)) %>% ggplot(aes(x= log(maximum_age), y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=5, aes(label = paste(..r.label.., "p<1e-200", sep = "~`,`~")))+facet_wrap(.~modules, nrow = 1, scales = "free_y")+theme_classic(base_size = 18)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, relative age")+theme(legend.position = "none", axis.title.y = element_text(size = 12)) pdf(file = "maxLifespan modules.pdf", width = 10, height = 3) p2 dev.off() samp2 <- samp%>% mutate(modules= gsub("ME", "", modules)) %>% mutate(modules = as.factor(modules)) %>% group_by(SpeciesCommonName, modules) %>% mutate(residuals = mean(residuals)) samp2 <- samp2 %>% dplyr::select(-Female, -Tissue, -relativeAge) %>% distinct() # p3 <- samp%>% mutate(modules= gsub("ME", "", modules)) %>% ggplot(aes(x= log(maximum_age), y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=8, color="blue", label.x.npc = 0.6, aes(label = paste(..r.label.., "p<1e-200", sep = "~`,`~")))+ggrepel::geom_text_repel(data=samp2, aes(x= log(maximum_age), y=residuals, label=SpeciesCommonName), size=7)+facet_wrap(.~modules, nrow = 1, scales = "free_y")+theme_classic(base_size = 25)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, relative age")+theme(legend.position = "none", axis.title.y = element_text(size = 25), strip.text = element_text(size=30)) p3 <- samp %>% left_join(dplyr::select(.data=samp2,SpeciesCommonName,Species))%>% mutate(modules= gsub("ME", "", modules)) %>% ggplot(aes(x= log(maximum_age), y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5, size=3)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.1, aes(label = paste(..r.label.., "p<1e-200", sep = "~`,`~")))+ggrepel::geom_text_repel(data=samp2, aes(x= log(maximum_age), y=residuals, label=spec), size=10)+facet_wrap(.~modules, nrow = 1, scales = "free_y")+theme_classic(base_size = 35)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, relative age")+theme(legend.position = "none", axis.title.y = element_text(size = 35), strip.text = element_text(size=35)) pdf(file = "maxLifespan modules, detailed.pdf", width = 25, height = 9) p3 dev.off() ``` ```{r Lifespan marginal model for average} lifeModelMarg <- do.call(rbind,apply(MEs, 2, function(x){ samplesNoMars$Eigengene <- x samplesNoMars <- samplesNoMars %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) samp <- samplesNoMars %>% group_by(SpeciesCommonName) %>% summarise(Eigengene = mean(Eigengene), maximum_age=maximum_age) %>% distinct() a.lm <- lm(Eigengene ~ maximum_age, data = samp) #a.lm <- lm(Eigengene ~ maximum_age+relativeAge+Tissue+Female, data = samplesNoMars) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(lifeModelMarg[,grepl( "P_" , names(lifeModelMarg) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") lifeModelMarg <- cbind(lifeModelMarg, pvals) lifeModelMarg2 <- lifeModelMarg %>% dplyr::select(ends_with("maximum_age"))%>% tibble::rownames_to_column(var = "modules") lifeModelMarg2$modules = with(lifeModelMarg2, reorder(modules, t_maximum_age)) yint <- lifeModelMarg2 %>% mutate(direction = ifelse(t_maximum_age>0,"+", "-")) %>% group_by(direction) %>% filter(P_maximum_age<1e-20) %>% top_n(1, -abs(t_maximum_age))%>% ungroup()%>% dplyr::select(t_maximum_age) p1 <- lifeModelMarg2 %>% ggplot(aes(x= modules, y=t_maximum_age, color = Beta_maximum_age))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1))+geom_hline(yintercept = yint$t_maximum_age, linetype="dashed",color = "red")+ylab("T score log(max age)")+ggtitle("Modules related to log(max age), average eigengene")+labs(color = "Beta") jpeg("modules relatioship to max age, marginal relationship.jpeg", width = 8, height = 3, units = "in", res = 300) p1 dev.off() maxLifeModulesMarginal <- lifeModelMarg2 %>% filter(P_maximum_age<1.5e-26) maxLifeModulesMarginal <- as.character(maxLifeModulesMarginal$modules) # Scatter plot of relative age # res <- sapply(maxLifeModules, function(x){ # i <- grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs)) # samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,relativeAge,Tissue, Female) %>% mutate(Eigengene = MEs[,i])%>% filter(complete.cases(.)) # # a.lm <- lm(Eigengene ~ relativeAge+Tissue+Female, data = samp) # # x <- a.lm$residuals # return(x) # }) i <- sapply(c(maxLifeModulesMarginal, "MEpaleturquoise"), function(x){grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs))}) res <- MEs[,i] samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,average_weight,relativeAge,Tissue, Order,Female, SpeciesCommonName, Species, spec) samp <- cbind(samp, res) samp <- samp %>% gather(key = "modules", value = "residuals", starts_with("ME")) samp <- samp %>% mutate(modules = as.factor(modules)) %>% group_by(SpeciesCommonName, modules) %>% mutate(residuals = mean(residuals)) samp2 <- samp%>% mutate(modules= gsub("ME", "", modules)) %>% dplyr::select(-Female, -Tissue, -relativeAge) %>% distinct() p3 <- samp2 %>% ggplot(aes(x= log(maximum_age), y=residuals))+geom_point_rast(aes(color=Order), alpha = 1, size=3)+geom_smooth(method = "lm")+ #ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.05, label.y.npc =0.99, aes(label = paste(..r.label.., "p<1e-20", sep = "~`,`~")))+ ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.05, label.y.npc =0.99)+ ggrepel::geom_text_repel(data=samp2, aes(x= log(maximum_age), y=residuals, label=spec), size=10, max.overlaps = 15)+facet_wrap(.~modules, nrow = 1, scales = "free_y")+theme_classic(base_size = 35)+scale_color_manual(values = orCols)+ylab("Mean eigengene")+theme(legend.position = "none", axis.title.y = element_text(size = 35), strip.text = element_text(size=35))+ylim(-0.018, 0.018) pdf(file = "maxLifespan Marginal modules, detailed.pdf", width = 20, height = 9) p3 dev.off() ``` ```{r add animal figures} library(rphylopic) images <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Caesar analysis/Figure for the paper/images of animals all.RDS") sumSample3 <- sumSample %>% filter(maximum_age!="NA") %>% mutate(residual = residuals(lm(log(as.numeric(maximum_age))~log(as.numeric(average_weight))))) %>% mutate(lifespanAdj = Hmisc::cut2(residual, cuts = c(-0.36,0.36)))%>% left_join(orColsData) %>% mutate(Order = factor(Order, levels =orColsData$Order)) sumSample3 <- rbindlist(images) %>% dplyr::rename(SpeciesLatinName=name) %>% right_join(sumSample3)%>% filter(maximum_age!="NA")%>% mutate(maximum_age=log(as.numeric(maximum_age))) %>% mutate(average_weight=log(as.numeric(average_weight)))%>% mutate(ratio=scales::rescale(maximum_age, mean = 1, sd = 0.05)[[1]]*max(maximum_age)/10) %>% mutate(ratio=ifelse(SpeciesCommonName=="Human", ratio*2, ratio)) %>% mutate(uid = ifelse(uid=="", NA, uid)) %>% filter(SpeciesCommonName%in%samp2$SpeciesCommonName) %>% left_join(samp2%>%dplyr::select(SpeciesCommonName, modules, residuals)) sumSample4 <- sumSample3 %>% filter(!is.na(uid))%>% filter(maximum_age>3.6) %>% group_by(Order, modules) %>% top_n(2, maximum_age) sumSample6 <- sumSample3%>% filter(!is.na(uid))%>% filter(maximum_age<3.6&maximum_age> -3.6)%>% group_by(Order,modules) %>% top_n(2, -maximum_age) sumSample9 <- sumSample3%>% filter(!is.na(uid))%>% filter(maximum_age< -3.6)%>% group_by(Order,modules) %>% top_n(2, -maximum_age) sumSample7 <- rbindlist(list(sumSample4,sumSample6, sumSample9)) set.seed(202) targtetmod <- gsub("ME", "", c(maxLifeModulesMarginal))[c(2,1,3,4)] p <- lapply(targtetmod, function(m){ sum <- samp2 %>% ungroup() %>% filter(modules==m) %>% summarize(cor =cor.test(residuals, log(maximum_age))$estimate, p = cor.test(residuals, log(maximum_age))$p.value) p <- samp2 %>% filter(modules==m) %>% ggplot(aes(x= log(maximum_age), y=residuals))+geom_point_rast(aes(color=Order), alpha = 1, size=3)+geom_smooth(method = "lm")+ #ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.05, label.y.npc =0.99, aes(label = paste(..r.label.., "p<1e-20", sep = "~`,`~")))+ #ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.05, label.y.npc =0.99, geom="label", alpha=0.8)+ ggrepel::geom_text_repel(data=(samp2%>% filter(modules==m)), aes(x= log(maximum_age), y=residuals, label=spec), size=7, max.overlaps = 7)+theme_classic(base_size = 35)+scale_color_manual(values = orCols)+ylab("Mean eigengene")+theme(legend.position = "none", axis.title.y = element_text(size = 35), strip.text = element_text(size=35))+ylim(-0.018, 0.018)+ggtitle(paste(i, ",\n(cor=", round(sum$cor,2), ", p=", format(sum$p,digits=3), ")", sep = ""))+ scale_x_continuous(expand = expansion(mult = c(0, .1)))+ scale_y_continuous(expand = expansion(mult = c(0, .1))) sumSample7 <- sumSample7 %>% filter(modules==m) %>% filter(!duplicated(uid)) for(i in which(!is.na(sumSample7$uid))){ set.seed(i+99) #img <- image_data(sumSample7$uid[i], size = "128")[[1]] if(sumSample7$SpeciesCommonName[i]=="Bowhead whale"){ p <- p+add_phylopic(uuid = sumSample7$uid[i], x = sumSample7$maximum_age[i], y = sumSample7$residuals[i], alpha = 0.9, color = sumSample7$colors[i], ysize = sumSample7$ratio[i]) }else{ p <- p+add_phylopic(uuid = sumSample7$uid[i], x = sumSample7$maximum_age[i], y = sumSample7$residuals[i]*sample(seq(from=0.95, to = 1.05, by = 0.05),size = 1), alpha = 0.9, color = sumSample7$colors[i], ysize = sumSample7$ratio[i]) } } return(p) }) p <- ggpubr::ggarrange(plotlist = p, nrow = 1) pdf(file = "maxLifespan Marginal modules, detailed.pdf", width = 35, height = 8) p dev.off() ``` ```{r add animal figures} library(rphylopic) images <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Caesar analysis/Figure for the paper/images of animals all.RDS") sumSample3 <- sumSample %>% filter(maximum_age!="NA") %>% mutate(residual = residuals(lm(log(as.numeric(maximum_age))~log(as.numeric(average_weight))))) %>% mutate(lifespanAdj = Hmisc::cut2(residual, cuts = c(-0.36,0.36)))%>% left_join(orColsData) %>% mutate(Order = factor(Order, levels =orColsData$Order)) sumSample3 <- rbindlist(images) %>% dplyr::rename(SpeciesLatinName=name) %>% right_join(sumSample3)%>% filter(maximum_age!="NA")%>% mutate(maximum_age=log(as.numeric(maximum_age))) %>% mutate(average_weight=log(as.numeric(average_weight)))%>% mutate(ratio=scales::rescale(maximum_age, mean = 1, sd = 0.05)[[1]]*max(maximum_age)/10) %>% mutate(ratio=ifelse(SpeciesCommonName=="Human", ratio*2, ratio)) %>% mutate(uid = ifelse(uid=="", NA, uid)) %>% filter(SpeciesCommonName%in%samp2$SpeciesCommonName) %>% left_join(samp2%>%dplyr::select(SpeciesCommonName, residuals)) sumSample4 <- sumSample3 %>% filter(!is.na(uid))%>% filter(!duplicated(SpeciesLatinName))%>% filter(maximum_age>3.6) %>% top_n(2, maximum_age) sumSample6 <- sumSample3%>% filter(!is.na(uid))%>% filter(!duplicated(SpeciesLatinName))%>% filter(maximum_age<3.6&maximum_age> -3.6)%>% top_n(2, -maximum_age) sumSample9 <- sumSample3%>% filter(!is.na(uid))%>% filter(!duplicated(SpeciesLatinName))%>% filter(maximum_age< -3.6) %>% top_n(2, -maximum_age) sumSample7 <- rbindlist(list(sumSample4,sumSample6, sumSample9)) %>% dplyr::select(-modules) %>% filter(!duplicated(SpeciesLatinName)) %>% mutate(modules="midnightblue") set.seed(202) targtetmod <- "midnightblue" p <- lapply(targtetmod, function(m){ sum <- samp2 %>% ungroup() %>% filter(modules==m) %>% summarize(cor =cor.test(residuals, log(maximum_age))$estimate, p = cor.test(residuals, log(maximum_age))$p.value) p <- samp2 %>% filter(modules==m) %>% ggplot(aes(x= log(maximum_age), y=residuals))+geom_point_rast(alpha = 1, size=4, color="midnightblue")+geom_smooth(method = "lm", color="red")+ #ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.05, label.y.npc =0.99, aes(label = paste(..r.label.., "p<1e-20", sep = "~`,`~")))+ #ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.05, label.y.npc =0.99, geom="label", alpha=0.8) theme_classic(base_size = 35)+scale_color_manual(values = orCols)+ylab("Mean eigengene")+theme(legend.position = "none", axis.title.y = element_text(size = 35), strip.text = element_text(size=35))+ylim(-0.018, 0.018)+ggtitle(paste(targtetmod, " module", ",\n(cor=", round(sum$cor,2), ", p=", format(sum$p,digits=3), ")", sep = ""))+ scale_x_continuous(expand = expansion(mult = c(0, .1)))+ scale_y_continuous(expand = expansion(mult = c(0, .1))) sumSample7 <- sumSample7 %>% filter(modules==m) %>% filter(!duplicated(uid)) # for(i in which(!is.na(sumSample7$uid))){ # set.seed(i+99) # #img <- image_data(sumSample7$uid[i], size = "128")[[1]] # if(sumSample7$SpeciesCommonName[i]=="Bowhead whale"){ # p <- p+add_phylopic(uuid = sumSample7$uid[i], x = sumSample7$maximum_age[i], y = sumSample7$residuals[i], alpha = 0.9, color = sumSample7$colors[i], ysize = sumSample7$ratio[i]) # }else{ # p <- p+add_phylopic(uuid = sumSample7$uid[i], x = sumSample7$maximum_age[i], y = sumSample7$residuals[i]*sample(seq(from=0.95, to = 1.05, by = 0.05),size = 1), alpha = 0.9, color = sumSample7$colors[i], ysize = sumSample7$ratio[i]) # } #} return(p) }) pdf(file = "maxLifespan Marginal modules, midnight blue, detailed.pdf", width = 8, height = 8) p[[1]] dev.off() ``` ```{r weight modules multivariate} weightModel <- do.call(rbind,apply(MEs, 2, function(x){ samplesNoMars$Eigengene <- x samplesNoMars <- samplesNoMars %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) samp <- samplesNoMars %>% group_by(SpeciesCommonName) %>% summarise(Eigengene = mean(Eigengene), average_weight=average_weight) %>% distinct() #a.lm <- lm(Eigengene ~ average_weight, data = samp) a.lm <- lm(Eigengene ~ average_weight+relativeAge+Tissue+Female, data = samplesNoMars) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(weightModel[,grepl( "P_" , names(weightModel) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") weightModel <- cbind(weightModel, pvals) weightModels <- weightModel %>% dplyr::select(ends_with("average_weight"))%>% tibble::rownames_to_column(var = "modules") weightModels$modules = with(weightModels, reorder(modules, t_average_weight)) yint <- weightModels %>% mutate(direction = ifelse(t_average_weight>0,"+", "-")) %>% group_by(direction) %>% filter(P_average_weight==0) %>% top_n(1, -abs(t_average_weight))%>% ungroup()%>% dplyr::select(t_average_weight) p1 <- weightModels %>% ggplot(aes(x= modules, y=t_average_weight, color = Beta_average_weight))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1))+geom_hline(yintercept = yint$t_average_weight, linetype="dashed",color = "red")+ylab("T score log(average weight)")+ggtitle("Modules related to log(average weight), average eigengene")+labs(color = "Beta") jpeg("modules relatioship to species weight.jpeg", width = 8, height = 3, units = "in", res = 300) p1 dev.off() weightModules <- weightModels %>% filter(P_average_weight==0) %>% group_by(sign(t_average_weight)) %>%top_n(2, abs(t_average_weight)) weightModules <- weightModules$modules # Scatter plot of relative age res <- sapply(weightModules, function(x){ i <- grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs)) samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,average_weight,relativeAge,Tissue, Female) %>% mutate(Eigengene = MEs[,i])%>% filter(complete.cases(.)) a.lm <- lm(Eigengene ~ relativeAge+Tissue+Female, data = samp) x <- a.lm$residuals return(x) }) colnames(res) <- weightModules samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,average_weight,relativeAge,Tissue, Order,Female, SpeciesCommonName, Species, spec) %>% filter(complete.cases(.)) samp <- cbind(samp, res) samp <- samp %>% gather(key = "modules", value = "residuals", starts_with("ME")) p2 <- samp%>% mutate(modules= gsub("ME", "", modules)) %>% ggplot(aes(x= log(average_weight), y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=4,aes(label = paste(..r.label.., "p<1e-200", sep = "~`,`~")))+facet_wrap(.~modules, nrow = 1, scales = "free_y")+theme_classic(base_size = 18)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, relative age")+theme(legend.position = "none", axis.title.y = element_text(size = 12)) pdf(file = "average weight modules.pdf", width = 10, height = 3) p2 dev.off() samp2 <- samp%>% mutate(modules= gsub("ME", "", modules)) %>% mutate(modules = as.factor(modules)) %>% group_by(SpeciesCommonName, modules) %>% mutate(residuals = mean(residuals)) samp2 <- samp2 %>% dplyr::select(-Female, -Tissue, -relativeAge) %>% distinct() p3 <- samp %>% mutate(modules= gsub("ME", "", modules)) %>% ggplot(aes(x= log(average_weight), y=residuals))+geom_point_rast(aes(color=Order), alpha = 0.5, size=3)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.01, aes(label = paste(..r.label.., "p<1e-200", sep = "~`,`~")))+ggrepel::geom_text_repel(data=samp2, aes(x= log(average_weight), y=residuals, label=spec), size=10)+facet_wrap(.~modules, nrow = 1, scales = "free_y")+theme_classic(base_size = 35)+scale_color_manual(values = orCols)+ylab("Adjusted Eigengene \ncovariates: tissue, sex, relative age")+theme(legend.position = "none", axis.title.y = element_text(size = 35), strip.text = element_text(size=35)) pdf(file = "average weight modules, detailed.pdf", width = 25, height = 9) p3 dev.off() ``` ```{r weight modules marginal} weightModelMarg <- do.call(rbind,apply(MEs, 2, function(x){ samplesNoMars$Eigengene <- x samplesNoMars <- samplesNoMars %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) samp <- samplesNoMars %>% group_by(SpeciesCommonName) %>% summarise(Eigengene = mean(Eigengene), average_weight=average_weight) %>% distinct() a.lm <- lm(Eigengene ~ average_weight, data = samp) #a.lm <- lm(Eigengene ~ average_weight+relativeAge+Tissue+Female, data = samplesNoMars) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(weightModelMarg[,grepl( "P_" , names(weightModelMarg) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") weightModelMarg <- cbind(weightModelMarg, pvals) weightModelMargs <- weightModelMarg %>% dplyr::select(ends_with("average_weight"))%>% tibble::rownames_to_column(var = "modules") weightModelMargs$modules = with(weightModelMargs, reorder(modules, t_average_weight)) yint <- weightModelMargs %>% mutate(direction = ifelse(t_average_weight>0,"+", "-")) %>% group_by(direction) %>% filter(P_average_weight<3.6e-17) %>% top_n(1, -abs(t_average_weight))%>% ungroup()%>% dplyr::select(t_average_weight) p1 <- weightModelMargs %>% ggplot(aes(x= modules, y=t_average_weight, color = Beta_average_weight))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1))+geom_hline(yintercept = yint$t_average_weight, linetype="dashed",color = "red")+ylab("T score log(average weight)")+ggtitle("Modules related to log(average weight), average eigengene")+labs(color = "Beta") jpeg("modules marginal relatioship to species weight.jpeg", width = 8, height = 3, units = "in", res = 300) p1 dev.off() #weightModulesMarg <- weightModelMargs %>% filter(P_average_weight==0&abs(t_average_weight)>80) weightModulesMarg <- weightModelMargs %>% filter(P_average_weight<3.6e-17) weightModulesMarg <- as.character(weightModulesMarg$modules) weightModulesMarg <- c(weightModulesMarg) # Scatter plot of relative age i <- sapply(weightModulesMarg, function(x){grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs))}) res <- MEs[,i] samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,average_weight,relativeAge,Tissue, Order,Female, SpeciesCommonName, Species, spec) samp <- cbind(samp, res) samp <- samp %>% gather(key = "modules", value = "residuals", starts_with("ME")) samp <- samp %>% mutate(modules = as.factor(modules)) %>% group_by(SpeciesCommonName, modules) %>% mutate(residuals = mean(residuals)) samp2 <- samp%>% mutate(modules= gsub("ME", "", modules)) %>% dplyr::select(-Female, -Tissue, -relativeAge) %>% distinct() p3 <- samp2 %>% ggplot(aes(x= log(average_weight), y=residuals))+geom_point_rast(aes(color=Order), alpha = 1, size=3)+geom_smooth(method = "lm")+ ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.01, label.y.npc =0.99, aes(label = paste(..r.label.., "p<1e-17", sep = "~`,`~")))+ ggrepel::geom_text_repel(data=samp2, aes(x= log(average_weight), y=residuals, label=spec), size=10)+facet_wrap(.~modules, nrow = 1, scales = "free_y")+theme_classic(base_size = 35)+scale_color_manual(values = orCols)+ylab("Mean eigengene")+theme(legend.position = "none", axis.title.y = element_text(size = 35), strip.text = element_text(size=35))+ylim(-0.018, 0.02) jpeg(file = "average marginal weight modules, detailed.jpeg", width = 25, height = 9, units = "in", res = 300) p3 dev.off() ``` ```{r add animal figures} images <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Caesar analysis/Figure for the paper/images of animals all.RDS") sumSample3 <- sumSample %>% filter(maximum_age!="NA") %>% mutate(residual = residuals(lm(log(as.numeric(maximum_age))~log(as.numeric(average_weight))))) %>% mutate(lifespanAdj = Hmisc::cut2(residual, cuts = c(-0.36,0.36)))%>% left_join(orColsData) %>% mutate(Order = factor(Order, levels =orColsData$Order)) sumSample3 <- rbindlist(images) %>% dplyr::rename(SpeciesLatinName=name) %>% right_join(sumSample3)%>% filter(maximum_age!="NA")%>% mutate(maximum_age=log(as.numeric(maximum_age))) %>% mutate(average_weight=log(as.numeric(average_weight)))%>% mutate(ratio=scales::rescale(average_weight, mean = 1, sd = 0.05)[[1]]*max(average_weight)/10) %>% mutate(ratio=ifelse(SpeciesCommonName=="Human", ratio*2, ratio)) %>% mutate(uid = ifelse(uid=="", NA, uid)) %>% filter(SpeciesCommonName%in%samp2$SpeciesCommonName) %>% left_join(samp2%>%dplyr::select(SpeciesCommonName, modules, residuals)) sumSample4 <- sumSample3 %>% filter(!is.na(uid))%>% filter(average_weight>15) %>% group_by(Order, modules) %>% top_n(2, maximum_age) sumSample6 <- sumSample3%>% filter(!is.na(uid))%>% filter(average_weight<15&average_weight> 5)%>% group_by(Order,modules) %>% top_n(2, -maximum_age) sumSample9 <- sumSample3%>% filter(!is.na(uid))%>% filter(average_weight< 5)%>% group_by(Order,modules) %>% top_n(2, -maximum_age) sumSample7 <- rbindlist(list(sumSample4,sumSample6, sumSample9)) set.seed(202) targtetmod <- gsub("ME", "", weightModulesMarg)[c(2, 1, 3, 4)] p <- lapply(targtetmod, function(i){ sum <- samp2 %>% ungroup() %>% filter(modules==i) %>% summarize(cor =cor.test(residuals, log(average_weight))$estimate, p = cor.test(residuals, log(average_weight))$p.value) p <- samp2 %>% filter(modules==i) %>% ggplot(aes(x= log(average_weight), y=residuals))+geom_point_rast(aes(color=Order), alpha = 1, size=3)+geom_smooth(method = "lm")+ #ggpubr::stat_cor(size=10, color="blue", label.x.npc = 0.01, label.y.npc =0.99, geom="label", alpha=0.8)+ ggrepel::geom_text_repel(data=(samp2%>% filter(modules==i)), aes(x= log(average_weight), y=residuals, label=spec), size=7, max.overlaps = 7)+theme_classic(base_size = 35)+scale_color_manual(values = orCols)+ylab("Mean eigengene")+theme(legend.position = "none", axis.title.y = element_text(size = 35), strip.text = element_text(size=35))+ylim(-0.018, 0.02)+ggtitle(paste(i, ",\n(cor=", round(sum$cor,2), ", p=", format(sum$p,digits=3), ")", sep = ""))+ scale_x_continuous(expand = expansion(mult = c(0, .1))) sumSample7 <- sumSample7 %>% filter(modules==i) %>% filter(!duplicated(uid)) for(i in which(!is.na(sumSample7$uid))){ set.seed(i+99) img <- image_data(sumSample7$uid[i], size = "128")[[1]] if(sumSample7$SpeciesCommonName[i]=="Bowhead whale"){ p <- p+add_phylopic(img, x = sumSample7$average_weight[i], y = sumSample7$residuals[i], alpha = 0.9, color = sumSample7$colors[i], ysize = sumSample7$ratio[i]) }else{ p <- p+add_phylopic(img, x = sumSample7$average_weight[i], y = sumSample7$residuals[i]*sample(seq(from=0.95, to = 1.05, by = 0.05),size = 1), alpha = 0.9, color = sumSample7$colors[i], ysize = sumSample7$ratio[i]) } } return(p) }) p <- ggpubr::ggarrange(plotlist = p, nrow = 1) pdf(file = "average marginal weight modules, detailed.pdf", width = 35, height = 8) p dev.off() ``` ```{r GREAT analysis} results.combined <- read.csv("enrichment results, Network1 all modules.csv") results.combined <- results.combined%>%filter(class%in%c(weightModules, maxLifeModules)) %>% mutate(group=gsub(" ; ", "\n", group)) results.combined2 <- results.combined %>% filter(Ontology%in%c("GO Biological Process", "GO Cellular Component", "GO Molecular Function", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB miRNA Motifs", "MSigDB Perturbation", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>% filter(HyperP<1e-10) %>% group_by(Desc) %>% add_count() %>% filter(n==1) %>% group_by(class,group, datasets) %>% top_n(1, -log10(HyperP)) results.combined3 <- results.combined[grep("(mortality)|(aging)|(survival)|(perinatal lethality)|(weight)", results.combined$Desc),] %>% filter(Ontology%in%c("GO Biological Process", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB miRNA Motifs", "MSigDB Perturbation", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology))))) %>% filter(HyperP<1e-5) results.combined4 <- results.combined %>% filter(Ontology%in%c("GO Biological Process", "Disease Ontology", "Human Phenotype", "Mouse Phenotype", "MSigDB miRNA Motifs", "MSigDB Perturbation", "MSigDB Predicted Promoter Motifs", "MSigDB Pathway", "BioCyc Pathway", "PANTHER Pathway")) %>% mutate(datasets =ifelse(grepl("(GO)",Ontology), "Gene ontology", ifelse(grepl("(Disease)",Ontology), "Diseases", ifelse(grepl("(Motif)|(Perturbation)",Ontology), "Upstream regulators", ifelse(grepl("(Pathway)",Ontology), "Canonical pathways",Ontology)))))%>% filter(Desc %in% results.combined3$Desc) results.combined5 <- bind_rows(results.combined2,results.combined4) # combinedEnichemt <- results.combined5 %>% dplyr::rename(pValue = HyperP, nCommonGenes = NumFgGenesHit) %>% mutate(name = ifelse(datasets == "Upstream regulators", ID, Desc)) %>% mutate(blank = "") %>% mutate(group = gsub("ME", "", group)) %>% mutate(group= as.factor(group)) %>% mutate(class = gsub("ME", "", class))%>% mutate(class = factor(class)) #write.csv(combinedEnichemt, "enrichment generic top hits,Liver, NMR Mouse Human.csv") combinedEnichemt$name = with(combinedEnichemt, reorder(name, log10(pValue))) p4 <- combinedEnichemt %>% ggplot(aes(y = name, x= blank, size=nCommonGenes, colour = -log10(pValue), shape=datasets))+ geom_point_rast()+ scale_size_binned()+ scale_color_gradient(high="red", low="blue")+ theme_bw()+ ylab(label = "Merged datasets")+ ggtitle("Enrichment analysis, modules related to Max Age and Weight")+ theme_classic(base_size = 25)+ facet_wrap(.~class+group, nrow = 1)+ theme(axis.text.x = element_blank(), axis.text.y = element_text(size=24), plot.margin = margin(0.5, 0, 0, 1, "cm"), plot.title = element_text(size=24, hjust = 0.5), axis.title.x = element_blank(), axis.ticks.x = element_blank(), strip.text = element_text(size=18))+ scale_shape_manual(values=c(17, 15, 3,7, 8,1))+ guides(shape = guide_legend(override.aes = list(size=7)))+ ggpubr::grids(linetype = "dotted", axis = "y") g <- ggplot_gtable(ggplot_build(p4)) strip_both <- which(grepl('strip-', g$layout$name)) fills <- gsub("ME", "",levels(combinedEnichemt$class)) k <- 1 for (i in strip_both) { j <- which(grepl('rect', g$grobs[[i]]$grobs[[1]]$childrenOrder)) g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k] k <- k+1 } grid::grid.draw(g) pdf(file = "enrichment modules MaxLife Weight.pdf", width = 37, height = 23) grid::grid.draw(g) dev.off() ``` ## Lifespan per order ```{r} samplesArtiodactila <- samplesNoMars %>% filter(Order=="4.Artiodactyla") %>% mutate(Order=ifelse(grepl(paste("(", "4.", "", 11:23, ".", ")", sep = "", collapse = "|"), spec), "4.Artiodactyla.aquatic", "4.Artiodactyla.terrestrial")) samp <- bind_rows(samplesNoMars, samplesArtiodactila) samples_order <- samp %>% dplyr::select(Order, SpeciesLatinName) %>% distinct(.) %>% group_by(Order) %>% tally() %>% right_join(samp)%>% filter(n>=10)%>%group_split(Order) names(samples_order) <- sapply(samples_order, function(x){x$Order[1]}) samples_tissue <- samp %>% dplyr::select(Tissue2, SpeciesLatinName) %>% distinct(.) %>% group_by(Tissue2) %>% tally() %>% right_join(samp) %>% filter(n>=10) %>%group_split(Tissue2) names(samples_tissue) <- sapply(samples_tissue, function(x){x$Tissue2[1]}) samples_tissue_order <- samp %>% dplyr::select(Tissue2, SpeciesLatinName, Order) %>% distinct(.) %>% group_by(Tissue2, Order) %>% tally() %>% right_join(samp) %>% filter(n>=10) %>%group_split(Tissue2, Order) names(samples_tissue_order) <- sapply(samples_tissue_order, function(x){paste(x$Order[1], x$Tissue2[1], sep = "_")}) samples_order <- append(samples_order, samples_tissue) samples_order <- append(samples_order, samples_tissue_order) samples_order$all <- samplesNoMars result <- lapply(samples_order, function(y){ ME <- MEs[y$Basename,] lifeModel <- do.call(rbind,apply(ME, 2, function(x){ y$Eigengene <- x y <- y %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) y <- y %>% group_by(SpeciesCommonName) %>% summarise(Eigengene = mean(Eigengene), maximum_age=maximum_age) %>% distinct() a.lm <- lm(Eigengene ~ maximum_age, data = y) #a.lm <- lm(Eigengene ~ maximum_age+relativeAge+Tissue+Female, data = y) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() %>% mutate(cor=cor.test(y$Eigengene, y$maximum_age)$estimate[[1]]) })) }) # Summary lifeModel_order <- lapply(1:length(result), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(t_maximum_age) %>% tibble::rownames_to_column(var = "modules") names(a)[2] <- names(result)[x] return(a) }) lifeModel_order <- plyr::join_all(lifeModel_order) %>% tibble::column_to_rownames(var="modules") lifeModel_order <- t(lifeModel_order) # for order analysis make it 1e-3 #Annotation lifeModel_order_annot <- lapply(1:length(result), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(P_maximum_age, t_maximum_age) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_maximum_age<1e-3) %>% dplyr::select(-P_maximum_age) a <- a %>% dplyr::select(-P_maximum_age, -t_maximum_age) %>% left_join(b) names(a)[2] <- names(result)[x] return(a) }) lifeModel_order_annot <- plyr::join_all(lifeModel_order_annot) %>% tibble::column_to_rownames(var="modules") lifeModel_order_annot <- t(lifeModel_order_annot) lifeModel_order_annot[!is.na(lifeModel_order_annot)] = "*" lifeModel_order_annot[is.na(lifeModel_order_annot)] = "" lifeModel_order_annot <- lifeModel_order_annot[rownames(lifeModel_order), colnames(lifeModel_order)] # A bar plot of module frequency library(ComplexHeatmap) cols <- gsub("ME", "", colnames(lifeModel_order)) names(cols) <- colnames(lifeModel_order) ha = HeatmapAnnotation(Modules = colnames(lifeModel_order), col = list(Modules = cols), show_legend =FALSE) a <-c(1,3:4, 7:9, 2,32) Heatmap <- Heatmap(lifeModel_order[a,], col = my_palette, show_column_names = FALSE, name = "T score", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = FALSE, row_dend_reorder=FALSE, row_title="Phylogenetic orders", column_title = "Module association with max lifespan within phylogenetic orders", bottom_annotation = ha, clustering_method_rows ="complete", cell_fun = function(j, i, x, y, w, h, col) { grid.text(lifeModel_order_annot[a,][i, j], x, y, gp=gpar(fontsize=20))}) pdf(file = "Heatmap modules Max lifespan order.pdf", width = 16, height = 3) draw(Heatmap, heatmap_legend_side = "right", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ## Weight result2 <- lapply(samples_order, function(y){ ME <- MEs[y$Basename,] lifeModel <- do.call(rbind,apply(ME, 2, function(x){ y$Eigengene <- x y <- y %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) y <- y %>% group_by(SpeciesCommonName) %>% summarise(Eigengene = mean(Eigengene), average_weight=average_weight) %>% distinct() a.lm <- lm(Eigengene ~ average_weight, data = y) #a.lm <- lm(Eigengene ~ average_weight+relativeAge+Tissue+Female, data = y) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame()%>% mutate(cor=cor.test(y$Eigengene, y$average_weight)$estimate[[1]]) })) }) weightModel_order <- lapply(1:length(result2), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(t_average_weight) %>% tibble::rownames_to_column(var = "modules") names(a)[2] <- names(result)[x] return(a) }) weightModel_order <- plyr::join_all(weightModel_order) %>% tibble::column_to_rownames(var="modules") weightModel_order <- t(weightModel_order) # Annotation weightModel_order_annot <- lapply(1:length(result2), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(P_average_weight, t_average_weight) %>% tibble::rownames_to_column(var = "modules") b <- a%>% filter(P_average_weight<1e-3) %>% dplyr::select(-P_average_weight) a <- a %>% dplyr::select(-P_average_weight, -t_average_weight) %>% left_join(b) names(a)[2] <- names(result2)[x] return(a) }) weightModel_order_annot <- plyr::join_all(weightModel_order_annot) %>% tibble::column_to_rownames(var="modules") weightModel_order_annot <- t(weightModel_order_annot) weightModel_order_annot[!is.na(weightModel_order_annot)] = "*" weightModel_order_annot[is.na(weightModel_order_annot)] = "" weightModel_order_annot <- weightModel_order_annot[rownames(weightModel_order), colnames(weightModel_order)] # A bar plot of module frequency library(ComplexHeatmap) cols <- gsub("ME", "", colnames(weightModel_order)) names(cols) <- colnames(weightModel_order) ha = HeatmapAnnotation(Modules = colnames(weightModel_order), col = list(Modules = cols), show_legend =FALSE) Heatmap <- Heatmap(weightModel_order[a,], col = my_palette, show_column_names = TRUE, name = "T score", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = FALSE, row_dend_reorder=FALSE, row_title="Phylogenetic orders", column_title = "Module association with species weight within phylogenetic orders", bottom_annotation = ha, clustering_method_rows ="complete", cell_fun = function(j, i, x, y, w, h, col) { grid.text(weightModel_order_annot[a,][i, j], x, y, gp=gpar(fontsize=20))}) pdf(file = "Heatmap modules Weight order.pdf", width = 16, height = 4.3) draw(Heatmap, heatmap_legend_side = "right", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ```{r} lifeSpanSummary <- lapply(1:length(result), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(P_maximum_age,t_maximum_age, cor) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_maximum_age<1e-3) return(b) }) names(lifeSpanSummary) <- names(result) lifeSpanSummary <- rbindlist(lifeSpanSummary, idcol = "Order") %>% filter(P_maximum_age<1e-3)%>% mutate(c = ifelse(t_maximum_age>0, "+", ifelse(t_maximum_age<0, "-", NA))) %>% group_by(modules) %>% summarize(maximum_age.marginal = paste(Order, " (r=",round(cor,2),", p=" ,format(P_maximum_age, digits = 3), ")", sep = "", collapse = " ; ")) sum2 <- sum2 %>% left_join(lifeSpanSummary) weightModel_order_Summary <- lapply(1:length(result2), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(P_average_weight, t_average_weight, cor) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_average_weight<1e-3) return(b) }) names(weightModel_order_Summary) <- names(result) weightModel_order_Summary <- rbindlist(weightModel_order_Summary, idcol = "Order") %>% filter(P_average_weight<1e-3)%>% mutate(c = ifelse(t_average_weight>0, "+", ifelse(t_average_weight<0, "-", NA))) %>% group_by(modules) %>% summarize(average_weight.marginal = paste(Order, " (r=",round(cor,2),", p=" ,format(P_average_weight, digits = 3), ")", sep = "", collapse = " ; ")) sum2 <- sum2 %>% left_join(weightModel_order_Summary) ``` ```{r lifespan per order multivariate} samples_order <- samplesNoMars %>% dplyr::select(Order, SpeciesLatinName) %>% distinct(.) %>% group_by(Order) %>% tally() %>% right_join(samplesNoMars) %>% filter(n>=10) %>%group_split(Order) names(samples_order) <- sapply(samples_order, function(x){x$Order[1]}) samples_order$all <- samplesNoMars result <- sapply(samples_tissue, function(y){ ME <- MEs[y$Basename,] lifeModel <- do.call(rbind,apply(ME, 2, function(x){ y$Eigengene <- x y <- y %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) a.lm <- lm(Eigengene ~ maximum_age+relativeAge+Tissue+Female, data = y) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) }) # Summary lifeModel_order <- lapply(1:length(result), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(t_maximum_age) %>% tibble::rownames_to_column(var = "modules") names(a)[2] <- names(result)[x] return(a) }) lifeModel_order <- plyr::join_all(lifeModel_order) %>% tibble::column_to_rownames(var="modules") lifeModel_order <- t(lifeModel_order) # for order analysis make it 1e-3 #Annotation lifeModel_order_annot <- lapply(1:length(result), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(P_maximum_age, t_maximum_age) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_maximum_age<1e-3) %>% top_n(length(maxLifeModules),abs(t_maximum_age)) %>% dplyr::select(-P_maximum_age) a <- a %>% dplyr::select(-P_maximum_age, -t_maximum_age) %>% left_join(b) names(a)[2] <- names(result)[x] return(a) }) lifeModel_order_annot <- plyr::join_all(lifeModel_order_annot) %>% tibble::column_to_rownames(var="modules") lifeModel_order_annot <- t(lifeModel_order_annot) lifeModel_order_annot[!is.na(lifeModel_order_annot)] = "*" lifeModel_order_annot[is.na(lifeModel_order_annot)] = "" lifeModel_order_annot <- lifeModel_order_annot[rownames(lifeModel_order), colnames(lifeModel_order)] ## Weight result2 <- lapply(samples_order, function(y){ ME <- MEs[y$Basename,] lifeModel <- do.call(rbind,apply(ME, 2, function(x){ y$Eigengene <- x y <- y %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) a.lm <- lm(Eigengene ~ average_weight+relativeAge+Tissue+Female, data = y) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) }) weightModel_order <- lapply(1:length(result2), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(t_average_weight) %>% tibble::rownames_to_column(var = "modules") names(a)[2] <- names(result)[x] return(a) }) weightModel_order <- plyr::join_all(weightModel_order) %>% tibble::column_to_rownames(var="modules") weightModel_order <- t(weightModel_order) # Annotation weightModel_order_annot <- lapply(1:length(result2), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(P_average_weight, t_average_weight) %>% tibble::rownames_to_column(var = "modules") b <- a%>% filter(P_average_weight<1e-3) %>% top_n(length(weightModules),abs(t_average_weight)) %>% dplyr::select(-P_average_weight) a <- a %>% dplyr::select(-P_average_weight, -t_average_weight) %>% left_join(b) names(a)[2] <- names(result2)[x] return(a) }) weightModel_order_annot <- plyr::join_all(weightModel_order_annot) %>% tibble::column_to_rownames(var="modules") weightModel_order_annot <- t(weightModel_order_annot) weightModel_order_annot[!is.na(weightModel_order_annot)] = "*" weightModel_order_annot[is.na(weightModel_order_annot)] = "" weightModel_order_annot <- weightModel_order_annot[rownames(weightModel_order), colnames(weightModel_order)] ``` ```{r} lifeSpanSummary <- lapply(1:length(result), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(P_maximum_age, t_maximum_age) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_maximum_age<1e-3) return(b) }) names(lifeSpanSummary) <- names(result) lifeSpanSummary <- rbindlist(lifeSpanSummary, idcol = "Order") %>% filter(P_maximum_age<1e-3)%>% mutate(c = ifelse(t_maximum_age>0, "+", ifelse(t_maximum_age<0, "-", NA))) %>% group_by(modules) %>% summarize(maximum_age.MLR = paste(Order, " (",c,", p=" ,format(P_maximum_age, digits = 3), ")", sep = "", collapse = " ; ")) sum2 <- sum2 %>% left_join(lifeSpanSummary) weightModel_order_Summary <- lapply(1:length(result2), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(P_average_weight, t_average_weight) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_average_weight<1e-3) return(b) }) names(weightModel_order_Summary) <- names(result) weightModel_order_Summary <- rbindlist(weightModel_order_Summary, idcol = "Order") %>% filter(P_average_weight<1e-3)%>% mutate(c = ifelse(t_average_weight>0, "+", ifelse(t_average_weight<0, "-", NA))) %>% group_by(modules) %>% summarize(average_weight.MLR = paste(Order, " (",c,", p=" ,format(P_average_weight, digits = 3), ")", sep = "", collapse = " ; ")) sum2 <- sum2 %>% left_join(weightModel_order_Summary) ``` ```{r heatMap with arranged modules} # take the final summary table lifeModel_order <- lifeModel_order[,sum2$modules[which(sum2$group!="Marsupials")]] lifeModel_order_annot <- lifeModel_order_annot[,sum2$modules[which(sum2$group!="Marsupials")]] # Take sum2 from network analysis, I wanted to get the grouping from Netwrok analysis trCols <- as.character(sum2$color[which(sum2$group!="Marsupials")]) names(trCols) <- sum2$group[which(sum2$group!="Marsupials")] # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(lifeModel_order), Traits = sum2$group[which(sum2$group!="Marsupials")], col = list(Modules = cols, Traits = trCols), show_legend =FALSE) Heatmap <- Heatmap(lifeModel_order, col = my_palette, show_column_names = FALSE, name = "T score", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = FALSE, row_dend_reorder=FALSE, row_title="Phylogenetic orders", column_title = "Module association with max lifespan within phylogenetic orders", bottom_annotation = ha, clustering_method_rows ="complete", cell_fun = function(j, i, x, y, w, h, col) { grid.text(lifeModel_order_annot[i, j], x, y, gp=gpar(fontsize=20))}) jpeg(file = "Heatmap modules Max lifespan order.jpeg", width = 16, height = 3, units = "in", res = 300) draw(Heatmap, heatmap_legend_side = "right", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ## weight # take the final summary table weightModel_order <- weightModel_order[,sum2$modules[which(sum2$group!="Marsupials")]] weightModel_order_annot <- weightModel_order_annot[,sum2$modules[which(sum2$group!="Marsupials")]] # Take sum2 from network analysis, I wanted to get the grouping from Netwrok analysis trCols <- as.character(sum2$color[which(sum2$group!="Marsupials")]) names(trCols) <- sum2$group[which(sum2$group!="Marsupials")] # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(weightModel_order), Traits = sum2$group[which(sum2$group!="Marsupials")], col = list(Modules = cols, Traits = trCols), show_legend =FALSE) Heatmap <- Heatmap(weightModel_order, col = my_palette, show_column_names = TRUE, name = "T score", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = FALSE, row_dend_reorder=FALSE, row_title="Phylogenetic orders", column_title = "Module association with species weight within phylogenetic orders", bottom_annotation = ha, clustering_method_rows ="complete", cell_fun = function(j, i, x, y, w, h, col) { grid.text(weightModel_order_annot[i, j], x, y, gp=gpar(fontsize=20))}) jpeg(file = "Heatmap modules Weight order.jpeg", width = 16, height = 4.3, units = "in", res = 300) draw(Heatmap, heatmap_legend_side = "right", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ## Lifespan per Tissue ```{r} samplesArtiodactila <- samplesNoMars %>% filter(Order=="4.Artiodactyla") %>% mutate(Order=ifelse(grepl(paste("(", "4.", "", 11:23, ".", ")", sep = "", collapse = "|"), spec), "4.Artiodactyla.aquatic", "4.Artiodactyla.terrestrial")) samp <- bind_rows(samplesNoMars, samplesArtiodactila) samples_tissue <- samp %>% dplyr::select(Tissue2, SpeciesLatinName) %>% distinct(.) %>% group_by(Tissue2) %>% tally() %>% right_join(samp) %>% filter(n>=40) %>%group_split(Tissue2) names(samples_tissue) <- sapply(samples_tissue, function(x){x$Tissue2[1]}) samples_tissue$all <- samplesNoMars result <- lapply(samples_tissue, function(y){ ME <- MEs[y$Basename,] lifeModel <- do.call(rbind,apply(ME, 2, function(x){ y$Eigengene <- x y <- y %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) y <- y %>% group_by(SpeciesCommonName) %>% summarise(Eigengene = mean(Eigengene), maximum_age=maximum_age) %>% distinct() a.lm <- lm(Eigengene ~ maximum_age, data = y) #a.lm <- lm(Eigengene ~ maximum_age+relativeAge+Tissue+Female, data = y) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() %>% mutate(cor=cor.test(y$Eigengene, y$maximum_age)$estimate[[1]]) })) }) # Summary lifeModel_tissue <- lapply(1:(length(result)-1), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(t_maximum_age) %>% tibble::rownames_to_column(var = "modules") names(a)[2] <- names(result)[x] return(a) }) lifeModel_tissue <- plyr::join_all(lifeModel_tissue) %>% tibble::column_to_rownames(var="modules") lifeModel_tissue <- t(lifeModel_tissue) # for order analysis make it 1e-3 #Annotation lifeModel_tissue_annot <- lapply(1:(length(result)-1), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(P_maximum_age, t_maximum_age) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_maximum_age<1e-3) %>% dplyr::select(-P_maximum_age) a <- a %>% dplyr::select(-P_maximum_age, -t_maximum_age) %>% left_join(b) names(a)[2] <- names(result)[x] return(a) }) lifeModel_tissue_annot <- plyr::join_all(lifeModel_tissue_annot) %>% tibble::column_to_rownames(var="modules") lifeModel_tissue_annot <- t(lifeModel_tissue_annot) lifeModel_tissue_annot[!is.na(lifeModel_tissue_annot)] = "*" lifeModel_tissue_annot[is.na(lifeModel_tissue_annot)] = "" lifeModel_tissue_annot <- lifeModel_tissue_annot[rownames(lifeModel_tissue), colnames(lifeModel_tissue)] # A bar plot of module frequency library(ComplexHeatmap) colnames(lifeModel_tissue) <- gsub("ME", "", colnames(lifeModel_tissue)) cols <- colnames(lifeModel_tissue) names(cols) <- colnames(lifeModel_tissue) ha = HeatmapAnnotation(Modules = colnames(lifeModel_tissue), col = list(Modules = cols), show_legend =FALSE) a <-c(1,3:4, 7:9, 2,32) Heatmap <- Heatmap(lifeModel_tissue, col = my_palette, show_column_names = T, name = "T score", show_row_names = TRUE, column_dend_reorder = T, cluster_columns = T, cluster_rows = T, row_dend_reorder=T, row_title="Tissue", column_title = "Modules marginal association with max lifespan within tissues (p<0.001)", bottom_annotation = ha, clustering_method_rows ="complete", cell_fun = function(j, i, x, y, w, h, col) { grid.text(lifeModel_tissue_annot[i, j], x, y, gp=gpar(fontsize=20))}, show_column_dend = F, show_row_dend = F) pdf(file = "Heatmap modules Max lifespan tissue.pdf", width = 16, height = 4) draw(Heatmap, heatmap_legend_side = "right", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ## Weight result2 <- lapply(samples_tissue, function(y){ ME <- MEs[y$Basename,] lifeModel <- do.call(rbind,apply(ME, 2, function(x){ y$Eigengene <- x y <- y %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) y <- y %>% group_by(SpeciesCommonName) %>% summarise(Eigengene = mean(Eigengene), average_weight=average_weight) %>% distinct() a.lm <- lm(Eigengene ~ average_weight, data = y) #a.lm <- lm(Eigengene ~ average_weight+relativeAge+Tissue+Female, data = y) d <- as.data.frame(coef(summary(a.lm))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Estimate, SE = `Std. Error`, t = `t value`, P = `Pr(>|t|)`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame()%>% mutate(cor=cor.test(y$Eigengene, y$average_weight)$estimate[[1]]) })) }) weightModel_tissue <- lapply(1:(length(result2)-1), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(t_average_weight) %>% tibble::rownames_to_column(var = "modules") names(a)[2] <- names(result)[x] return(a) }) weightModel_tissue <- plyr::join_all(weightModel_tissue) %>% tibble::column_to_rownames(var="modules") weightModel_tissue <- t(weightModel_tissue) # Annotation weightModel_tissue_annot <- lapply(1:(length(result2)-1), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(P_average_weight, t_average_weight) %>% tibble::rownames_to_column(var = "modules") b <- a%>% filter(P_average_weight<1e-3) %>% dplyr::select(-P_average_weight) a <- a %>% dplyr::select(-P_average_weight, -t_average_weight) %>% left_join(b) names(a)[2] <- names(result2)[x] return(a) }) weightModel_tissue_annot <- plyr::join_all(weightModel_tissue_annot) %>% tibble::column_to_rownames(var="modules") weightModel_tissue_annot <- t(weightModel_tissue_annot) weightModel_tissue_annot[!is.na(weightModel_tissue_annot)] = "*" weightModel_tissue_annot[is.na(weightModel_tissue_annot)] = "" weightModel_tissue_annot <- weightModel_tissue_annot[rownames(weightModel_tissue), colnames(weightModel_tissue)] # A bar plot of module frequency library(ComplexHeatmap) colnames(weightModel_tissue) <- gsub("ME", "", colnames(weightModel_tissue)) cols <- gsub("ME", "", colnames(weightModel_tissue)) names(cols) <- colnames(weightModel_tissue) ha = HeatmapAnnotation(Modules = colnames(weightModel_tissue), col = list(Modules = cols), show_legend =FALSE) Heatmap <- Heatmap(weightModel_tissue, col = my_palette, show_column_names = TRUE, name = "T score", show_row_names = TRUE, column_dend_reorder = T, cluster_columns = T, cluster_rows = T, row_dend_reorder=T, row_title="Tissue", column_title = "Modules marginal association with species weight within tissues (p<0.001)", bottom_annotation = ha, clustering_method_rows ="complete", cell_fun = function(j, i, x, y, w, h, col) { grid.text(weightModel_tissue_annot[i, j], x, y, gp=gpar(fontsize=20))}, show_column_dend = F, show_row_dend = F) pdf(file = "Heatmap modules Weight order.pdf", width = 16, height = 4) draw(Heatmap, heatmap_legend_side = "right", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ```{r} lifeSpanSummary <- lapply(1:length(result), function(x){ nam <- names(result)[x] a <- result[[x]] %>% dplyr::select(P_maximum_age,t_maximum_age, cor) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_maximum_age<1e-3) return(b) }) names(lifeSpanSummary) <- names(result) lifeSpanSummary <- rbindlist(lifeSpanSummary, idcol = "Order") %>% filter(P_maximum_age<1e-3)%>% mutate(c = ifelse(t_maximum_age>0, "+", ifelse(t_maximum_age<0, "-", NA))) %>% group_by(modules) %>% summarize(maximum_age.marginal = paste(Order, " (r=",round(cor,2),", p=" ,format(P_maximum_age, digits = 3), ")", sep = "", collapse = " ; ")) sum2 <- sum2 %>% left_join(lifeSpanSummary) weightModel_tissue_Summary <- lapply(1:length(result2), function(x){ nam <- names(result2)[x] a <- result2[[x]] %>% dplyr::select(P_average_weight, t_average_weight, cor) %>% tibble::rownames_to_column(var = "modules") b <- a %>% filter(P_average_weight<1e-3) return(b) }) names(weightModel_tissue_Summary) <- names(result) weightModel_tissue_Summary <- rbindlist(weightModel_tissue_Summary, idcol = "Order") %>% filter(P_average_weight<1e-3)%>% mutate(c = ifelse(t_average_weight>0, "+", ifelse(t_average_weight<0, "-", NA))) %>% group_by(modules) %>% summarize(average_weight.marginal = paste(Order, " (r=",round(cor,2),", p=" ,format(P_average_weight, digits = 3), ")", sep = "", collapse = " ; ")) sum2 <- sum2 %>% left_join(weightModel_tissue_Summary) write.csv(sum2, "summary.csv") ``` ## Phylogenetic regression ```{r phylogenetic regression} # You need my function to trim the tree, matching to your data frame trimPhyloTree <- function(samples = NULL, tree = NULL) { library(ape) # Create necessary tree lengths and species labels if not existing already if(is.null(tree$edge.length)) { tree = compute.brlen(tree, method = 'Grafen') } if(! "labelsCaesar" %in% colnames(samples) & ! "SpeciesLatinName" %in% colnames(samples)) { samples$SpeciesLatinName = rownames(samples) } if(! "labelsCaesar" %in% colnames(samples)) { samples$labelsCaesar <- as.labelsCaesar(samples$SpeciesLatinName) } rownames(samples) = samples$labelsCaesar dat.dropped = samples[samples$labelsCaesar %in% tree$tip.label, ] rownames(dat.dropped) = dat.dropped$labelsCaesar tree.dropped = keep.tip(tree, dat.dropped$labelsCaesar) dat.dropped = dat.dropped[tree.dropped$tip.label, ] return(list(dat.dropped, tree.dropped)) } library(ape) library(nlme) library(geiger) # Get Tree tree = read.tree("Mammalia_species.nwk") phyloTree <- ape::read.nexus("Mammalia_species") # Change these names/synonyms to match our species names phyloTree$tip.label[phyloTree$tip.label == "Tragelaphus_oryx"] = "Taurotragus_oryx" phyloTree$tip.label[phyloTree$tip.label == "Aonyx_cinerea"] = "Aonyx_cinereus" phyloTree$tip.label[phyloTree$tip.label == "Physeter_catodon"] = "Physeter_macrocephalus" phyloTree$tip.label[phyloTree$tip.label == "Equus_burchellii"] = "Equus_quagga" phyloTree$tip.label[phyloTree$tip.label == "Fukomys_damarensis"] = "Cryptomys_damarensis" phyloTree$tip.label[phyloTree$tip.label == "Erethizon_dorsatum"] = "Erethizon_dorsatus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_ehrenbergi"] = "Spalax_ehrenbergi" phyloTree$tip.label[phyloTree$tip.label == "Equus_asinus"] = "Equus_africanus" phyloTree$tip.label[phyloTree$tip.label == "Nannospalax_galili"] = "Spalax_galili" phyloTree$tip.label[phyloTree$tip.label == "Cervus_nippon"] = "Cervus_canadensis" # samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,Tissue, relativeAge, Female,labelsCaesar) %>% filter(complete.cases(.)) # # # adjust for tissue difference # res <- as.data.frame(sapply(names(MEs), function(x){ # i <- grep(pattern = paste("^",x, "$", sep = ""), colnames(MEs)) # samp <- samplesNoMars %>% mutate(Tissue = as.factor(Tissue)) %>% dplyr::select(maximum_age,Tissue, relativeAge, Female) %>% mutate(Eigengene = MEs[,i])%>% filter(complete.cases(.)) # a.lm <- lm(Eigengene ~ Tissue+relativeAge+Female, data = samp) # # x <- a.lm$residuals # return(x) # })) samp <- samplesNoMars %>% mutate(maximum_age = log(maximum_age)) %>% mutate(average_weight = log(average_weight)) phyloModel <- do.call(rbind,apply(MEs, 2, function(x){ samp$Eigengene <- x dt <- samp %>% group_by(labelsCaesar) %>% summarize(Eigengene = mean(Eigengene)) %>% ungroup() %>% left_join(samp[,c("labelsCaesar", "maximum_age")]) %>% distinct() %>% filter(labelsCaesar%in%phyloTree$tip.label) temp = trimPhyloTree(dt, tree) dt = temp[[1]]; tree = temp[[2]] fit <- gls(maximum_age ~ Eigengene, correlation = corBrownian(phy = tree), data = dt) d <- as.data.frame(coef(summary(fit))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Value, SE = `Std.Error`, t = `t-value`, P = `p-value`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(phyloModel[,grepl( "P_" , names(phyloModel) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") phyloModel <- cbind(phyloModel, pvals) phyloModel2 <- phyloModel %>% dplyr::select(ends_with("Eigengene"))%>% tibble::rownames_to_column(var = "modules") phyloModel2$modules = with(phyloModel2, reorder(modules, t_Eigengene)) phyloModules <- phyloModel2 %>% filter(P_Eigengene<1.4e-07) phyloModules <- as.character(phyloModules$modules) # plotting independent contrast plots <- lapply(phyloModules, function(x){ dt <- samp %>% mutate(module=MEs[,grep(pattern = x, colnames(MEs))])%>% filter(labelsCaesar%in%phyloTree$tip.label) %>% group_by(labelsCaesar) %>% summarize(module = mean(module)) %>% ungroup() %>% left_join(samp[,c("labelsCaesar", "maximum_age")]) %>% distinct() %>% mutate(maximum_age = log(maximum_age)) temp = trimPhyloTree(dt, tree) dt = temp[[1]]; tree = temp[[2]] tree <- makeNodeLabel(tree, prefix = "node") datTree <- data.frame(nodes = tree$node.label) %>% mutate(module = pic(dt$module, tree)) %>% mutate(maximum_age = pic(dt$maximum_age, tree)) %>% gather(module, key = "modules", value = "eigengene") %>% mutate(modules=paste(x)) p2 <- datTree %>% ggplot(aes(x= maximum_age, y=eigengene))+geom_point_rast(alpha = 0.5)+geom_smooth(method = "lm")+ggpubr::stat_cor(size=5)+facet_wrap(.~modules, nrow = 2, scales = "free_y")+theme_classic(base_size = 15)+ylab("Phylogenetic idependent contrast eigengene")+xlab("log maximum lifespan")+theme(legend.position = "none", axis.title.y = element_text(size = 12), plot.title = element_text(size=15))+ggtitle("Phylogenetic regression \nmaximum lifespan") }) p <- ggpubr::ggarrange(plotlist = plots, nrow = 2, ncol = 1) jpeg(file = "phylogenetic regression modules.jpeg", width = 4, height = 8, units = "in", res = 300) p dev.off() # weight analysis phyloWeight <- do.call(rbind,apply(MEs, 2, function(x){ samp$Eigengene <- x dt <- samp %>% group_by(labelsCaesar) %>% summarize(Eigengene = mean(Eigengene)) %>% ungroup() %>% left_join(samp[,c("labelsCaesar", "average_weight")]) %>% distinct() %>% filter(labelsCaesar%in%phyloTree$tip.label)%>% filter(!duplicated(labelsCaesar)) temp = trimPhyloTree(dt, tree) dt = temp[[1]]; tree = temp[[2]] fit <- gls(average_weight ~ Eigengene, correlation = corBrownian(phy = tree), data = dt) d <- as.data.frame(coef(summary(fit))) d <- d %>% add_rownames(var = "factor") %>% dplyr::rename(Beta = Value, SE = `Std.Error`, t = `t-value`, P = `p-value`) %>% gather(key = "measure", value = "value", Beta, SE, t, P) %>% mutate(factor = paste(measure,factor, sep = "_")) %>% dplyr::select(-measure) %>% tibble::column_to_rownames('factor') %>% t() %>% as.data.frame() })) pvals <- as.data.frame(apply(phyloWeight[,grepl( "P_" , names(phyloWeight) )], 2, function(x){ fdr <- p.adjust(x, method = "fdr") })) names(pvals) <- paste(names(pvals), "fdr", sep = "_") phyloWeight <- cbind(phyloWeight, pvals) phyloWeight2 <- phyloWeight %>% dplyr::select(ends_with("Eigengene"))%>% tibble::rownames_to_column(var = "modules") phyloWeight2$modules = with(phyloWeight2, reorder(modules, t_Eigengene)) phyloWeightModules <- phyloWeight2 %>% filter(P_Eigengene<1e-4) phyloWeightModules <- as.character(phyloWeightModules$modules) ``` ```{r} lifeSpanSummary <- phyloModel2 %>% dplyr::select(modules, P_Eigengene, t_Eigengene) %>% filter(P_Eigengene<0.001) lifeSpanSummary <- lifeSpanSummary%>% mutate(c = ifelse(t_Eigengene>0, "+", ifelse(t_Eigengene<0, "-", NA))) %>% group_by(modules) %>% summarize(maximum_age.phylo = paste("all", " (",c,", p=" ,format(P_Eigengene, digits = 3), ")", sep = "", collapse = " ; ")) sum2 <- sum2 %>% left_join(lifeSpanSummary) weightSummary <- phyloWeight2 %>% dplyr::select(modules, P_Eigengene, t_Eigengene) %>% filter(P_Eigengene<0.001) weightSummary <- weightSummary%>% mutate(c = ifelse(t_Eigengene>0, "+", ifelse(t_Eigengene<0, "-", NA))) %>% group_by(modules) %>% summarize(average_weight.phylo = paste("all", " (",c,", p=" ,format(P_Eigengene, digits = 3), ")", sep = "", collapse = " ; ")) sum2 <- sum2 %>% left_join(weightSummary) ``` ## Dog breeds ```{r plot for Dog breeds} dogSamples <- read.csv("~/Steve Horvath Lab Dropbox/Amin Haghani/Individual projects/N12.2018-9238DogBloodOstrander/SampleSheetAgeN12final.csv") %>% filter(CanBeUsedForAgingStudies=="yes") %>% mutate(DogBreed=as.factor(DogBreed)) %>% mutate(breeds = factor(DogBreed, labels=1:93)) %>% mutate(`Dog breed` = factor(DogBreed, labels = paste(levels(breeds), levels(DogBreed), sep = ". "))) #targVars <- c("Lifespan.HighClubBreeder", "Weight.kg") targVars <- c("LifespanUpperHorvath", "Weight.kg.avg") #targetModules <- unique(c(maxLifeModules, as.character(weightModules), as.character(ageModules), maxLifeModulesMarginal, weightModulesMarg, phyloModules)) #datMEs <- MEs[dogSamples$Basename,targetModules] datMEs <- MEs[dogSamples$Basename,] %>% tibble::rownames_to_column(var = "Basename") %>% gather(-Basename, key="modules", value="bval") %>% left_join(dogSamples) %>% group_by(DogBreed, modules) %>% summarize(bval=mean(bval)) %>% ungroup() %>% spread(key=modules, value=bval) %>% filter(!is.na(DogBreed)) %>% tibble::column_to_rownames(var = "DogBreed") datTraits <- dogSamples %>% dplyr::select(DogBreed, targVars) %>% filter(!duplicated(DogBreed)) %>% arrange(DogBreed)%>% tibble::column_to_rownames("DogBreed") moduleTraitCor = corAndPvalue(datMEs,datTraits)$cor moduleTraitPvalue= corAndPvalue(datMEs,datTraits)$p summaryResults <- as.data.frame(moduleTraitPvalue) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(moduleTraitPvalue, format = "e", digits = 0), sep = "")) summaryResults3 <- summaryResults2 %>% filter(pval<0.005) %>% arrange(pval) dogModules <- unique(summaryResults3$modules)[c(3:2)] samp <- datMEs %>% tibble::rownames_to_column(var = "DogBreed")%>% gather(key = "modules", value = "eigengene", starts_with("ME")) %>% left_join(dplyr::select(.data=dogSamples, DogBreed,targVars))%>% mutate(DogBreed=as.factor(DogBreed)) %>% mutate(breeds = factor(DogBreed, labels=1:93)) %>% mutate(`Dog breed` = factor(DogBreed, labels = paste(levels(breeds), levels(DogBreed), sep = ". "))) %>% distinct() qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) set.seed(1998) breedCols <- c(sample(col_vector, length(levels(samp$DogBreed)), replace = T)) names(breedCols) <- levels(samp$DogBreed) # p1 <- samp%>% filter(modules%in%dogModules)%>% mutate(modules=factor(modules, levels=dogModules, labels = gsub("ME", "", dogModules))) %>% ggplot(aes(x= LifespanMedianHorvath, y=eigengene))+geom_text(aes(label=breeds, color=`Dog breed`), size=9)+geom_smooth(method = "lm")+facet_wrap(.~modules, nrow = 1, scales = "free_y")+ggpubr::stat_cor(size=8, color="blue")+theme_classic(base_size = 25)+scale_color_manual(values = rep("#FF9999", 93))+ylab("Mean Eigengene per breed")+xlab("Median Life expectency (years) for Dog Breeds")+theme(legend.position = "right", axis.title.y = element_text(size = 20), strip.text = element_text(size=27), legend.text = element_text(size=20))+guides(color = guide_legend(ncol=4))+labs(color="Dog breed") p2 <- samp%>% filter(modules%in%dogModules)%>% mutate(modules=factor(modules, levels=dogModules, labels = gsub("ME", "", dogModules)))%>% ggplot(aes(x= Weight.kg.avg, y=eigengene))+geom_text(aes(label=breeds, color=`Dog breed`), size=9)+geom_smooth(method = "lm")+facet_wrap(.~modules, nrow = 1, scales = "free_y")+ggpubr::stat_cor(size=8, color="blue")+theme_classic(base_size = 25)+scale_color_manual(values = rep("#FF9999", 93))+ylab("Mean Eigengene per breed")+xlab("Average dog breed weight (kg)")+theme(legend.position = "right", axis.title.y = element_text(size = 20), strip.text = element_text(size=27))+guides(color = guide_legend(ncol=4), legend.text = element_text(size=20))+labs(color="Dog breed") p3 <- samp%>% filter(modules%in%dogModules)%>% mutate(modules=factor(modules, levels=dogModules, labels = gsub("ME", "", dogModules))) %>% ggplot(aes(x= LifespanUpperHorvath, y=eigengene))+geom_text(aes(label=breeds, color=`Dog breed`), size=9)+geom_smooth(method = "lm")+facet_wrap(.~modules, nrow = 1, scales = "free_y")+ggpubr::stat_cor(size=8, color="blue")+theme_classic(base_size = 25)+scale_color_manual(values = rep("#FF9999", 93))+ylab("Mean Eigengene per breed")+xlab("Upper Life expectency (years) for Dog Breeds")+theme(legend.position = "right", axis.title.y = element_text(size = 20), strip.text = element_text(size=27), legend.text = element_text(size=20))+guides(color = guide_legend(ncol=4))+labs(color="Dog breed") p <- ggpubr::ggarrange(p3, p2, nrow = 2, common.legend = T, legend = "right") pdf(file = "Dog breed modules.pdf", width = 35, height = 12) p dev.off() ``` ```{r} dogResultSummary <- summaryResults2 %>% filter(pval<0.05)%>% mutate(text=paste("(r=",round(r,2),", p=", formatC(pval, format = "e", digits = 0), ")", sep = "")) %>% dplyr::select(-r, -pval) %>% spread(key = var, value=text) sum2 <- sum2 %>% left_join(dogResultSummary) ``` ```{r summary dog breeds} dogSamples <- read.csv("~/Steve Horvath Lab Dropbox/Amin Haghani/Individual projects/N12.2018-9238DogBloodOstrander/SampleSheetAgeN12final.csv") %>% dplyr::select(DogBreed, SpeciesLatinName, Age, Tissue, "LifespanUpperHorvath", "LifespanMedianHorvath", "Weight.kg.avg") %>% group_by(DogBreed) %>% add_count() %>% summarize(SpeciesLatinName=SpeciesLatinName, Age =paste("(", min(Age), "-",max(Age), ")", sep = ""), Tissues = paste(unique(Tissue), collapse = "; "), Lifespan.Upper = max(LifespanUpperHorvath), Lifespan.Median = max(LifespanMedianHorvath), averageWeightKg = max(Weight.kg.avg), sampleSize=n) %>% distinct() write.csv(dogSamples, "dog samples.csv") ``` ## Network view ```{r} association <- KMEs %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) modules <- unique(c(tissueModules, orderModules$modules, as.character(ageModules), as.character(maxLifeModulesMarginal), as.character(weightModulesMarg),as.character(maxLifeModules), as.character(weightModules),phyloModules,phyloWeightModules, sexModule)) modules <- gsub("ME", "", modules) unknown <- association$moduleColors[which(!association$moduleColors%in%modules)] net <- bind_rows(orderModules, data.frame(modules = as.character(tissueModules), var = "Tissue"), data.frame(modules = as.character(ageModules), var = "Age"), data.frame(modules = unique(c(as.character(maxLifeModules),as.character(maxLifeModulesMarginal), phyloModules)), var = "Max age"), data.frame(modules = unique(c(as.character(weightModules),as.character(weightModulesMarg), phyloWeightModules)), var = "Species weight"), data.frame(modules = as.character(sexModule), var = "Sex"), data.frame(modules = as.character(interventionModules), var = "InterventionMarker"), data.frame(modules = as.character(unknown), var = "unclear")) %>% dplyr::rename("source" = modules, destination=var)%>% mutate(destination = factor(destination, levels = c("Tissue", unique(orderModules$var), "Age", "Max age","Species weight","Sex","InterventionMarker", "unclear")))%>% tibble::rowid_to_column("id") sources <- net %>%distinct(source) %>%dplyr::rename(label = "source") destinations <- net %>% distinct(destination) %>% dplyr::rename(label = destination) nodes <- full_join(sources, destinations, by = "label")%>% tibble::rowid_to_column("id") per_route <- net %>% group_by(source, destination) %>% summarise(weight = n()) %>% ungroup() %>% dplyr::select(-destination) edges <- per_route %>% left_join(nodes, by = c("source" = "label")) %>% dplyr::rename(from = id) edges <- edges %>% left_join(nodes, by = c("destination" = "label")) %>% dplyr::rename(to = id) %>% dplyr::select(from, to, weight) # library("igraph") # routes_igraph <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE) # routes_igraph # plot(routes_igraph, edge.arrow.size = 0.2) # # # library(tidygraph) # library(ggraph) # routes_tidy <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE) # routes_igraph_tidy <- as_tbl_graph(routes_igraph) # # routes_tidy %>% # activate(edges) %>% # arrange(desc(weight)) # # ggraph(routes_tidy, layout = "graphopt") + # geom_node_point(size=8, alpha = 0.1) + # geom_edge_link(aes(width = weight), alpha = 0.1) + # scale_edge_width(range = c(0.2, 2)) + # geom_node_text(aes(label = label), repel = TRUE) + # labs(edge_width = "Letters") + # theme_graph() library(visNetwork) library(networkD3) net2 <- net %>% group_by(source) %>% summarize(group = paste(destination, collapse = " ; ")) %>% dplyr::rename(label = source) nodes2 <- nodes %>% mutate(shape = ifelse(grepl("^ME", label), "circle", "square")) %>% mutate(shadow = ifelse(grepl("^ME", label), FALSE, TRUE)) %>% mutate(value = ifelse(grepl("^ME", label), 5, 7)) %>% mutate(font.size = 20) %>%left_join(net2, by="label") %>% mutate(group = ifelse(is.na(group)|label=="unknown", "variable", group)) %>% mutate(label =gsub("ME", "", label)) library(RColorBrewer) qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) set.seed(2022) cols <- c(sample(col_vector, length(unique(nodes2$group)), replace = FALSE)) cols[which(unique(nodes2$group)=="variable")] <- "#0080FF" nodes2 <- nodes2 %>%mutate(group = as.factor(group)) %>% mutate(color = factor(group,levels=levels(group), labels = cols)) %>% mutate(value = 10) labs <- nodes2$label[c(1:65)] nodes2 <- nodes2 %>% mutate(col2 = ifelse(label%in%labs, label, "#0080FF"))%>% mutate(col2 = col2hex(col2))%>% mutate(group2 = ifelse(label%in%c("Age"), "Age", ifelse(label%in%c(as.character(orders2), "Marsupials"), "Order", ifelse(label%in%c(unique(samplesNoMars$Tissue2),unique(samplesNoMars$Tissue), "Cortex", "Cerebellum"), "Tissue", ifelse(label%in%c("Max age", "Species weight"), "SpeciesLevel", label))))) %>% mutate(col3 = ifelse(group!="variable",col2, ifelse(group2=="Age", "#FC8D62", ifelse(group2=="Order", "#1B9E77", ifelse(group2=="Sex", "#CAB2D6", ifelse(group2=="SpeciesLevel", "#E6AB02", ifelse(group2=="Tissue", "#D95F02", "#0080FF"))))))) #write.csv(nodes2, "nodes2.csv") pdf("networkLegened.pdf", width = 5, height = 6) nodes2 %>% ggplot(aes(x= value, fill = group))+geom_histogram()+scale_fill_manual(values = levels(nodes2$color)) dev.off() network <- visNetwork(nodes2, edges, width = "100%", height = "1000px") %>% visIgraphLayout(layout = "layout_with_kk") %>% visEdges(arrows = "middle") network %>% visSave(file = "network.html") visExport(network, type = "pdf", name = "network") ## netwrokD3 nodes_d3 <- mutate(nodes2, id = id - 1) edges_d3 <- mutate(edges, from = from - 1, to = to - 1) %>% mutate(id = from) %>% left_join(nodes_d3[,c("id", "color")]) %>%dplyr::rename(edges_col = color) %>% dplyr::select(-id) forceNetwork(Links = edges_d3, Nodes = nodes_d3, Source = "from", Target = "to", NodeID = "label", Group = "group", Value = "weight", opacity = 1, fontSize = 6, zoom = TRUE, charge = -1, linkDistance = 60, opacityNoHover=1) ## igraph library(igraph) net <- graph_from_data_frame(d = edges, vertices = nodes2, directed = TRUE) plot(net, edge.arrow.size=.1, edge.curved=.1, vertex.label.cex=.5) l <- layout_with_kk(net) layouts <- grep("^layout_", ls("package:igraph"), value=TRUE)[-1] l <- layout_with_fr(net, niter = 500, grid = "auto", dim=2) plot(net, edge.arrow.size=.1, edge.curved=.1, vertex.label.cex=.3, layout=l) pdf("network.pdf", width = 10, height = 6) plot(net, edge.arrow.size=.2, edge.curved=.1, vertex.label.cex=1, layout=l) dev.off() library(RCy3) createNetworkFromIgraph(net,"myIgraph") ``` ## RNAseq analysis ```{r} fortifyCCA <- function(model, data, axes = 1:6, display = c("sp", "wa", "lc", "bp", "cn"), ...) { ## extract scores scrs <- scores(model, choices = axes, display = display, ...) ## handle case of only 1 set of scores if (length(display) == 1L) { scrs <- list(scrs) nam <- switch(display, sp = "species", species = "species", wa = "sites", sites = "sites", lc = "constraints", bp = "biplot", cn = "centroids", stop("Unknown value for 'display'")) names(scrs) <- nam } miss <- vapply(scrs, function(x ) all(is.na(x)), logical(1L)) scrs <- scrs[!miss] nams <- names(scrs) nr <- vapply(scrs, FUN = NROW, FUN.VALUE = integer(1)) df <- do.call('rbind', scrs) rownames(df) <- NULL df <- as.data.frame(df) df <- cbind(Score = factor(rep(nams, times = nr)), Label = unlist(lapply(scrs, rownames), use.names = FALSE), df) df } ``` ```{r RNAseq analysis} library("vegan") HorsegeneMap <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Horse.Equus_caballus.EquCab3.0.100.Amin.V4.RDS") HorsegeneMap$seqnames <- as.factor(HorsegeneMap$seqnames) RNA <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Individual mammalian projects/N23.2019-9032HorsesCarrieFinno/Horse Transcriptome data.RDS") RNA <- RNA %>% tibble::column_to_rownames(var = "SYMBOL") DNA <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Individual mammalian projects/N23.2019-9032HorsesCarrieFinno/DNAm for RNAseq.RDS") human = readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V5.RDS") background1 <- human %>% dplyr::select(seqnames, CGstart, CGend, CGid) %>% filter(!is.na(CGstart)) %>% dplyr::rename(CHR = seqnames) %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes")])%>% setnames(new = c("chr", "start", "end","CGid")) RNAzScore <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Individual mammalian projects/N23.2019-9032HorsesCarrieFinno/DNAm RNAseq correlations for neighboring genes.RDS") %>% filter(CGid%in%background1$CGid) %>% dplyr::select(CGid, z, pval, SYMBOL, annotation, CpGisland) RNAzScore <- RNAzScore[order(match(RNAzScore$CGid,background1$CGid)),] #write.csv(RNAzScore, "DNA RNA relationship.csv") top10 <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(10, abs(KME))) }) names(top10) <- sapply(top10, function(x){x$modules[1]}) result <- plyr::llply(top10, function(x){ horser <- HorsegeneMap %>% filter(CGid%in%x$CGid) DNAr <- DNA %>% filter(rownames(.)%in%horser$CGid) %>% t(.) RNAr <- RNA %>% filter(rownames(.)%in%horser$SYMBOL) %>% t(.) ccar <- cca(DNAr, RNAr) fdatr <- fortifyCCA(ccar)%>% filter(Score=="sites") %>% mutate(Label = gsub("(AH1)|(AH2)", "", Label)) %>% setnames(new = gsub("^CA", "CCA", colnames(.))) }) corResult <- plyr::llply(result, function(i){ x <- i colnames(x) <- gsub("^CA", "CCA", colnames(x)) a <- as.numeric(x$CCA1) b <- as.numeric(x$CCA2) c <- cor.test(a, b) d <- data.frame(r = c$estimate, pval = c$p.value) }) corResult <- rbindlist(corResult, idcol = "modules") corResult$modules = with(corResult, reorder(modules, -pval)) corResult <- corResult %>% mutate(highlight= ifelse(pval<0.05, "yes", NA)) yint <- corResult %>% mutate(direction = ifelse(r>0,"+", "-")) %>% group_by(direction) %>% filter(pval<0.05) %>% top_n(1, -abs(r))%>% ungroup() p1 <- corResult %>% ggplot(aes(y=r, x=modules, color=-log10(pval)))+geom_point_rast()+scale_color_gradient(low="blue", high = "red")+theme_classic()+theme(axis.text.x = element_text(angle=90, hjust=1, vjust = 0.5), plot.title = element_text(hjust = 0.5))+ylab("Canonical correlation")+ggtitle("Canonical correlation analysis \ntop 10 hub CpGs vs mRNA in horse tissues")+geom_hline(yintercept = yint$r, linetype="dashed",color = "red") pdf("CCA RNA DNA.pdf", width = 14, height = 4) p1 dev.off() # plot input <- rbindlist(result, idcol = "modules") %>% filter(modules%in%corResult$modules[which(corResult$highlight=="yes")]) annot <- input %>% group_by(modules) %>% top_n(4, CCA1) annot <- input %>% group_by(modules) %>% top_n(4, -CCA1) %>% bind_rows(annot) p2 <- input %>% ggplot(aes(x=CCA1, y=CCA2)) +geom_point_rast(alpha=1, aes(color=Label), size=4)+facet_wrap(.~modules, scales = "free", nrow = 3)+geom_smooth(method = "lm")+ylab("mRNA 1st PCA")+xlab("DNAm 1st PCA")+theme_classic(base_size = 20)+ggpubr::stat_cor(label.x.npc = "left", label.y.npc = "bottom", size =6, color ="red")+theme(legend.position = "bottom", strip.text = element_text(size = 20))+geom_text_repel(data = annot, aes(label = Label))+guides(col = guide_legend(nrow = 3, byrow = TRUE)) jpeg("CCA RNA DNA correlation.jpeg", width = 25, height = 12, units = "in", res = 300) p2 dev.off() ``` ```{r summ} horseCorSum <- corResult%>% mutate(direction = paste("(r=",round(r,2),", p=" ,format(pval, digits = 3), ")", sep = "")) %>% mutate(direction=ifelse(highlight=="yes", direction, NA))%>% dplyr::select(modules, direction) %>% mutate(modules = paste("ME", modules, sep = "")) sum2 <- sum2%>% left_join(horseCorSum) %>% dplyr::rename(mRNAcorrelationInHorseTissues=direction) ``` ```{r add top 10 hub CpGs and genes} top10 <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(10, abs(KME))) }) names(top10) <- sapply(top10, function(x){x$modules[1]}) geneMap <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V5.RDS") top10Sum <- rbindlist(top10) %>% left_join(geneMap[,c("CGid", "SYMBOL")])%>% mutate(modules = paste("ME", modules, sep = "")) %>% group_by(modules) %>% summarize(KME = paste(round(KME,2), collapse = " ; "), hubCGid = paste(CGid, collapse = " ; "), hubGenes = paste(SYMBOL, collapse = " ; ")) sum2 <- sum2 %>% left_join(top10Sum) ``` ```{r add top 500 hub CpGs and genes} top500 <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(500, abs(KME))) }) top500 <- rbindlist(top500) %>% left_join(human) %>% left_join(mouse) #write.csv(top500, "top500KME.csv") ``` ## Identify Intervention modules biomarkers of lifespan ```{r Identify biomarkers of lifespan} # #Parabiosis experiment # N74data <- loadRData("~/Steve Horvath Lab Dropbox/Amin Haghani/Individual projects/N74.2020-9185MouseParabiosisGladyshevBohan/NormalizedData/all_probes_sesame_normalized.Rdata") %>% tibble::column_to_rownames("CGid") # # N74data <- N74data[rownames(KMEs),] # #identical(rownames(N74data), rownames(KMEs)) # # N74data <- t(N74data) # N74MEs = moduleEigengenes(N74data, colors = mergedColors) # N74MEs = N74MEs$eigengenes # N74MEs <- N74MEs[,colnames(MEs)] # # # identical(colnames(N74MEs), colnames(MEs)) # # MEsIntervention <- rbind(MEs, N74MEs) MEsIntervention <- MEs parabiosisSamples <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40/N74.2020-9185MouseParabiosisGladyshevBohan/SampleSheetAgeN74final.csv") %>% mutate(treatment = factor(Condition, levels = c("Young mouse connected to young mouse for a short time", "Young mouse connected to old mouse ", "Old mouse connected to old mouse for a short time", "Old mouse connected to Young mouse for short time", "Old mouse connected to young mouse"), labels = c("YY", "YO", "OO", "OY","OY"))) %>% mutate(group = paste(treatment, gsub("[0-9]\\.", "",ExperimentDetailed), sep = ", ")) %>% mutate(group = factor(group, levels = c("YY, Experiment2", "YO, Experiment2", "OO, Experiment2", "OY, Experiment2", "YY, Connected3Months", "YY, Detached3Months", "OO, Connected3Months", "OO, Detached3Months", "OY, Connected3Months","OY, Detached3Months", "YO, Connected3Months", "YO, Detached3Months", "YY, Connected5Weeks", "OY, Connected5Weeks", "OO, Connected5Weeks"))) %>% mutate(effectOnYoung = ifelse(treatment=="YY",0, ifelse(treatment=="YO",1, NA)))%>% mutate(effectOnOld = ifelse(treatment=="OO",0, ifelse(treatment=="OY",1, NA))) # parabiosisSamplesYoungs <- parabiosisSamples %>% filter(!is.na(effectOnYoung)&Experiment!="Experiment5")%>% group_split(Experiment) names(parabiosisSamplesYoungs) <- sapply(parabiosisSamplesYoungs, function(x){paste("N74.", "YOvsYY, ", gsub("[0-9]\\.", "",x$ExperimentDetailed[1]),"_", x$Tissue[1] ,sep = "")}) parabiosisSamplesYoungs <- plyr::llply(parabiosisSamplesYoungs, function(x){x <- x %>% dplyr::select(Basename, effectOnYoung)}) # parabiosisSamplesOlds <- parabiosisSamples %>% filter(!is.na(effectOnOld))%>% group_split(Experiment) names(parabiosisSamplesOlds) <- sapply(parabiosisSamplesOlds, function(x){paste("N74.","OYvsOO, ", gsub("[0-9]\\.", "",x$ExperimentDetailed[1]),"_", x$Tissue[1] ,sep = "")}) parabiosisSamplesOlds <- plyr::llply(parabiosisSamplesOlds, function(x){x <- x %>% dplyr::select(Basename, effectOnOld)}) # Rejuvenation # N55data <- loadRData("~/Steve Horvath Lab Dropbox/Amin Haghani/Individual projects/N55.2020-9012MouseJuanCarlosBelmontePradeepReddy/NormalizedData/all_probes_sesame_normalized.Rdata") %>% tibble::column_to_rownames("CGid") # # N55data <- N55data[rownames(KMEs),] # #identical(rownames(N55data), rownames(KMEs)) # # N55data <- t(N55data) # N55MEs = moduleEigengenes(N55data, colors = mergedColors) # N55MEs = N55MEs$eigengenes # # MEsIntervention <- rbind(MEsIntervention, N55MEs) rejuvenation4F10mSamples <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40/N55.2020-9012.2020-9086.2020-9176MouseJuanCarlosBelmontePradeepReddy//SampleSheetAgeN55final.csv") %>% filter(CanBeUsedForAgingStudies=="yes"&ConfidenceInAgeEstimate>50) %>%dplyr::select(Basename, Tissue, Female, Experiment, Age, Condition) %>% mutate(groups = ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Control"&Age<1.5, "4F-young", ifelse(Experiment=="WeakRejuvenation.LAKI.Progeria.4F.treatedFor8.weeks"&Condition=="Treated (+Dox)", "LAKI+Dox", ifelse(Experiment=="WeakRejuvenation.LAKI.Progeria.4F.treatedFor8.weeks"&Condition=="Control", "LAKI", ifelse(Experiment=="StrongRejuvenation.4F.treated7months"&Condition=="Control", "B6+Dox", ifelse(Experiment=="StrongRejuvenation.4F.treated7months"&Condition=="Treated (+Dox)", "4F+7mDox", ifelse(Experiment=="StrongRejuvenation.4F.mice.Treatedfor10.months"&Condition=="Control", "4F-old", ifelse(Experiment=="StrongRejuvenation.4F.mice.Treatedfor10.months"&Condition=="Treated (+Dox)", "4F+10mDox", ifelse(Experiment=="NoRejuvenationOnlyDox.B6mice"&Condition=="Control", "B6-old", ifelse(Experiment=="NoRejuvenationOnlyDox.B6mice"&Condition=="Treated (+Dox)", "B6+Dox", ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Control"&Age>1.5, "4F-old", ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Treated (+Dox)", "4F+1mDox", NA))))))))))))%>% mutate(groups = factor(groups, levels = c("B6-old", "4F-young", "4F-old", "B6+Dox", "4F+1mDox", "4F+7mDox", "4F+10mDox","LAKI","LAKI+Dox"))) %>% filter(groups%in%c("4F-old", "4F+1mDox", "4F+7mDox", "4F+10mDox")) %>% mutate(treatment = as.numeric(as.character(factor(groups, levels = c("4F-old", "4F+1mDox", "4F+7mDox", "4F+10mDox"), labels = c(0,1,7,10))))) %>% filter(treatment%in%c(0,10)) %>% mutate(treatment = ifelse(treatment==10, 1, treatment)) %>% group_split(Tissue) names(rejuvenation4F10mSamples) <- sapply(rejuvenation4F10mSamples, function(x){paste("rejuvenation4F10m", x$Tissue[1], sep = "_")}) rejuvenation4F10mSamples <- plyr::llply(rejuvenation4F10mSamples, function(x){x <- x %>% dplyr::select(Basename, treatment)}) # rejuvenation4F7mSamples <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40/N55.2020-9012.2020-9086.2020-9176MouseJuanCarlosBelmontePradeepReddy//SampleSheetAgeN55final.csv") %>% filter(CanBeUsedForAgingStudies=="yes"&ConfidenceInAgeEstimate>50) %>%dplyr::select(Basename, Tissue, Female, Experiment, Age, Condition) %>% mutate(groups = ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Control"&Age<1.5, "4F-young", ifelse(Experiment=="WeakRejuvenation.LAKI.Progeria.4F.treatedFor8.weeks"&Condition=="Treated (+Dox)", "LAKI+Dox", ifelse(Experiment=="WeakRejuvenation.LAKI.Progeria.4F.treatedFor8.weeks"&Condition=="Control", "LAKI", ifelse(Experiment=="StrongRejuvenation.4F.treated7months"&Condition=="Control", "B6+Dox", ifelse(Experiment=="StrongRejuvenation.4F.treated7months"&Condition=="Treated (+Dox)", "4F+7mDox", ifelse(Experiment=="StrongRejuvenation.4F.mice.Treatedfor10.months"&Condition=="Control", "4F-old", ifelse(Experiment=="StrongRejuvenation.4F.mice.Treatedfor10.months"&Condition=="Treated (+Dox)", "4F+10mDox", ifelse(Experiment=="NoRejuvenationOnlyDox.B6mice"&Condition=="Control", "B6-old", ifelse(Experiment=="NoRejuvenationOnlyDox.B6mice"&Condition=="Treated (+Dox)", "B6+Dox", ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Control"&Age>1.5, "4F-old", ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Treated (+Dox)", "4F+1mDox", NA))))))))))))%>% mutate(groups = factor(groups, levels = c("B6-old", "4F-young", "4F-old", "B6+Dox", "4F+1mDox", "4F+7mDox", "4F+10mDox","LAKI","LAKI+Dox"))) %>% filter(groups%in%c("4F-old", "4F+1mDox", "4F+7mDox", "4F+10mDox")) %>% mutate(treatment = as.numeric(as.character(factor(groups, levels = c("4F-old", "4F+1mDox", "4F+7mDox", "4F+10mDox"), labels = c(0,1,7,10))))) %>% filter(treatment%in%c(0,7)) %>% mutate(treatment = ifelse(treatment==7, 1, treatment)) %>% group_split(Tissue) names(rejuvenation4F7mSamples) <- sapply(rejuvenation4F7mSamples, function(x){paste("rejuvenation4F7m", x$Tissue[1], sep = "_")}) rejuvenation4F7mSamples <- plyr::llply(rejuvenation4F7mSamples, function(x){x <- x %>% dplyr::select(Basename, treatment)}) # rejuvenationLAKIsamples <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40/N55.2020-9012.2020-9086.2020-9176MouseJuanCarlosBelmontePradeepReddy//SampleSheetAgeN55final.csv") %>% filter(CanBeUsedForAgingStudies=="yes"&ConfidenceInAgeEstimate>50) %>%dplyr::select(Basename, Tissue, Female, Experiment, Age, Condition) %>% mutate(groups = ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Control"&Age<1.5, "4F-young", ifelse(Experiment=="WeakRejuvenation.LAKI.Progeria.4F.treatedFor8.weeks"&Condition=="Treated (+Dox)", "LAKI+Dox", ifelse(Experiment=="WeakRejuvenation.LAKI.Progeria.4F.treatedFor8.weeks"&Condition=="Control", "LAKI", ifelse(Experiment=="StrongRejuvenation.4F.treated7months"&Condition=="Control", "B6+Dox", ifelse(Experiment=="StrongRejuvenation.4F.treated7months"&Condition=="Treated (+Dox)", "4F+7mDox", ifelse(Experiment=="StrongRejuvenation.4F.mice.Treatedfor10.months"&Condition=="Control", "4F-old", ifelse(Experiment=="StrongRejuvenation.4F.mice.Treatedfor10.months"&Condition=="Treated (+Dox)", "4F+10mDox", ifelse(Experiment=="NoRejuvenationOnlyDox.B6mice"&Condition=="Control", "B6-old", ifelse(Experiment=="NoRejuvenationOnlyDox.B6mice"&Condition=="Treated (+Dox)", "B6+Dox", ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Control"&Age>1.5, "4F-old", ifelse(Experiment=="WeakRejuvenation.4Fmice.treated.1.month"&Condition=="Treated (+Dox)", "4F+1mDox", NA))))))))))))%>% mutate(groups = factor(groups, levels = c("B6-old", "4F-young", "4F-old", "B6+Dox", "4F+1mDox", "4F+7mDox", "4F+10mDox","LAKI","LAKI+Dox"))) %>% filter(groups%in%c("LAKI","LAKI+Dox")) %>% mutate(treatment = as.numeric(as.character(factor(groups, levels = c("LAKI","LAKI+Dox"), labels = c(0,1))))) %>% group_split(Tissue) names(rejuvenationLAKIsamples) <- sapply(rejuvenationLAKIsamples, function(x){paste("rejuvenationLAKI", x$Tissue[1], sep = "_")}) rejuvenationLAKIsamples <- plyr::llply(rejuvenationLAKIsamples, function(x){x <- x %>% dplyr::select(Basename, treatment)}) # CR treatment in experiment 1 does not seem consistant, lets ignore it CRsamples <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40/N25.2019-9071MouseLiverDietTakahashi/SampleSheetAgeN25final.csv") %>% filter(CanBeUsedForAgingStudies=="yes") %>% dplyr::select(Basename, Tissue, CalorieRestriction) %>% dplyr::rename(CR = CalorieRestriction) %>% mutate(CR = ifelse(CR=="no", "Control", "CR")) %>% dplyr::select(Basename, CR)%>% mutate(CR = factor(CR, levels=c("Control", "CR"), labels = c(0,1))) # highFatSamples <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40/N08.2018-9250and2019-9077MouseLiverBloodMozhui/SampleSheetAgeN08final.csv")%>% filter(CanBeUsedForAgingStudies=="yes") %>% filter(Tissue=="Liver") %>% dplyr::select(Basename, Tissue, OriginalDietCode) %>% mutate(OriginalDietCode = ifelse(OriginalDietCode=="Chow", "Control", ifelse(OriginalDietCode=="ADF", "Intermittant_Fasting", "High_Fat_Diet")))%>% dplyr::rename(highFat = OriginalDietCode) %>% filter(highFat%in%c("Control", "High_Fat_Diet"))%>% dplyr::select(Basename, highFat) %>% mutate(highFat = factor(highFat, levels=c("Control", "High_Fat_Diet"), labels = c(0,1))) KO_samples <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40//N02.2018-9290.2019-9235.2020-9012.2020-9074.2020-9065MouseWang//SampleSheetAgeN02final.csv") GHRKO_Liver <- KO_samples %>% filter(Genotype %in% c("GHRKO", "WT")& Age>0.5&Age<1&Tissue=="Liver") %>% mutate(GHRKO = ifelse(Genotype=="GHRKO", 1, 0)) %>% dplyr::select(Basename, GHRKO) %>% mutate(GHRKO = factor(GHRKO, levels=c(0,1))) GHRKO_Cortex <- KO_samples %>% filter(Genotype %in% c("GHRKO", "WT")& Age>0.5&Age<1&Tissue=="Cortex") %>% mutate(GHRKO = ifelse(Genotype=="GHRKO", 1, 0)) %>% dplyr::select(Basename, GHRKO) %>% mutate(GHRKO = factor(GHRKO, levels=c(0,1))) GHRKO_Hippocampus <- KO_samples %>% filter(Genotype %in% c("GHRKO", "WT")& Age>0.5&Age<1&Tissue=="Hippocampus") %>% mutate(GHRKO = ifelse(Genotype=="GHRKO", 1, 0)) %>% dplyr::select(Basename, GHRKO) %>% mutate(GHRKO = factor(GHRKO, levels=c(0,1))) GHRKO_Kidney <- KO_samples %>% filter(Genotype %in% c("GHRKO", "WT")& Age>0.5&Age<1&Tissue=="Kidney") %>% mutate(GHRKO = ifelse(Genotype=="GHRKO", 1, 0)) %>% dplyr::select(Basename, GHRKO) %>% mutate(GHRKO = factor(GHRKO, levels=c(0,1))) #interSamples <- list(GHRKO= GHRKO_samples, CaloricRestriction=CRsamples, highFatDiet=highFatSamples) interSamples <- list(GHRKO_Hippocampus=GHRKO_Hippocampus,GHRKO_Cortex=GHRKO_Cortex,GHRKO_Kidney=GHRKO_Kidney ,GHRKO_Liver= GHRKO_Liver, CaloricRestriction=CRsamples, highFatDiet=highFatSamples) interSamples <- append(interSamples, rejuvenation4F10mSamples) interSamples <- append(interSamples, rejuvenation4F7mSamples) interSamples <- append(interSamples, rejuvenationLAKIsamples) interSamples <- append(interSamples, parabiosisSamplesYoungs) interSamples <- append(interSamples, parabiosisSamplesOlds) moduleTraitCor <- lapply(interSamples, function(y){ datTraits <- y %>% filter(Basename %in% rownames(MEsIntervention)) %>% tibble::column_to_rownames("Basename") ME_lines <- MEsIntervention[rownames(datTraits), ] moduleTraitCor=cor(ME_lines,datTraits,use="p") moduleTraitPvalue=corPvalueStudent(moduleTraitCor,nrow(datTraits)) cor <- as.data.frame(moduleTraitCor) #if(rowSums(!is.na(moduleTraitCor))==0){moduleTraitCor=NULL} return(cor) }) names(moduleTraitCor) <- names(interSamples) moduleTraitCor <- as.data.frame(bind_cols(moduleTraitCor)) colnames(moduleTraitCor) <-names(interSamples) ## moduleTraitPvalue <- lapply(interSamples, function(y){ datTraits <- y %>% filter(Basename %in% rownames(MEsIntervention)) %>% tibble::column_to_rownames("Basename") ME_lines <- MEsIntervention[rownames(datTraits), ] moduleTraitCor=cor(ME_lines,datTraits,use="p") moduleTraitPvalue=corPvalueStudent(moduleTraitCor,nrow(datTraits)) %>% as.data.frame() #if(rowSums(!is.na(moduleTraitPvalue))==0){moduleTraitPvalue=NULL} return(moduleTraitPvalue) }) names(moduleTraitPvalue) <- names(interSamples) moduleTraitPvalue <- as.data.frame(bind_cols(moduleTraitPvalue)) colnames(moduleTraitPvalue) <-names(interSamples) summaryResults <- as.data.frame(moduleTraitPvalue) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults) %>% mutate(text=paste(round(r,2),"\n", formatC(pval, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% filter(pval<0.059) %>% dplyr::select(-pval)%>% spread(key = "var", value = "r") %>% filter(rowSums(!is.na(.))>4) %>%filter((CaloricRestriction>0&highFatDiet<0&GHRKO_Liver>0)|(CaloricRestriction<0&highFatDiet>0&GHRKO_Liver<0)) %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var"))%>% mutate(dir = as.factor(sign(r)))%>% mutate(ID = paste(modules, var, sep = "_")) %>% dplyr::select(-dir) %>% mutate(pval =ifelse(is.na(r), NA, pval)) summaryResults4 <- summaryResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", "*")) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryResults4 <- summaryResults4[rownames(moduleTraitCor), colnames(moduleTraitCor)] identical(rownames(summaryResults4), rownames(moduleTraitCor)) interventionModules <- rownames(summaryResults4)[rowSums(summaryResults4!="")>0] interventionModules <- interventionModules[c(3, 5, 6, 1, 4, 2)] #interventionModules <- interventionModules[c(2, 5, 1, 3, 4)] moduleTraitCor <- t(moduleTraitCor) summaryResults4 <- t(summaryResults4) my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231] library(ComplexHeatmap) cols <- gsub("ME", "", colnames(moduleTraitCor)) names(cols) <- colnames(moduleTraitCor) association <- KMEs %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) %>% mutate(modules = paste("ME", moduleColors, sep = "")) %>% right_join(data.frame(modules = colnames(moduleTraitCor))) association <- association[order(match(association$modules, colnames(moduleTraitCor))),] # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(moduleTraitCor), col = list(Modules = cols), show_legend = FALSE) colnames(moduleTraitCor) <- gsub("ME", "", colnames(moduleTraitCor)) Heatmap <- Heatmap(moduleTraitCor, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = F, row_dend_reorder=F, column_title = "Modules with dynamic responses to lifespan interventions", cell_fun = function(j, i, x, y, w, h, col) { grid.text(summaryResults4[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=25))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15)) jpeg(file = "Heatmap modules Interventions.jpeg", width = 18, height = 12, units = "in", res = 300) draw(Heatmap, heatmap_legend_side = "left",annotation_legend_side="left", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ```{r heatmap intervention module for the paper} #interSamples <- list(GHRKO= GHRKO_samples, CaloricRestriction=CRsamples, highFatDiet=highFatSamples) interSamples <- list(GHRKO= GHRKO_Liver, CaloricRestriction=CRsamples, highFatDiet=highFatSamples) moduleTraitCor <- lapply(interSamples, function(y){ datTraits <- y %>% filter(Basename %in% rownames(MEsIntervention)) %>% tibble::column_to_rownames("Basename") ME_lines <- MEsIntervention[rownames(datTraits), ] moduleTraitCor=cor(ME_lines,datTraits,use="p") moduleTraitPvalue=corPvalueStudent(moduleTraitCor,nrow(datTraits)) if(rowSums(!is.na(moduleTraitCor))==0){moduleTraitCor=NULL} return(moduleTraitCor) }) names(moduleTraitCor) <- names(interSamples) moduleTraitCor <- bind_cols(plyr::llply(moduleTraitCor, function(x){ x <- bind_cols(x)})) colnames(moduleTraitCor) <- names(interSamples) rownames(moduleTraitCor) <- colnames(MEsIntervention) ## moduleTraitPvalue <- lapply(interSamples, function(y){ datTraits <- y %>% filter(Basename %in% rownames(MEsIntervention)) %>% tibble::column_to_rownames("Basename") ME_lines <- MEsIntervention[rownames(datTraits), ] moduleTraitCor=cor(ME_lines,datTraits,use="p") moduleTraitPvalue=corPvalueStudent(moduleTraitCor,nrow(datTraits)) if(rowSums(!is.na(moduleTraitPvalue))==0){moduleTraitPvalue=NULL} return(moduleTraitPvalue) }) names(moduleTraitPvalue) <- names(interSamples) moduleTraitPvalue <- bind_cols(plyr::llply(moduleTraitPvalue, function(x){ x <- bind_cols(x)})) colnames(moduleTraitPvalue) <- names(interSamples) rownames(moduleTraitPvalue) <- colnames(MEsIntervention) summaryResults <- as.data.frame(moduleTraitPvalue) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "pval", -modules) summaryResults2 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% bind_cols(summaryResults[, c("pval"), drop=FALSE]) %>% mutate(text=paste(round(r,2),"\n", formatC(pval, format = "e", digits = 0), sep = "")) summaryResults3 <- as.data.frame(moduleTraitCor) %>% tibble::rownames_to_column(var="modules") %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var")) %>% filter(pval<0.059) %>% dplyr::select(-pval)%>% spread(key = "var", value = "r") %>% filter(rowSums(!is.na(.))>2) %>%filter((CaloricRestriction>0&highFatDiet<0&GHRKO>0)|(CaloricRestriction<0&highFatDiet>0&GHRKO<0)) %>% gather(key = "var", value = "r", -modules) %>% left_join(summaryResults, by=c("modules", "var"))%>% mutate(dir = as.factor(sign(r)))%>% mutate(ID = paste(modules, var, sep = "_")) %>% dplyr::select(-dir) %>% mutate(pval =ifelse(is.na(r), NA, pval)) summaryResults4 <- summaryResults2 %>% dplyr::select(modules, var, text) %>% mutate(ID = paste(modules, var, sep = "_")) %>% left_join(summaryResults3[,c("ID", "pval")], by="ID") %>% mutate(text = ifelse(is.na(pval), "", "*")) %>% dplyr::select(-ID, -pval) %>% spread(key = "var", value = "text") %>% tibble::column_to_rownames(var = "modules") summaryResults4 <- summaryResults4[rownames(moduleTraitCor), colnames(moduleTraitCor)] identical(rownames(summaryResults4), rownames(moduleTraitCor)) moduleTraitCor <- t(moduleTraitCor) summaryResults4 <- t(summaryResults4) i <- sapply(interventionModules, function(x){grep(x, colnames(moduleTraitCor))}) j <- c(1:55)[-i] i <- c(i, j) moduleTraitCor <-moduleTraitCor[,i] summaryResults4 <- summaryResults4[,i] my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231] library(ComplexHeatmap) cols <- gsub("ME", "", colnames(moduleTraitCor)) names(cols) <- colnames(moduleTraitCor) association <- KMEs %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) %>% mutate(modules = paste("ME", moduleColors, sep = "")) %>% right_join(data.frame(modules = colnames(moduleTraitCor))) association <- association[order(match(association$modules, colnames(moduleTraitCor))),] # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(moduleTraitCor), col = list(Modules = cols), show_legend = FALSE) colnames(moduleTraitCor) <- gsub("ME", "", colnames(moduleTraitCor)) Heatmap <- Heatmap(moduleTraitCor, col = my_palette, show_column_names = TRUE, name = "Correlation with module", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = F, row_dend_reorder=F, column_title = "Modules with dynamic responses to lifespan interventions", cell_fun = function(j, i, x, y, w, h, col) { grid.text(summaryResults4[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=25))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15)) jpeg(file = "Heatmap modules Interventions, for paper.jpeg", width = 18, height = 3, units = "in", res = 300) draw(Heatmap, heatmap_legend_side = "left",annotation_legend_side="left", padding = unit(c(2, 10, 10, 30), "mm")) dev.off() ``` ## Summary table ```{r summary Table} association <- KMEs %>% dplyr::select(modules) %>% group_by(modules) %>% count() %>% dplyr::rename(moduleColors = modules, Freq = n) %>% mutate(modules=paste("ME", moduleColors, sep="")) modules <- unique(c(tissueModules, orderModules$modules, as.character(ageModules), as.character(maxLifeModulesMarginal), as.character(maxLifeModules), as.character(weightModulesMarg), sexModule, interventionModules, dogModules)) modules <- gsub("ME", "", modules) unknown <- association$moduleColors[which(!association$moduleColors%in%modules)] net <- bind_rows(orderModules, data.frame(modules = as.character(tissueModules), var = "Tissue"), data.frame(modules = as.character(ageModules), var = "Age"), data.frame(modules = unique(c(as.character(maxLifeModulesMarginal), as.character(maxLifeModules))), var = "Max age"), data.frame(modules = unique(c(as.character(weightModulesMarg))), var = "Species weight"), data.frame(modules = as.character(sexModule), var = "Sex"), data.frame(modules = as.character(interventionModules), var = "InterventionMarker"), data.frame(modules = as.character(dogModules), var = "dogModules"), data.frame(modules = as.character(unknown), var = "unclear")) %>% dplyr::rename("source" = modules, destination=var)%>% mutate(destination = factor(destination, levels = c("Tissue", unique(orderModules$var), "Age", "Max age","Species weight","Sex","InterventionMarker","dogModules", "unclear")))%>% tibble::rowid_to_column("id") sum <- bind_rows(orderModules, data.frame(modules = as.character(tissueModules), var = "Tissue"), data.frame(modules = as.character(ageModules), var = "Age"), data.frame(modules = unique(c(as.character(maxLifeModulesMarginal), as.character(maxLifeModules))), var = "Max age"), data.frame(modules = unique(c(as.character(weightModulesMarg))), var = "Species weight"), data.frame(modules = as.character(sexModule), var = "Sex"), data.frame(modules = as.character(interventionModules), var = "InterventionMarker"), data.frame(modules = as.character(dogModules), var = "dogModules"), data.frame(modules = as.character(unknown), var = "unclear")) %>% dplyr::rename("source" = modules, destination=var)%>% mutate(destination = factor(destination, levels = c("Tissue", unique(orderModules$var), "Age", "Max age","Species weight","Sex","InterventionMarker","dogModules", "unclear")))%>% tibble::rowid_to_column("id") sum3 <- sum %>% add_count(source) %>% group_by(source) %>% summarize(group = paste(destination, collapse = " ; ")) %>% dplyr::rename(modules = source) %>% left_join(association) %>%left_join(nodes2[,c("group", "color")]) %>% distinct() %>% mutate(ord = ifelse(group =="unknown", "x", ifelse(group=="Marsupials", "y", group))) %>% arrange(-Freq, ord) %>% left_join(sum2) lev <- unique(sum3$group) sum3 <- sum3 %>% mutate(group= factor(group, levels = lev)) write.csv(sum3, "summary table.csv") # sum <- sum %>% mutate(source=gsub("ME", "", source)) %>% mutate(destination=as.character(destination)) %>% mutate(destination = ifelse(grepl(pattern = "[0-9]+", x = destination), "Order", destination))%>% group_by(source) %>% summarize(destination=paste(destination, collapse = "; ")) %>% right_join(association, by=c("source"="moduleColors")) %>%arrange(-Freq) ``` ```{r plot for GHRKO samples} # load required data and then run these codes GHRKO_samples <- KO_samples %>% filter(Genotype %in% c("GHRKO", "WT")& Age>0.5&Age<1) %>% mutate(a = ifelse(Genotype=="GHRKO", 1, 0)) %>% dplyr::select(Basename, Tissue, a) %>% setnames(new = c("Basename", "Tissue", "GHRKO")) %>%group_by(Tissue) %>% filter(sum(GHRKO)>0) %>% group_split(Tissue) names(GHRKO_samples) <- sapply(GHRKO_samples, function(x){x$Tissue[1]}) targetModules <- c("MEpurple", "MEsalmon4", "MEskyblue3", "MEivory","MEroyalblue", "MElavenderblush3", "MEmagenta", "MEblack", "MEmidnightblue", "MEtan") plots <- lapply(c(3,4), function(x){ samp <- GHRKO_samples[[x]] tit <- paste("Growth Hormone Receptor KO (GHRKO) mouse: ",names(GHRKO_samples)[[x]], sep = "") gdatGHRKO <- MEs %>% tibble::rownames_to_column(var = "Basename") %>% filter(Basename%in%samp$Basename)%>% right_join(samp, by="Basename") %>% dplyr::select(targetModules, GHRKO) %>% gather(targetModules, key = "modules", value = "eigengene") %>% mutate(modules = factor(modules, levels = targetModules)) %>% mutate(GHRKO = factor(GHRKO, levels = c(0,1), labels = c("Wt", "GHRKO"))) %>% mutate(eigengene=scales::rescale(eigengene, to = c(1,2))) sum <- gdatGHRKO %>% filter(GHRKO=="Wt")%>%ungroup()%>% group_by(modules) %>% summarize(mean=abs(mean(eigengene))) # gdatGHRKO <- gdatGHRKO %>% left_join(sum) %>% mutate(eigengene=eigengene/mean) p <- gdatGHRKO %>% mutate(modules = factor(modules, levels = targetModules, labels = gsub("ME", "", targetModules)))%>% ggplot(aes(x=GHRKO, y=eigengene, fill=GHRKO))+geom_boxplot(notch = T)+ theme_classic(base_size = 15)+ylab("Normalized Eigengene")+ scale_fill_manual(values = c("maroon", "darkblue"))+ facet_wrap(.~modules,nrow = 1, scales = "free_y")+ ggpubr::stat_compare_means(comparisons = list(c("Wt", "GHRKO")), method = "wilcox.test", size=5, label.y.npc=0.9, color="red")+ggtitle(paste(tit))+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), strip.text = element_text(size=15), legend.position="none", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold"), plot.margin = unit(c(0,0,2.5,0),"cm"))+ scale_y_continuous(expand = expansion(mult = c(0, .2))) sum <- gdatGHRKO%>% mutate(modules = factor(modules, levels = targetModules, labels = gsub("ME", "", targetModules))) %>% group_split(modules) sum <- bind_rows(lapply(sum, function(y){ t.test.value <- wilcox.test(eigengene ~ GHRKO, data = y)$p.value a <- data.frame(modules=y$modules[1], pval=t.test.value, max=max(y$eigengene)) })) %>% mutate(GHRKO="GHRKO") p2 <- gdatGHRKO %>% mutate(modules = factor(modules, levels = targetModules, labels = gsub("ME", "", targetModules)))%>% ggplot(aes(x=modules, y=eigengene, fill=GHRKO))+geom_boxplot(notch = T)+ theme_classic(base_size = 15)+ylab("Eigengene / Wt")+ scale_fill_manual(values = c("maroon", "darkblue"))+ #facet_wrap(.~modules,nrow = 1, scales = "free_y")+ ggpubr::stat_compare_means(comparisons = list(c("Wt", "GHRKO")), method = "wilcox.test", size=5, label.y.npc=0.9, color="red")+ggtitle(paste(tit))+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), strip.text = element_text(size=15), legend.position="top", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold", size=15), plot.margin = unit(c(0,0,2.5,0),"cm"))+ scale_y_continuous(expand = expansion(mult = c(0, .2)))+geom_text(data = sum, aes(x=modules,label=formatC(pval,digits = 2, format = "e"), y=max), color="red")+geom_vline(xintercept = 6.5, color="red", linetype="dashed") #if(x==1){p = p+theme(axis.title.x = element_blank(), axis.text.x = element_blank(), plot.margin = unit(c(0,0,2.5,0),"cm"))} return(p2) }) pdf(file = "GHRKO liver.pdf", width = 8, height = 5) plots[[2]]+theme(plot.margin = unit(c(0,0,0,0),"cm")) dev.off() p <- ggpubr::ggarrange(plotlist = plots, nrow = 2, common.legend = T, legend = "top")+theme(legend.key.size = unit(4,"cm")) pdf(file = "GHRKO modules.pdf", width = 15, height = 8) p dev.off() ``` ## Caloric restriction ```{r plot for CR, Rapamycin samples} # CR treatment in experiment 1 does not seem consistant, lets ignore it CRsamples1 <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40//.../SampleSheetAgeN25final.csv") %>% filter(CanBeUsedForAgingStudies=="yes") %>% dplyr::select(Basename, Tissue, CalorieRestriction) %>% dplyr::rename(Intervention = CalorieRestriction) %>% mutate(Intervention = ifelse(Intervention=="no", "Control", "CR")) # CRsamples2 <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40//.../SampleSheetAgeN39final.csv") %>% filter(CanBeUsedForAgingStudies=="yes") %>% dplyr::select(Basename, Tissue,Treatment12m) %>% filter(Treatment12m%in% c("Control", "CR", "Rapamycin14")) %>% dplyr::rename(Intervention = Treatment12m) # CRsamples3 <- read.csv("~/Steve Horvath Lab Dropbox/N.Mammal40//.../SampleSheetAgeN08final.csv")%>% filter(CanBeUsedForAgingStudies=="yes") %>% filter(Tissue=="Liver") %>% dplyr::select(Basename, Tissue, OriginalDietCode) %>% mutate(OriginalDietCode = ifelse(OriginalDietCode=="Chow", "Control", ifelse(OriginalDietCode=="ADF", "Intermittant_Fasting", "High_Fat_Diet")))%>% dplyr::rename(Intervention = OriginalDietCode) %>% filter(Intervention%in%c("Control", "High_Fat_Diet")) CRsamples <- rbindlist(list(CRsamples1,CRsamples3), idcol = "experiment") %>% mutate(Intervention = factor(Intervention, levels=c("Control","High_Fat_Diet", "CR", "Intermittant_Fasting", "Rapamycin14"), labels=c("Control","High_Fat_Diet", "CR", "Intermittant_Fasting", "Rapamycin"))) targetModules <- c("MEpurple", "MEsalmon4", "MEskyblue3", "MEivory","MEroyalblue", "MElavenderblush3", "MEmagenta", "MEblack", "MEmidnightblue", "MEtan") gdatCR <- MEs %>% tibble::rownames_to_column(var = "Basename") %>% filter(Basename%in%CRsamples$Basename)%>% right_join(CRsamples, by="Basename") %>% dplyr::select(targetModules, Intervention, experiment) %>% gather(targetModules, key = "modules", value = "eigengene") %>% mutate(modules = factor(modules, levels = targetModules, labels = gsub("ME", "", targetModules))) %>% mutate(eigengene=scales::rescale(eigengene, to = c(1,2))) sum <- gdatCR %>% filter(Intervention=="Control")%>%ungroup()%>% group_by(modules,experiment) %>% summarize(mean=abs(mean(eigengene))) # gdatCR <- gdatCR %>% left_join(sum) %>% mutate(eigengene=eigengene/mean) p1 <- gdatCR%>% filter(experiment==1) %>% ggplot(aes(x=Intervention, y=eigengene, fill=Intervention))+geom_boxplot(notch = T)+ theme_classic(base_size = 15)+ylab("Eigengene")+ scale_fill_manual(values = c("maroon", "darkgreen"))+ facet_wrap(.~modules,nrow = 1, scales = "free")+ ggpubr::stat_compare_means(comparisons = list(c("Control", "CR")), method = "wilcox.test", size=5, label.y.npc=0.9, color="red")+ggtitle("Experiment 1, Caloric restriction, liver")+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), strip.text = element_text(size=15), legend.position="none", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold"), plot.margin = unit(c(0,0,0,0),"cm"))+ scale_y_continuous(expand = expansion(mult = c(0, .2))) sum <- gdatCR %>% group_split(modules) sum <- bind_rows(lapply(sum, function(y){ y <- y%>% filter(experiment==1) %>% ungroup() t.test.value <- wilcox.test(eigengene ~ Intervention, data = y)$p.value a <- data.frame(modules=y$modules[1], pval=t.test.value, max=max(y$eigengene)) })) %>% mutate(Intervention="CR") p2 <- gdatCR%>% filter(experiment==1) %>% ggplot(aes(x=modules, y=eigengene, fill=Intervention))+geom_boxplot(notch = T)+ theme_classic(base_size = 15)+ylab("Eigengene / Control")+ scale_fill_manual(values = c("maroon", "darkblue"))+ #facet_wrap(.~modules,nrow = 1, scales = "free_y")+ ggpubr::stat_compare_means(comparisons = list(c("Control", "CR")), method = "wilcox.test", size=5, label.y.npc=0.9, color="red")+ggtitle(paste("Caloric restriction, liver"))+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), strip.text = element_text(size=15), legend.position="top", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold", size=15), plot.margin = unit(c(0,0,2.5,0),"cm"))+ scale_y_continuous(expand = expansion(mult = c(0, .2)))+geom_text(data = sum, aes(x=modules,label=formatC(pval,digits = 2, format = "e"), y=max), color="red")+geom_vline(xintercept = 6.5, color="red", linetype="dashed") pdf(file = "CR experiment.pdf", width = 8, height = 5) p2+theme(plot.margin = unit(c(0,0,0,0),"cm")) dev.off() # p2 <- gdatCR %>% filter(experiment==2) %>% ggplot(aes(x=Intervention, y=eigengene, fill=Intervention))+geom_boxplot(notch = T)+ # theme_classic(base_size = 15)+ylab("Eigengene")+ # scale_fill_manual(values = c("maroon", "darkgreen", "tomato"))+ # facet_wrap(.~modules,nrow = 1, scales = "free")+ # ggpubr::stat_compare_means(label = "p.signif", method = "wilcox.test", # ref.group = "Control", size=6, label.y.npc=0.9, color="red")+ggtitle("Experiment 2, Caloric restriction, Rapamycin, liver")+ # theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), # strip.text = element_text(size=15), legend.position="none", # legend.key.size = unit(0.3, "cm"), # plot.title = element_text(hjust = 0.5, face="bold"), plot.margin = unit(c(0,0,1.5,0),"cm")) p3 <- gdatCR %>% filter(experiment==2) %>% droplevels() %>% ggplot(aes(x=Intervention, y=eigengene, fill=Intervention))+geom_boxplot(notch = T)+ theme_classic(base_size = 15)+ylab("Eigengene")+ scale_fill_manual(values = c("maroon", "orange"))+ facet_wrap(.~modules,nrow = 1, scales = "free")+ ggpubr::stat_compare_means(comparisons = list(c("Control", "High_Fat_Diet")), method = "wilcox.test", size=5, label.y.npc=0.9, color="red")+ggtitle("Experiment 2, High fat diet, liver")+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), strip.text = element_text(size=15), legend.position="none", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold"), plot.margin = unit(c(0,0,0,0),"cm"))+ scale_y_continuous(expand = expansion(mult = c(0, .2))) sum <- gdatCR %>% group_split(modules) sum <- bind_rows(lapply(sum, function(y){ y <- y%>% filter(experiment==2)%>% droplevels() %>% ungroup() t.test.value <- wilcox.test(eigengene ~ Intervention, data = y)$p.value a <- data.frame(modules=y$modules[1], pval=t.test.value, max=max(y$eigengene)) })) %>% mutate(Intervention="High_Fat_Diet") p2 <- gdatCR%>% filter(experiment==2) %>% droplevels() %>% mutate(Intervention=factor(Intervention, levels=c("Control", "High_Fat_Diet"))) %>% ggplot(aes(x=modules, y=eigengene, fill=Intervention))+geom_boxplot(notch = T)+ theme_classic(base_size = 15)+ylab("Eigengene / Control")+ scale_fill_manual(values = c("maroon", "orange"))+ #facet_wrap(.~modules,nrow = 1, scales = "free_y")+ ggpubr::stat_compare_means(comparisons = list(c("Control", "High_Fat_Diet")), method = "wilcox.test", size=5, label.y.npc=0.9, color="red")+ggtitle(paste("High fat diet, liver"))+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), strip.text = element_text(size=15), legend.position="top", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold", size=15), plot.margin = unit(c(0,0,2.5,0),"cm"))+ scale_y_continuous(expand = expansion(mult = c(0, .2)))+geom_text(data = sum, aes(x=modules,label=formatC(pval,digits = 2, format = "e"), y=max), color="red")+geom_vline(xintercept = 6.5, color="red", linetype="dashed") pdf(file = "high fat diet.pdf", width = 8, height = 5) p2+theme(plot.margin = unit(c(0,0,0,0),"cm")) dev.off() ``` ## GREAT anlaysis all ```{r GREAT analysis} library("rGREAT") human = readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V4.RDS") background1 <- human %>% dplyr::select(seqnames, CGstart, CGend, CGid) %>% filter(!is.na(CGstart)) %>% dplyr::rename(CHR = seqnames) %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes")])%>% setnames(new = c("chr", "start", "end","CGid")) background2 <- human %>% dplyr::select(seqnames, CGstart, CGend, CGid) %>% filter(!is.na(CGstart)) %>% dplyr::rename(CHR = seqnames) %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianAndMarsupialCor0.8=="yes")])%>% setnames(new = c("chr", "start", "end","CGid")) net1_top500 <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(500, abs(KME))) }) net1_top500 <- bind_rows(net1_top500) input <- net1_top500 %>% dplyr::select(-KME) %>% left_join(background1) %>% filter(!is.na(start)) %>% mutate(modules = paste("ME", modules, sep = "")) %>% left_join(sum2[,c("modules", "group")]) %>% group_split(modules) names(input) <- sapply(input, function(x){x$modules[1]}) input <- plyr::llply(input, function(x){ x <- x %>% dplyr::select(-modules) %>% group_split(group) names(x) <- sapply(x, function(y){y$group[1]}) x <- lapply(x, function(y){y = y %>% dplyr::select(-group) %>% setnames(new = c("CGid", "chr", "start", "end")) %>% relocate(CGid, .after=end) }) return(x) }) results <- plyr::llply(input, function(y){ result <- plyr::llply(y, function(x){ job <- submitGreatJob(x, bg = background1, species = "hg19", includeCuratedRegDoms = TRUE, rule = c("basalPlusExt"), adv_upstream = 5.0, adv_downstream = 1.0, adv_span = 50, request_interval = 0, version="3.0.0", max_tries = 10) ontology.all=availableOntologies(job) output.all <- plyr::llply(ontology.all, function(j){ cat(paste(j,"\n", sep = " ")) out0.list = tryCatch(getEnrichmentTables(job, download_by="tsv",ontology=j),error=function(e){NULL}) if(!is.null(out0.list)){ out0.list <- out0.list[[1]] %>% dplyr::select(-FgRegionNames, -BgRegionNames, -BgGeneNames) %>% filter(HyperP<1e-3) } else { out0.list=NULL} }) output.all <- plyr::compact(output.all) %>% rbindlist(., use.names = FALSE, fill = FALSE) }) result2 <- result %>% rbindlist(idcol = "group") }) results.combined <- results %>% rbindlist(idcol = "class") write.csv(results.combined[,-c("FgRegionNames", "BgRegionNames")], "enrichment results, Network1 all modules.csv") results.combined <- read.csv("enrichment results, Network1 all modules.csv") ``` ```{r GREAT analysis network 2 consensus, message=F} library("rGREAT") human = readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V4.RDS") background1 <- human %>% dplyr::select(seqnames, CGstart, CGend, CGid) %>% filter(!is.na(CGstart)) %>% dplyr::rename(CHR = seqnames) %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes")])%>% setnames(new = c("chr", "start", "end","CGid")) background2 <- human %>% dplyr::select(seqnames, CGstart, CGend, CGid) %>% filter(!is.na(CGstart)) %>% dplyr::rename(CHR = seqnames) %>% filter(CGid %in% mappability$CGid[which(mappability$mapToEutherianAndMarsupialCor0.8=="yes")])%>% setnames(new = c("chr", "start", "end","CGid")) net2_top500 <- lapply(1:(ncol(KMEs2)-1), function(x){ n <- colnames(KMEs2)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs2 %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(500, abs(KME))) }) net2_top500 <- bind_rows(net2_top500) input <- net2_top500 %>% dplyr::select(-KME) %>% left_join(background2) %>% mutate(modules = paste("ME", modules, sep = "")) %>% group_split(modules) names(input) <- sapply(input, function(x){x$modules[1]}) input <- plyr::llply(input, function(x){ x <- x %>% dplyr::select(-modules)%>% relocate(CGid, .after=end) return(x) }) results <- plyr::llply(input, function(y){ job <- submitGreatJob(y, bg = background2, species = "hg19", includeCuratedRegDoms = TRUE, rule = c("basalPlusExt"), adv_upstream = 5.0, adv_downstream = 1.0, adv_span = 50, request_interval = 0, version="3.0.0", max_tries = 10) ontology.all=availableOntologies(job) output.all <- plyr::llply(ontology.all, function(j){ cat(paste(j,"\n", sep = " ")) out0.list = tryCatch(getEnrichmentTables(job, download_by="tsv",ontology=j),error=function(e){NULL}) if(!is.null(out0.list)){ out0.list <- out0.list[[1]] %>% dplyr::select(-FgRegionNames, -BgRegionNames, -BgGeneNames) %>% filter(HyperP<1e-3) } else { out0.list=NULL} }) output.all <- plyr::compact(output.all) %>% rbindlist(., use.names = FALSE, fill = FALSE) }) results.combined <- results %>% rbindlist(idcol = "class") write.csv(results.combined, "enrichment results, Network2 all modules.csv") ## consensus targets <- unique(matchedCons$consMatched) names(targets) <- paste("ME",unique(matchedCons$consMatched),sep = "") net3_top <- plyr::llply(targets, function(x){ CGs <- matchedCons %>% filter(consMatched==x) inputs <- background1 %>% filter(CGid %in%CGs$CGid) }) results <- plyr::llply(net3_top, function(y){ job <- submitGreatJob(y, bg = background1, species = "hg19", includeCuratedRegDoms = TRUE, rule = c("basalPlusExt"), adv_upstream = 5.0, adv_downstream = 1.0, adv_span = 50, request_interval = 0, version="3.0.0", max_tries = 10) ontology.all=availableOntologies(job) output.all <- plyr::llply(ontology.all, function(j){ cat(paste(j,"\n", sep = " ")) out0.list = tryCatch(getEnrichmentTables(job, download_by="tsv",ontology=j),error=function(e){NULL}) if(!is.null(out0.list)){ out0.list <- out0.list[[1]] %>% dplyr::select(-FgRegionNames, -BgRegionNames, -BgGeneNames) %>% filter(HyperP<1e-3) } else { out0.list=NULL} }) output.all <- plyr::compact(output.all) %>% rbindlist(., use.names = FALSE, fill = FALSE) }) results.combined <- results %>% rbindlist(idcol = "class") write.csv(results.combined, "enrichment results, Network consensus all modules.csv") ``` ```{r} net1_topAll <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(30000, abs(KME))) }) net1_topAll <- bind_rows(net1_topAll) net1_topAll <- net1_topAll %>% left_join(geneMapSum)%>% left_join(geneMapSum2) #write.csv(net1_top500, "Net1KME.csv") ``` ## model preservation ```{r model preservation} mp_net1 <- readRDS("WGCNA results/No marsupials/model preservation no Marsupials.RDS") # Zsummary <- rbindlist(lapply(2:length(mp_net1$preservation$Z$ref.Primates), function(x){ dat <- mp_net1$preservation$Z$ref.Primates[[x]] a <- data.frame(t(dat$Zsummary.pres)) names(a) <- rownames(a) return(a) })) colnames(Zsummary) <- rownames(mp_net1$preservation$Z$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_net1$preservation$Z$ref.Primates)[-1]) Zsummary <- Zsummary %>% mutate(Order = cnames) %>% dplyr::select(Order,sum2$moduleColors) %>% gather(key = "modules", value = "Zsummary", -Order) # pval pval <- rbindlist(lapply(2:length(mp_net1$preservation$log.p$ref.Primates), function(x){ dat <- mp_net1$preservation$log.p$ref.Primates[[x]] a <- data.frame(t(-dat$log.psummary.pres)) names(a) <- rownames(a) return(a) })) colnames(pval) <- rownames(mp_net1$preservation$log.p$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_net1$preservation$log.p$ref.Primates)[-1]) pval <- pval %>% mutate(Order = cnames) %>% dplyr::select(Order, sum2$moduleColors) pval <- pval %>% gather(key = "modules", value = "pval", -Order) # medianRank medianRank <- rbindlist(lapply(2:length(mp_net1$preservation$observed$ref.Primates), function(x){ dat <- mp_net1$preservation$observed$ref.Primates[[x]] a <- data.frame(t(dat$medianRank.pres)) names(a) <- rownames(a) return(a) })) colnames(medianRank) <- rownames(mp_net1$preservation$observed$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_net1$preservation$log.p$ref.Primates)[-1]) medianRank <- medianRank %>% mutate(Order = cnames) %>% dplyr::select(Order, sum2$moduleColors)%>% gather(key = "modules", value = "medianRank", -Order) gDat <- Zsummary %>% left_join(pval) %>% left_join(medianRank) %>% mutate(modules = factor(modules, levels=sum2$moduleColors)) %>% mutate(Order = factor(Order, levels=levels(samples$Order))) p1 <- gDat %>% mutate(Zsummary=ifelse(pval<1.3|Zsummary<1.9, NA, Zsummary))%>% mutate(medianRank=ifelse(pval<1.3|Zsummary<1.9, NA, medianRank))%>%ggplot(aes(x=modules, y=Order, color=Zsummary, size=medianRank))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))+scale_color_gradient(low = "blue", high = "red", limits=c(1.9,65))+ggtitle("Network 1 module preservation in different orders compared to primates")+scale_size(range = c(5,0.5), limits = c(1,60))+geom_rect(xmin = as.numeric(gDat$modules)-0.5, xmax =as.numeric(gDat$modules)+0.5, ymin = -0.5, ymax = Inf, aes(fill=modules), alpha =0.01, color=NA, show.legend = FALSE)+scale_fill_manual(values = levels(gDat$modules)) pdf("Network1 module preservation.pdf", width = 12, height = 4.5) p1 dev.off() ``` ```{r model preservation, network2} mp_net2 <- readRDS("WGCNA results/All, mappability filter/model preservation all samples.RDS") #matchedModules <- mergedData %>% dplyr::select(net2, net2Matched) %>% distinct() # Zsummary <- rbindlist(lapply(2:length(mp_net2$preservation$Z$ref.Primates), function(x){ dat <- mp_net2$preservation$Z$ref.Primates[[x]] a <- data.frame(t(dat$Zsummary.pres)) names(a) <- rownames(a) return(a) })) colnames(Zsummary) <- rownames(mp_net2$preservation$Z$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_net2$preservation$Z$ref.Primates)[-1]) Zsummary <- Zsummary %>% mutate(Order = cnames) %>% gather(key = "modules", value = "Zsummary", -Order) # pval pval <- rbindlist(lapply(2:length(mp_net2$preservation$log.p$ref.Primates), function(x){ dat <- mp_net2$preservation$log.p$ref.Primates[[x]] a <- data.frame(t(-dat$log.psummary.pres)) names(a) <- rownames(a) return(a) })) colnames(pval) <- rownames(mp_net2$preservation$log.p$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_net2$preservation$log.p$ref.Primates)[-1]) pval <- pval %>% mutate(Order = cnames) pval <- pval %>% gather(key = "modules", value = "pval", -Order) # medianRank medianRank <- rbindlist(lapply(2:length(mp_net2$preservation$observed$ref.Primates), function(x){ dat <- mp_net2$preservation$observed$ref.Primates[[x]] a <- data.frame(t(dat$medianRank.pres)) names(a) <- rownames(a) return(a) })) colnames(medianRank) <- rownames(mp_net2$preservation$observed$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_net2$preservation$log.p$ref.Primates)[-1]) medianRank <- medianRank %>% mutate(Order = cnames)%>% gather(key = "modules", value = "medianRank", -Order) net2Mod <- unique(Zsummary$modules)[match(levels(gDat$modules), unique(Zsummary$modules))] net2Mod <-c(net2Mod[which(!is.na(net2Mod))], unique(Zsummary$modules)[which(!unique(Zsummary$modules)%in%levels(gDat$modules))]) gDat2 <- Zsummary %>% left_join(pval) %>% left_join(medianRank) %>% mutate(Order = factor(Order, levels=levels(samples$Order)))%>% mutate(modules = factor(modules, levels =net2Mod)) %>% droplevels() p2 <- gDat2 %>% mutate(Zsummary=ifelse(pval<1.3|Zsummary<1.9, NA, Zsummary))%>% mutate(medianRank=ifelse(pval<1.3|Zsummary<1.9, NA, medianRank))%>%ggplot(aes(x=modules, y=Order, color=Zsummary, size=medianRank))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))+scale_color_gradient(low = "blue", high = "red", limits=c(1.9,65))+ggtitle("Network 2 module preservation in different orders compared to primates")+scale_size(range = c(5,0.5), limits = c(1,60))+geom_rect(xmin = as.numeric(gDat2$modules)-0.5, xmax =as.numeric(gDat2$modules)+0.5, ymin = -0.5, ymax = Inf, aes(fill=modules), alpha =0.01, color=NA, show.legend = FALSE)+scale_fill_manual(values = net2Mod) pdf("Network2 module preservation.pdf", width = 10, height = 6) p2 dev.off() ``` ```{r model preservation, consensus} mp_cons <- readRDS("WGCNA results/Consensus WGCNA 57 species tissues/model preservation consesus Network.RDS") #matchedModules <- mergedData %>% dplyr::select(net2, net2Matched) %>% distinct() # Zsummary <- rbindlist(lapply(2:length(mp_cons$preservation$Z$ref.Primates), function(x){ dat <- mp_cons$preservation$Z$ref.Primates[[x]] a <- data.frame(t(dat$Zsummary.pres)) names(a) <- rownames(a) return(a) })) colnames(Zsummary) <- rownames(mp_cons$preservation$Z$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_cons$preservation$Z$ref.Primates)[-1]) Zsummary <- Zsummary %>% mutate(Order = cnames)%>% dplyr::select(Order,unique(matchedCons$consMatched))%>% gather(key = "modules", value = "Zsummary", -Order) # pval pval <- rbindlist(lapply(2:length(mp_cons$preservation$log.p$ref.Primates), function(x){ dat <- mp_cons$preservation$log.p$ref.Primates[[x]] a <- data.frame(t(-dat$log.psummary.pres)) names(a) <- rownames(a) return(a) })) colnames(pval) <- rownames(mp_cons$preservation$log.p$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_cons$preservation$log.p$ref.Primates)[-1]) pval <- pval %>% mutate(Order = cnames)%>% dplyr::select(Order,unique(matchedCons$consMatched)) pval <- pval %>% gather(key = "modules", value = "pval", -Order) # medianRank medianRank <- rbindlist(lapply(2:length(mp_cons$preservation$observed$ref.Primates), function(x){ dat <- mp_cons$preservation$observed$ref.Primates[[x]] a <- data.frame(t(dat$medianRank.pres)) names(a) <- rownames(a) return(a) })) colnames(medianRank) <- rownames(mp_cons$preservation$observed$ref.Primates$inColumnsAlsoPresentIn.Cetacea) cnames <- gsub("inColumnsAlsoPresentIn.", "",names(mp_cons$preservation$log.p$ref.Primates)[-1]) medianRank <- medianRank %>% mutate(Order = cnames)%>% dplyr::select(Order,unique(matchedCons$consMatched))%>% gather(key = "modules", value = "medianRank", -Order) netConsMod <- unique(Zsummary$modules)[match(levels(gDat$modules), unique(Zsummary$modules))] netConsMod <-c(netConsMod[which(!is.na(netConsMod))], unique(Zsummary$modules)[which(!unique(Zsummary$modules)%in%levels(gDat$modules))]) gDat3 <- Zsummary %>% left_join(pval) %>% left_join(medianRank) %>% mutate(Order = factor(Order, levels=levels(samples$Order)))%>% mutate(modules = factor(modules, levels =netConsMod)) %>% droplevels() p3 <- gDat3 %>% mutate(Zsummary=ifelse(pval<1.3|Zsummary<1.9, NA, Zsummary))%>% mutate(medianRank=ifelse(pval<1.3|Zsummary<1.9, NA, medianRank))%>%ggplot(aes(x=modules, y=Order, color=Zsummary, size=medianRank))+geom_point_rast()+theme_classic()+theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))+scale_color_gradient(low = "blue", high = "red", limits=c(1.9,65))+ggtitle("Consensus Network module preservation in different orders compared to primates")+scale_size(range = c(5,0.5), limits = c(1,60))+geom_rect(xmin = as.numeric(gDat3$modules)-0.5, xmax =as.numeric(gDat3$modules)+0.5, ymin = -0.5, ymax = Inf, aes(fill=modules), alpha =0.01, color=NA, show.legend = FALSE)+scale_fill_manual(values = netConsMod) pdf("Network Consensus module preservation.pdf", width = 10, height = 6) p3 dev.off() p <- ggpubr::ggarrange(p1,p2,p3, ncol = 1, common.legend = TRUE, legend = "right") pdf("Network Consensus module preservation.pdf", width = 10, height = 12) p dev.off() ``` ## Create an parent GO heatmap for all modules ```{r} # library(rrvgo) # enrichment <- read.csv("enrichment results, Network1 all modules.csv") # GOterms <- enrichment %>% filter(Ontology%in% "GO Biological Process") # # scores <- setNames(-log10(GOterms$HyperP), GOterms$ID) # # simMatrix <- calculateSimMatrix(GOterms$ID, # orgdb="org.Hs.eg.db", # ont="BP", # method="Rel") # # reducedTerms <- reduceSimMatrix(simMatrix, # scores, # threshold=0.7, # orgdb="org.Hs.eg.db") # # # reducedTerms <- reducedTerms %>% dplyr::select(go, parentTerm) %>% dplyr::rename(ID=go) %>% right_join(GOterms) # # reducedTerms <- reducedTerms %>% filter(!is.na(parentTerm)) %>% group_by(class, group, parentTerm) %>% top_n(1, -HyperP) # # reducedTerms2 <- reducedTerms%>% group_by(class, group) %>% top_n(10, -HyperP) %>% dplyr::rename(pValue = HyperP, nCommonGenes = NumFgGenesHit) %>% mutate(blank = "") %>% mutate(group = gsub("ME", "", group)) %>% mutate(group= as.factor(group)) %>% mutate(class = gsub("ME", "", class))%>% mutate(class = factor(class)) # # # ## Lets make a heatmap # reducedTerms <- reducedTerms%>%dplyr::rename(pValue = HyperP, nCommonGenes = NumFgGenesHit) %>%mutate(pValue=-log10(pValue)) # saveRDS(reducedTerms, "reducedTerms.RDS") reducedTerms <- readRDS("reducedTerms.RDS") reducedTerms2 <- reducedTerms %>% filter(pValue>15)%>%group_by(class, parentTerm)%>% top_n(3, -pValue)%>%group_by(class)%>% top_n(3, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% spread(key = "class", value = "pValue")%>% tibble::column_to_rownames("parentTerm") # unk <- sum2$modules[which(!sum2$modules%in%colnames(reducedTerms2))] # # reducedTerms2 <- bind_cols(reducedTerms2, lapply(unk, function(x){x = rep(NA, nrow(reducedTerms2))})) # names(reducedTerms2)[50:55] <- unk # # # take the final summary table # reducedTerms2 <- reducedTerms2[,sum2$modules] reducedTerms2[is.na(reducedTerms2)] <- 0 #write.csv(reducedTerms2, "Modules gene ontology parent.csv") # manually add the categories to parent terms #reducedTerms2 <- read.csv("Modules gene ontology parent.csv") mainOntCategories<- read_xlsx("Modules gene ontology parent.xlsx", sheet = 2) reducedTerms2 <- reducedTerms%>% filter(pValue>10)%>%group_by(class, parentTerm)%>% top_n(20, -pValue)%>%group_by(class)%>% top_n(20, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% left_join(mainOntCategories[,c(2:3)]) %>% filter(!is.na(category)) %>% group_by(class, category) %>% top_n(1, pValue)%>% spread(key = "class", value = "pValue")%>% tibble::column_to_rownames("parentTerm") %>% arrange(category) #reducedTerms2 <- bind_cols(reducedTerms2, lapply(unk, function(x){x = rep(NA, nrow(reducedTerms2))})) #names(reducedTerms2)[51:56] <- unk # annotating top category of each module topCat <- reducedTerms%>% filter(pValue>10)%>%group_by(class, parentTerm)%>% top_n(20, -pValue)%>%group_by(class)%>% top_n(20, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% left_join(mainOntCategories[,c(2:3)]) %>% filter(!is.na(category)) %>% group_by(class) %>% top_n(1, pValue) %>% mutate(annot = "*") topCat <- reducedTerms%>% filter(pValue>10)%>%group_by(class, parentTerm)%>% top_n(20, -pValue)%>%group_by(class)%>% top_n(20, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% left_join(mainOntCategories[,c(2:3)]) %>% filter(!is.na(category)) %>% left_join(topCat, by = c("class", "category", "pValue", "parentTerm")) %>%dplyr::select(parentTerm, class, annot)%>% spread(key = "class", value = "annot")%>% tibble::column_to_rownames("parentTerm") #topCat <- bind_cols(topCat, lapply(unk, function(x){x = rep(NA, nrow(topCat))})) #names(topCat)[50:55] <- unk # Take sum2 from network analysis, I wanted to get the grouping from Netwrok analysis trCols <- as.character(sum2$color) names(trCols) <- sum2$group # row annotation df <- data.frame("parentTerm" = rownames(reducedTerms2), "category" = reducedTerms2$category) qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) set.seed(2022) catColors<- c(sample(col_vector, length(unique(df$category)), replace = FALSE)) names(catColors) <- unique(df$category) ra =rowAnnotation(Category = df$category, col = list(Category = catColors)) reducedTerms2 <- reducedTerms2 %>% dplyr::select(-category) #reducedTerms2 <- reducedTerms2[,sum2$modules[which(sum2$group!="Marsupials")]] reducedTerms2 <- reducedTerms2[,order(match(colnames(reducedTerms2),colnames(MEs)))] reducedTerms2[is.na(reducedTerms2)] <- 0 topCat <- topCat[rownames(reducedTerms2),colnames(reducedTerms2)] topCat[is.na(topCat)] <- "" cols <- gsub("(ME)", "", colnames(reducedTerms2)) names(cols) <- colnames(reducedTerms2) # A bar plot of module frequency #ha = HeatmapAnnotation(Modules = colnames(reducedTerms2), Traits = sum2$group[which(sum2$group!="Marsupials")], col = list(Modules = cols, Traits = trCols), show_legend =T) ha = HeatmapAnnotation(Modules = colnames(reducedTerms2), col = list(Modules = cols), show_legend =F) my_palette13 <- rev(brewer.pal(9,"Spectral")) colnames(reducedTerms2) <- gsub("ME", "", colnames(reducedTerms2)) Heatmap <- Heatmap(reducedTerms2, col = c("#E0E0E0",my_palette13), show_column_names = TRUE, name = "-log10 pvalue", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = FALSE, row_dend_reorder=FALSE, column_title = "Parent GO biological processes",row_names_gp = gpar(fontsize = 15), bottom_annotation = ha, cell_fun = function(j, i, x, y, w, h, col) { grid.text(topCat[i, j], x, y,vjust = 0.8, gp=gpar(fontsize=25))}, rect_gp = gpar(col = "darkgrey", lty = 1, lwd = 2)) pdf(file = "Heatmap Gene Ontology.pdf", width = 20, height = 15) draw(ra+Heatmap, heatmap_legend_side = "left", annotation_legend_side="left", padding = unit(c(2, 10, 10, 250), "mm")) dev.off() ###################################### # lets make heatmap of main categories reducedTerms2 <- reducedTerms%>% filter(pValue>10)%>%group_by(class, parentTerm)%>% top_n(20, -pValue)%>%group_by(class)%>% top_n(20, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% left_join(mainOntCategories[,c(2:3)]) %>% filter(!is.na(category)) %>% group_by(class, category) %>% top_n(1, pValue) %>% summarise(pValue = max(pValue))%>% group_by(class)%>% spread(key = "class", value = "pValue")%>% tibble::column_to_rownames("category") #%>% top_n(3, pValue) #reducedTerms2 <- bind_cols(reducedTerms2, lapply(unk, function(x){x = rep(NA, nrow(reducedTerms2))})) #names(reducedTerms2)[50:55] <- unk # # annotating top category of each module # topCat <- reducedTerms%>%group_by(class, parentTerm)%>% top_n(20, -pValue)%>%group_by(class)%>% top_n(20, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% left_join(mainOntCategories[,c(2:3)]) %>% filter(!is.na(category)) %>% group_by(class) %>% top_n(1, pValue) %>% mutate(annot = "*") # # topCat <- reducedTerms%>%group_by(class, parentTerm)%>% top_n(20, -pValue)%>%group_by(class)%>% top_n(20, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% left_join(mainOntCategories[,c(2:3)]) %>% filter(!is.na(category)) %>% left_join(topCat, by = c("class", "category", "pValue", "parentTerm")) %>%dplyr::select(parentTerm, class, annot)%>% spread(key = "class", value = "annot")%>% tibble::column_to_rownames("parentTerm") # # # topCat <- bind_cols(topCat, lapply(unk, function(x){x = rep(NA, nrow(topCat))})) # names(topCat)[50:54] <- unk # # Take sum2 from network analysis, I wanted to get the grouping from Netwrok analysis # trCols <- as.character(sum2$color[which(sum2$group!="Marsupials")]) # names(trCols) <- sum2$group[which(sum2$group!="Marsupials")] # # # row annotation # df <- data.frame("parentTerm" = rownames(reducedTerms2), "category" = reducedTerms2$category) # # qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] # col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) # set.seed(2022) # catColors<- c(sample(col_vector, length(unique(df$category)), replace = FALSE)) # names(catColors) <- unique(df$category) # # ra =rowAnnotation(Category = df$category, col = list(Category = catColors)) reducedTerms2 <- reducedTerms2[,order(match(colnames(reducedTerms2),colnames(MEs)))] reducedTerms2[is.na(reducedTerms2)] <- 0 # topCat <- topCat[rownames(reducedTerms2),colnames(reducedTerms2)] # topCat[is.na(topCat)] <- "" # # cols <- gsub("(ME)", "", colnames(reducedTerms2)) # names(cols) <- colnames(reducedTerms2) # A bar plot of module frequency #ha = HeatmapAnnotation(Modules = colnames(reducedTerms2), Traits = sum2$group[which(sum2$group!="Marsupials")], col = list(Modules = cols, Traits = trCols), show_legend =T) ha = HeatmapAnnotation(Modules = colnames(reducedTerms2), col = list(Modules = cols), show_legend =F) my_palette13 <- rev(brewer.pal(9,"Spectral")) colnames(reducedTerms2) <- gsub("ME", "", colnames(reducedTerms2)) Heatmap <- Heatmap(reducedTerms2, col = c("#E0E0E0",my_palette13), show_column_names = TRUE, name = "max -log10 pvalue", show_row_names = TRUE, column_dend_reorder = FALSE, cluster_columns = FALSE, cluster_rows = FALSE, row_dend_reorder=FALSE, column_title = "Top defined categories based on GO biological processes",row_names_gp = gpar(fontsize = 25), bottom_annotation = ha, rect_gp = gpar(col = "darkgrey", lty = 1, lwd = 2), column_names_gp = gpar(fontsize = 26), column_title_gp = gpar(fontsize = 30)) jpeg(file = "Heatmap Gene Ontology Categories.jpeg", width = 16, height = 12, units = "in", res = 300) draw(Heatmap, heatmap_legend_side = "left", annotation_legend_side="left", padding = unit(c(2, 10, 10, 50), "mm")) dev.off() ``` ```{r summ} topCatSum <- reducedTerms%>% filter(pValue>10)%>%group_by(class, parentTerm)%>% top_n(20, -pValue)%>%group_by(class)%>% top_n(20, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% left_join(mainOntCategories[,c(2:3)]) %>%filter(!is.na(category))%>% group_by(class) %>% top_n(3, pValue) %>% summarize(topGeneOntology = paste(unique(category), collapse = " ; ")) %>% dplyr::rename(modules = class) topTermSum <- reducedTerms%>% filter(pValue>10)%>%group_by(class, parentTerm)%>% top_n(20, -pValue)%>%group_by(class)%>% top_n(20, -pValue)%>%ungroup()%>% dplyr::select(class,parentTerm, pValue) %>%distinct()%>% left_join(mainOntCategories[,c(2:3)]) %>%filter(!is.na(category))%>% group_by(class) %>% top_n(3, pValue) %>% summarize(topGeneOntologyTerm = paste(unique(parentTerm), collapse = " ; ")) %>% dplyr::rename(modules = class) sum2 <- sum2 %>% left_join(topCatSum) sum2 <- sum2 %>% left_join(topTermSum) summaryTable <- sum2 %>% dplyr::select(-moduleColors, -color) summaryTable[is.na(summaryTable)] <- "" write.csv(summaryTable, "Summary table of all modules.csv") ``` ## TF enrichment ```{r TF enrichement} source("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/TF enrichment/CGenrichment function.Amin.R") input <- net1_top500 %>% dplyr::select(CGid, modules) %>%right_join(DNAmNetworks[,1,drop=FALSE], by="CGid") %>% mutate(groups = ifelse(is.na(modules), "background", modules)) TFbackground <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/TF enrichment/TF.backgroundForEnrichment.Charles.Amin.RDS") TFgenes <- read.delim("~/Steve Horvath Lab Dropbox/Amin Haghani/MammalianArrayNormalizationTools/TranscriptionalFactor.Enrichment.BreezeHaghani/pwm.gene.mapping.V3.withclusters.txt") TFenrichment <- CGenrichment(input=input, database = TFbackground, filter = "pval", topn = 10, ignoreGroup = "background", cutoff = 0.05, TFgenes=TFgenes) TFenrichment1 <- TFenrichment %>% arrange(pval) %>% mutate(motif=factor(motif, levels=make.unique(as.character(motif)))) %>% dplyr::rename(nCommonCpGs = nCommon, pValue = pval, modules = group) %>% filter(pValue<=0.05) write.csv(TFenrichment1, "CpG level TF enrichment.csv") ``` ## export modules as BED file ```{r export bed files} BEDfiles <- DNAmNetworks %>% dplyr::select(human.hg19.seqnames, human.hg19.CGstart, human.hg19.CGend, CGid, `network 1`) %>% setnames(new = c("chrom", 'chromStart', 'chromEnd', 'name', "modules")) %>% group_split(modules) lapply(BEDfiles, function(x){ fn <- x$modules[1] x <- x%>%dplyr::select(modules) write.table(x, paste("ChromHMM enrichment/BEDfiles/", fn, ".bed", sep = "")) }) ``` ## IPA analysis I used top 500 CpGs of the modules for IPA ```{r canonical pathways} top500 <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(500, abs(KME))) }) top500 <- rbindlist(top500) %>% left_join(geneMap[,c("CGid", "SYMBOL")]) %>% dplyr::select(modules, SYMBOL) %>% distinct() %>% mutate(sig = 1) %>% spread(key = modules, value = sig) %>% dplyr::select(SYMBOL,sum2$moduleColors ) #write.csv(top500, "gene for IPA.csv") cpFiles <- list.files("IPA analysis/", pattern = "canonical pathways", recursive = T, full.names = T) intervals <- list(c(1:20), c(21:40), c(41:55)) canonicalPathways <- rbindlist(lapply(1:length(cpFiles), function(x){ a <- readxl::read_xls(cpFiles[[x]], skip = 1) %>% setnames(new = c(names(.)[1], sum2$modules[intervals[[x]]])) }), fill = T) %>% gather(2:56, key = "modules", value = "pval") %>% filter(pval>2.3) write.csv(canonicalPathways, "Canonical pathway.csv") canonicalPathways <- rbindlist(lapply(1:length(cpFiles), function(x){ a <- readxl::read_xls(cpFiles[[x]], skip = 1) %>% setnames(new = c(names(.)[1], sum2$modules[intervals[[x]]])) }), fill = T) %>% gather(2:56, key = "modules", value = "pval") %>% filter(pval>2.3) %>% group_by(modules) %>% top_n(5, pval) %>% arrange(-pval)%>% summarise("IPA Canonical Pathways p<0.005"= paste(`Canonical Pathways`, collapse = " ; ")) upFiles <- list.files("IPA analysis/", pattern = "upstream", recursive = T, full.names = T) upstreams <- rbindlist(lapply(1:length(upFiles), function(x){ a <- readxl::read_xls(upFiles[[x]], skip = 1) %>% setnames(new = c(names(.)[1], sum2$modules[intervals[[x]]])) }), fill = T) %>% gather(2:56, key = "modules", value = "pval") %>% filter(pval>2.3) write.csv(upstreams, "upstream regulators.csv") upFiles <- list.files("IPA analysis/", pattern = "upstream", recursive = T, full.names = T) upstreams <- rbindlist(lapply(1:length(upFiles), function(x){ a <- readxl::read_xls(upFiles[[x]], skip = 1) %>% setnames(new = c(names(.)[1], sum2$modules[intervals[[x]]])) }), fill = T) %>% gather(2:56, key = "modules", value = "pval") %>% filter(pval>2.3) %>% group_by(modules) %>% top_n(5, pval) %>% arrange(-pval)%>% summarise("IPA upstream regulators p<0.005"= paste(`Upstream Regulators`, collapse = " ; ")) sum2 <- sum2 %>% left_join(canonicalPathways) %>% left_join(upstreams) ``` ## EWAS GWAS enrichment ```{r} source("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Codes, and protocols/GWAS enrichment/Caesar, oneStepGWAS, Amin.R") top500 <- rbindlist(lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(500, abs(KME))) })) %>% group_split(modules) names(top500) <- sapply(top500, function(x){x$modules[1]}) geneMap <- geneMap %>% filter(CGid%in%mappability$CGid[which(mappability$mapToEutherianNotMarsupialsCor0.8=="yes")]) GWASenrichResults <- plyr::llply(top500, function(x){ result <- oneStepGWAS(EWAS=x, geneMap=geneMap, topNumber = 500, n.topcpg = 1000, cutoff1 = 0.05, pvalue = 0.005, input="WGCNA") }) GWASenrichResults1 <- rbindlist(GWASenrichResults, idcol = "modules") write.csv(GWASenrichResults1, "GWAS enrichment Network 1_cutoff05.csv") ``` ## GWAS enrichment ```{r GWAS enrichment} GWASenrichment <- read.csv("GWAS enrichment Network 1_cutoff05.csv") GWASenrichment <- GWASenrichment %>% filter(P<1e-3) %>% group_by(modules, Category) %>% top_n(3, -P) %>% ungroup() %>% group_by(modules)%>%summarise('GWAS_top_traits_P<1e-3' = paste(unique(trait), collapse = " ; "), 'GWAS_top_categories_P<1e-3' = paste(unique(Category), collapse = " ; ")) %>% dplyr::rename(moduleColors = modules) sum2 <- sum2 %>% left_join(GWASenrichment) #write.csv(sum2, "summary table.csv") GWASenrichment <- read.csv("GWAS enrichment Network 1_cutoff05.csv") GWASenrichment <- GWASenrichment %>% filter(P<1e-3) %>% mutate(Overlap = paste(" ", Overlap, sep = "")) #write.csv(GWASenrichment, "top GWAS enrichment.csv") ``` ## Gene region enrichment ```{r geneRegion analysis} geneMap <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V5.RDS") geneRegions <- KMEs %>% dplyr::select(modules) %>% tibble::rownames_to_column(var = "CGid") %>% bind_rows(dplyr::select(.data=background1, CGid)) %>% left_join(dplyr::select(.data=geneMap, CGid, main_Categories))%>% mutate(main_Categories = factor(main_Categories, levels = rev(c("Intergenic_upstream", "Promoter", "fiveUTR", "Exon", "Intron", "threeUTR", "Intergenic_downstream")))) %>% mutate(modules = ifelse(is.na(modules), "background", modules)) %>% group_by(modules, main_Categories) %>% mutate() %>% tally() %>% group_by(modules) %>% mutate(total = sum(n)) %>% spread(key = "main_Categories", value = "n") %>% tibble::column_to_rownames("modules") geneRegions[is.na(geneRegions)] <- 0 targetModules <- rownames(geneRegions)[-1] targetRegions <- colnames(geneRegions)[-1] regionPvals <- bind_cols(lapply(targetRegions, function(y){ sapply(targetModules, function(x){ regions <- geneRegions[c("background", x),] %>% dplyr::select(paste(y), total) res <- prop.test(x = regions[,1], n = regions[,2])$p.value }) })) regionPvals <- as.data.frame(apply(regionPvals, 2, function(x){p.adjust(x, method="fdr")})) names(regionPvals) <- targetRegions rownames(regionPvals) <- targetModules geneRegionSum <- KMEs %>% dplyr::select(modules) %>% tibble::rownames_to_column(var = "CGid") %>% left_join(dplyr::select(.data=geneMap, CGid, main_Categories))%>% mutate(main_Categories = factor(main_Categories, levels = rev(c("Intergenic_upstream", "Promoter", "fiveUTR", "Exon", "Intron", "threeUTR", "Intergenic_downstream")))) %>% group_by(modules, main_Categories) %>% tally() %>% group_by(modules) %>% mutate(total = sum(n)) %>% mutate(percentage = round(n/total*100,2)) %>% dplyr::select(-n, -total) %>% spread(key = "main_Categories", value = "percentage") %>% tibble::column_to_rownames("modules") geneRegionSum[is.na(geneRegionSum)] <- 0 geneRegionBack <- dplyr::select(.data=background1, CGid) %>% left_join(dplyr::select(.data=geneMap, CGid, main_Categories))%>% mutate(main_Categories = factor(main_Categories, levels = rev(c("Intergenic_upstream", "Promoter", "fiveUTR", "Exon", "Intron", "threeUTR", "Intergenic_downstream")))) %>% group_by(main_Categories) %>% tally() %>% mutate(total = sum(n)) %>% mutate(background = round(n/total*100,2)) %>% dplyr::select(-n, -total) %>% dplyr::rename(region = main_Categories) gDat <- geneRegionSum %>% tibble::rownames_to_column(var = "modules") %>% gather(key = "region", value = "percentage", -modules) gDat <- regionPvals %>% tibble::rownames_to_column(var = "modules") %>% gather(key = "region", value = "pvalue", -modules) %>% left_join(gDat) %>% mutate(region = factor(region, levels = rev(c("Intergenic_upstream", "Promoter", "fiveUTR", "Exon", "Intron", "threeUTR", "Intergenic_downstream")))) %>% mutate(modules = factor(modules, levels=sum2$moduleColors)) %>% left_join(geneRegionBack) %>% mutate(changeFromBackground = factor(as.character(sign(percentage-background)), levels = c(-1,1), labels = c("decrease", "increase"))) netMod <- levels(gDat$modules) p3 <- gDat%>% mutate(pvalue = ifelse(pvalue>0.05, NA, pvalue)) %>% mutate(percentage = ifelse(pvalue>0.05, NA, percentage)) %>%ggplot(aes(x=modules, y=region, color=-log10(pvalue), size=percentage))+geom_point_rast(aes(shape=changeFromBackground))+theme_classic()+theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))+scale_color_gradient(low = "blue", high = "red", limits=c(1.3,40))+scale_shape_manual(values = c(15,17))+ggtitle("Enrichment of different gene regions per module")+scale_size(range = c(0.5, 5), limits = c(0,100))+geom_rect(xmin = as.numeric(gDat$modules)-0.5, xmax =as.numeric(gDat$modules)+0.5, ymin = -0.5, ymax = Inf, aes(fill=modules), alpha =0.01, color=NA, show.legend = FALSE)+scale_fill_manual(values = netMod)+labs(color="-log10(adj.pvalue)", size="Percentage of CpGs\n per module")+ylab("Gene region location of CpGs") pdf("Gene region enrichment.pdf", width = 12, height =6) p3 dev.off() # I have decided to do a proportion test for regions prop.test(x = c(1428, 23), n = c(14705, 470)) fisher.test(x = geneRegions[1:2,1:2]) chisq.test(x = geneRegions[1,], y = geneRegions[2,]) ``` # Protein-Protein network analysis ```{r STRING network} library(STRINGdb) library(igraph) # stringdb <- STRINGdb$new(version = "11", species= 9606, score_threshold=400) # genes <- data.frame(SYMBOL = unique(geneMap$SYMBOL)) # genes <- stringdb$map(genes, "SYMBOL", removeUnmappedRows = TRUE ) # saveRDS(genes, "STRING gene dictionary.RDS") genes <- readRDS("STRING gene dictionary.RDS") #mammalianPPI <- stringdb$get_interactions(genes$STRING_id) #mammalianPPI$combined_score <- scales::rescale(mammalianPPI$combined_score) #saveRDS(mammalianPPI, "mammalianPPI.RDS") mammalianPPI <- readRDS("mammalianPPI.RDS") mammalianPPI <- mammalianPPI%>% left_join(genes, by=c("from"="STRING_id")) %>% dplyr::rename(source=SYMBOL)%>% left_join(genes, by=c("to"="STRING_id")) %>% dplyr::rename(destination=SYMBOL) %>% distinct() %>% mutate(ID=paste(source,destination , sep = "-")) ppiNet <- rbindlist(lapply(1:length(TWASresuls$PPI_network), function(x){ net <- TWASresuls$PPI_network[[x]] %>% dplyr::rename(source=preferredName_A, destination=preferredName_B) %>% filter(source!="") %>% mutate(ID = paste(source, destination, sep = "-")) %>% mutate(color.background=modules[[x]])})) mammalianPPI <- mammalianPPI %>% left_join(dplyr::select(.data=ppiNet, ID, color.background)) mammalianPPI <- mammalianPPI %>% group_by(ID) %>% summarize(color.background=paste(color.background, collapse = "; "), source=unique(source), destination=unique(destination), score=unique(combined_score)) mammalianPPI_top500 <- mammalianPPI %>% filter(color.background%in%color.background[!grepl("(^NA)|(NA$)", color.background)])%>% mutate(shadow=ifelse(grepl(";", color.background), TRUE, FALSE)) %>% mutate(shape=ifelse(grepl(";", color.background), "ellipse", "circle")) %>% mutate(color.background=ifelse(grepl(";", color.background), str_extract("^\\w+", string = color.background), color.background)) sources <- mammalianPPI_top500 %>% group_by(color.background, shadow, shape) %>% distinct(source)%>% dplyr::rename(label=source) destinations <- mammalianPPI_top500%>% group_by(color.background, shadow, shape) %>% distinct(destination) %>% dplyr::rename(label=destination) nodes <- full_join(sources, destinations, by = c("label", "color.background","shadow", "shape"))%>% tibble::rowid_to_column("id")%>% mutate(font.size = 30) %>% mutate(group = color.background) %>% mutate(color.background=gsub("[0-9]", "", color.background))%>% mutate(color.background=col2hex(color.background))%>% mutate(font.color=ifelse(grepl("(dark)|(black)|(^blue)|(midnightblue)|(brown4)", group), "white", "black")) %>% mutate(font.color=ifelse(grepl("(darkorange2)", group), "black",font.color)) lnodes <- data.frame(id=1:length(unique(nodes$group)), label=unique(nodes$group), color=unique(nodes$group), font.size=17) %>% mutate(font.color=ifelse(grepl("(dark)|(black)|(^blue)|(midnightblue)", label), "white","black")) %>% mutate(font.color=ifelse(grepl("(darkorange2)", label), "black",font.color)) %>% mutate(color=gsub("[0-9]", "", color)) %>% mutate(color=col2hex(color)) per_route <- mammalianPPI_top500 %>% mutate(weight=score) %>% dplyr::select(-shape, -shadow, -color.background) edges <- per_route %>% left_join(nodes, by = c("source" = "label")) %>% dplyr::rename(from = id) edges <- edges %>% left_join(nodes, by = c("destination" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(to = id) %>% dplyr::select(from, to, weight) library(visNetwork) network <- visNetwork(nodes = nodes, edges = edges, width = "100%", height = "1000px", main="Protein-protein interaction network in Mammalian co-methylation modules") %>% visIgraphLayout(layout = "layout_with_kk", physics=T, smooth=T) %>% visEdges(arrows = "middle")%>% visNodes(size=40,color = list(border = "black"))%>% visClusteringByColor(colors = "color.background",force = F)%>% visOptions(highlightNearest = TRUE)%>% visLegend(position = "right", useGroups = F, addNodes = lnodes,ncol=5, width=0.4) network %>% visSave(file = "PPI networks/Mammalian PPI network unsupervised.html") ## net1_top <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(50, abs(KME))) }) names(net1_top) <- sapply(net1_top, function(x){x$modules[1]}) genes2 <- data.frame(CGid = geneMap$CGid, modules=mergedColors, SYMBOL=geneMap$SYMBOL)%>% filter(CGid%in%rbindlist(net1_top)$CGid)%>% group_by(SYMBOL) %>% summarise(modules=paste(unique(modules), collapse = "; ")) %>% ungroup() %>% left_join(genes) %>% filter(!is.na(STRING_id)) %>% dplyr::rename(color.background=modules) mammalianPPI <- readRDS("mammalianPPI.RDS")%>% left_join(genes2, by=c("from"="STRING_id")) %>% dplyr::rename(source=SYMBOL)%>% left_join(genes2[,c(1,3)], by=c("to"="STRING_id")) %>% dplyr::rename(destination=SYMBOL) %>% dplyr::select(-from, -to)%>% distinct() %>% filter(!is.na(source)|!is.na(destination))%>% mutate(ID=paste(source,destination , sep = "-"))%>% filter(color.background%in%color.background[!grepl("(^NA)|(NA$)", color.background)])%>% mutate(shadow=ifelse(grepl(";", color.background), TRUE, FALSE)) %>% mutate(shape=ifelse(grepl(";", color.background), "ellipse", "circle")) %>% mutate(color.background=ifelse(grepl(";", color.background), str_extract("^\\w+", string = color.background), color.background)) %>% filter(!duplicated(ID)&!is.na(source)&!is.na(destination)) counts <- mammalianPPI %>% group_by(source) %>% tally() %>% setnames(new = c("SYMBOL", "n")) counts2 <- mammalianPPI %>% group_by(destination) %>% tally()%>% setnames(new = c("SYMBOL", "n")) %>% bind_rows(counts)%>% group_by(SYMBOL) %>% summarize(n=sum(n)) sources <- mammalianPPI %>% group_by(color.background, shadow, shape) %>% distinct(source)%>% dplyr::rename(label=source) destinations <- mammalianPPI%>% group_by(color.background, shadow, shape) %>% distinct(destination) %>% dplyr::rename(label=destination) nodes <- full_join(sources, destinations, by = c("label", "color.background","shadow", "shape"))%>% tibble::rowid_to_column("id")%>% mutate(font.size = 30) %>% mutate(group = color.background) %>% mutate(color.background=gsub("[0-9]", "", color.background))%>% mutate(color.background=col2hex(color.background))%>% mutate(font.color=ifelse(grepl("(dark)|(black)|(^blue)|(midnightblue)|(brown4)", group), "white", "black")) %>% mutate(font.color=ifelse(grepl("(darkorange2)", group), "black",font.color)) lnodes <- data.frame(id=1:length(unique(nodes$group)), label=unique(nodes$group), color=unique(nodes$group), font.size=17) %>% mutate(font.color=ifelse(grepl("(dark)|(black)|(^blue)|(midnightblue)", label), "white","black")) %>% mutate(font.color=ifelse(grepl("(darkorange2)", label), "black",font.color)) %>% mutate(color=gsub("[0-9]", "", color)) %>% mutate(color=col2hex(color)) per_route <- mammalianPPI %>% mutate(weight=combined_score) %>% dplyr::select(-shape, -shadow, -color.background) edges <- per_route %>% left_join(nodes, by = c("source" = "label")) %>% dplyr::rename(from = id) edges <- edges %>% left_join(nodes, by = c("destination" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(to = id) %>% dplyr::select(from, to, weight) library(visNetwork) network <- visNetwork(nodes = nodes, edges = edges, width = "100%", height = "1000px", main="Protein-protein interaction network in Mammalian co-methylation modules") %>% visIgraphLayout(layout = "layout_with_kk", physics=T, smooth=T) %>% visEdges(arrows = "middle")%>% visNodes(size=5,color = list(border = "black"))%>% visOptions(highlightNearest = TRUE)%>% visLegend(width = 0.1, position = "right", main = "Group")%>% visLegend(position = "right", useGroups = F, addNodes = lnodes,ncol=5, width=0.4) network %>% visSave(file = "PPI networks/Mammalian PPI network unsupervised all.html") #purple module mammalianPPI2 <- mammalianPPI %>% filter(color.background=="purple") %>% mutate(shape="circle") sources <- mammalianPPI2 %>% group_by(color.background, shadow, shape) %>% distinct(source)%>% dplyr::rename(label=source) destinations <- mammalianPPI2%>% group_by(color.background, shadow, shape) %>% distinct(destination) %>% dplyr::rename(label=destination) nodes <- full_join(sources, destinations, by = c("label", "color.background","shadow", "shape"))%>% tibble::rowid_to_column("id")%>% mutate(font.size = 30) per_route <- mammalianPPI2 %>% mutate(weight=combined_score) edges <- per_route %>% left_join(nodes, by = c("source" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(from = id) edges <- edges %>% left_join(nodes, by = c("destination" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(to = id) %>% dplyr::select(from, to, weight) net <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE) library(RCy3) createNetworkFromIgraph(net,"purpleNetwork") #royalblue module mammalianPPI2 <- mammalianPPI %>% filter(color.background=="royalblue") %>% mutate(shape="circle") sources <- mammalianPPI2 %>% group_by(color.background, shadow, shape) %>% distinct(source)%>% dplyr::rename(label=source) destinations <- mammalianPPI2%>% group_by(color.background, shadow, shape) %>% distinct(destination) %>% dplyr::rename(label=destination) nodes <- full_join(sources, destinations, by = c("label", "color.background","shadow", "shape"))%>% tibble::rowid_to_column("id")%>% mutate(font.size = 30) per_route <- mammalianPPI2 %>% mutate(weight=combined_score) edges <- per_route %>% left_join(nodes, by = c("source" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(from = id) edges <- edges %>% left_join(nodes, by = c("destination" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(to = id) %>% dplyr::select(from, to, weight) net <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE) library(RCy3) createNetworkFromIgraph(net,"royalblueNetwork") ``` ```{r extracting hub genes per modules} net1_top <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(50, abs(KME))) }) names(net1_top) <- sapply(net1_top, function(x){x$modules[1]}) genes2 <- data.frame(CGid = geneMap$CGid, modules=mergedColors, SYMBOL=geneMap$SYMBOL)%>% filter(CGid%in%rbindlist(net1_top)$CGid)%>% #group_by(SYMBOL) %>% summarise(modules=paste(unique(modules), collapse = "; ")) %>% ungroup() %>% left_join(genes) %>% filter(!is.na(STRING_id)) %>% dplyr::rename(color.background=modules) mammalianPPI <- readRDS("mammalianPPI.RDS")%>% left_join(genes2, by=c("from"="STRING_id")) %>% dplyr::rename(source=SYMBOL)%>% left_join(genes2[,c("SYMBOL","STRING_id")], by=c("to"="STRING_id")) %>% dplyr::rename(destination=SYMBOL) %>% dplyr::select(-from, -to)%>% distinct() %>% filter(!is.na(source)|!is.na(destination))%>% mutate(ID=paste(source,destination , sep = "-"))%>% filter(color.background%in%color.background[!grepl("(^NA)|(NA$)", color.background)])%>% mutate(shadow=ifelse(grepl(";", color.background), TRUE, FALSE)) %>% mutate(shape=ifelse(grepl(";", color.background), "ellipse", "circle")) %>% mutate(color.background=ifelse(grepl(";", color.background), str_extract("^\\w+", string = color.background), color.background)) %>% filter(!duplicated(ID)&!is.na(source)&!is.na(destination)) targets <- unique(mammalianPPI$color.background) hubCount <- rbindlist(lapply(1:length(targets), function(x){ a <- mammalianPPI %>% filter(color.background==targets[x]) %>% gather(source, destination, key = "node",value = "SYMBOL" ) b <- a%>% group_by(SYMBOL) %>% tally() %>% mutate(mean=mean(n))%>% filter(n>2) sources <- mammalianPPI %>% filter(color.background==targets[x])%>% group_by(color.background, shadow, shape) %>% distinct(source)%>% dplyr::rename(label=source) destinations <- mammalianPPI%>% filter(color.background==targets[x])%>% group_by(color.background, shadow, shape) %>% distinct(destination) %>% dplyr::rename(label=destination) nodes <- full_join(sources, destinations, by = c("label", "color.background","shadow", "shape"))%>% tibble::rowid_to_column("id")%>% mutate(font.size = 30) per_route <- mammalianPPI %>% filter(color.background==targets[x])%>% mutate(weight=combined_score) edges <- per_route %>% left_join(nodes, by = c("source" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(from = id) edges <- edges %>% left_join(nodes, by = c("destination" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(to = id) %>% dplyr::select(from, to, weight) net <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE) c <-data.frame(module=targets[x], nHubInPPI=nrow(b), nGenes=length(unique(a$SYMBOL)), maxConnection = max(b$n), averageConnection = b$mean[1], clusterCoef=transitivity(net, type = "global")) %>% mutate(connectivityScore=nHubInPPI/nGenes) }))%>% mutate(module=factor(module, levels=module[order(clusterCoef)])) cols <- levels(hubCount$module) # summary of the PPI network itself # All proteins on array genes3 <- data.frame(CGid = geneMap$CGid, modules=mergedColors, SYMBOL=geneMap$SYMBOL)%>% #group_by(SYMBOL) %>% summarise(modules=paste(unique(modules), collapse = "; ")) %>% ungroup() %>% left_join(genes) %>% filter(!is.na(STRING_id)) %>% dplyr::rename(color.background=modules) mammalianPPIBackground <- readRDS("mammalianPPI.RDS")%>% left_join(genes3, by=c("from"="STRING_id")) %>% dplyr::rename(source=SYMBOL)%>% left_join(genes3[,c("SYMBOL","STRING_id")], by=c("to"="STRING_id")) %>% dplyr::rename(destination=SYMBOL) %>% dplyr::select(-from, -to)%>% distinct() %>% filter(!is.na(source)|!is.na(destination))%>% mutate(ID=paste(source,destination , sep = "-"))%>% filter(color.background%in%color.background[!grepl("(^NA)|(NA$)", color.background)])%>% mutate(shadow=ifelse(grepl(";", color.background), TRUE, FALSE)) %>% mutate(shape=ifelse(grepl(";", color.background), "ellipse", "circle")) %>% mutate(color.background=ifelse(grepl(";", color.background), str_extract("^\\w+", string = color.background), color.background)) %>% filter(!duplicated(ID)&!is.na(source)&!is.na(destination)) nHubForPPI <- mammalianPPIBackground %>% filter(!duplicated(ID)) %>% gather(source, destination, key = "node",value = "SYMBOL" ) nHubForPPI2 <- nHubForPPI%>% group_by(SYMBOL) %>% tally() %>% mutate(mean=mean(n))%>% filter(n>2) sources <- mammalianPPIBackground %>% group_by(color.background, shadow, shape) %>% distinct(source)%>% dplyr::rename(label=source) destinations <- mammalianPPIBackground%>% group_by(color.background, shadow, shape) %>% distinct(destination) %>% dplyr::rename(label=destination) nodes <- full_join(sources, destinations, by = c("label", "color.background","shadow", "shape"))%>% tibble::rowid_to_column("id")%>% mutate(font.size = 30) per_route <- mammalianPPIBackground %>% mutate(weight=combined_score) edges <- per_route %>% left_join(nodes, by = c("source" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(from = id) edges <- edges %>% left_join(nodes, by = c("destination" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(to = id) %>% dplyr::select(from, to, weight) net <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE) nHubForPPI3 <-data.frame(module="All PPI network", nHubInPPI=nrow(nHubForPPI2), nGenes=length(unique(nHubForPPI$SYMBOL)), maxConnection = max(nHubForPPI2$n), averageConnection = nHubForPPI2$mean[1], clusterCoef=transitivity(net, type = "global")) %>% mutate(connectivityScore=nHubInPPI/nGenes) # Sensetivity test for connectivity set.seed(2022) sumBack <-mammalianPPIBackground %>% group_by(color.background) %>% tally() sensetivityhubCount <- rbindlist(lapply(1:20, function(x){ t <- rbindlist(lapply(1:55, function(y){ i <- sample(1:nrow(mammalianPPIBackground), size = sumBack$n[y], replace = F) a <- mammalianPPIBackground[i,] %>% gather(source, destination, key = "node",value = "SYMBOL" ) b <- a%>% group_by(SYMBOL) %>% tally() %>% mutate(mean=mean(n))%>% filter(n>2) sources <- mammalianPPIBackground[i,] %>% group_by(color.background, shadow, shape) %>% distinct(source)%>% dplyr::rename(label=source) destinations <- mammalianPPIBackground[i,]%>% group_by(color.background, shadow, shape) %>% distinct(destination) %>% dplyr::rename(label=destination) nodes <- full_join(sources, destinations, by = c("label", "color.background","shadow", "shape"))%>% tibble::rowid_to_column("id")%>% mutate(font.size = 30) per_route <- mammalianPPIBackground[i,] %>% mutate(weight=combined_score) edges <- per_route %>% left_join(nodes, by = c("source" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(from = id) edges <- edges %>% left_join(nodes, by = c("destination" = "label", "color.background", "shadow", "shape")) %>% dplyr::rename(to = id) %>% dplyr::select(from, to, weight) net <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE) c <-data.frame(module="1100 Permutations", nHubInPPI=nrow(b), nGenes=length(unique(a$SYMBOL)), maxConnection = max(b$n), averageConnection = b$mean[1], clusterCoef=transitivity(net, type = "global")) %>% mutate(connectivityScore=nHubInPPI/nGenes) })) })) # p <- hubCount %>% ggplot(aes(x=clusterCoef, y=module, color=module))+geom_point(size=4)+scale_color_manual(values = cols)+xlab("Global cluster coefficient (transitivity)")+ylab("modules")+theme_classic()+geom_segment(aes(x=0, xend=clusterCoef, y=module, yend=module), color="black")+guides(color=FALSE)+ggtitle("Mammalian modules connectivity in PPI network")+theme(legend.position = "bottom")+geom_vline(xintercept = max(sensetivityhubCount$clusterCoef, na.rm = T), color="darkgrey", linetype="dashed")+geom_text(aes(x = max(sensetivityhubCount$clusterCoef, na.rm = T), y=5, label="Max in permutation"), color="black", size=3) ggsave("Mammalian modules connectivity in PPI network.pdf", p, width = 7, height = 7) # p <- hubCount %>% mutate(module="MammalianModules") %>% bind_rows(sensetivityhubCount)%>% dplyr::rename(analysis=module)%>%ggplot(aes(x=connectivityScore, fill=analysis))+geom_density(alpha=0.5)+xlab("HubGenes(>2 connections)/total genes")+theme_bw()+ggtitle("Permutation test")+theme(legend.position = "bottom")+geom_vline(xintercept = median(sensetivityhubCount$connectivityScore), color="red", linetype="dashed")+geom_vline(xintercept = nHubForPPI3$connectivityScore, color="darkgrey", linetype="dashed")+geom_vline(xintercept = median(hubCount$connectivityScore), color="turquoise", linetype="dashed")+geom_label(aes(x=nHubForPPI3$connectivityScore, y=10, label="PPI\nconnectivity\nscore"), alpha=0.01, fill="grey", size=2)+ylab("density") ggsave("Mammalian modules connectivity in PPI network, permutation.pdf", p, width = 4, height = 4) p2 <- hubCount %>% mutate(module="MammalianModules") %>% bind_rows(sensetivityhubCount)%>% dplyr::rename(analysis=module)%>%ggplot(aes(x=averageConnection, fill=analysis))+geom_density(alpha=0.5)+xlab("Average degree")+theme_bw()+ggtitle("Permutation test")+theme(legend.position = "bottom")+geom_vline(xintercept = median(sensetivityhubCount$averageConnection), color="red", linetype="dashed")+geom_vline(xintercept = nHubForPPI3$averageConnection, color="darkgrey", linetype="dashed")+geom_vline(xintercept = median(hubCount$averageConnection), color="turquoise", linetype="dashed")+geom_label(aes(x=nHubForPPI3$averageConnection, y=10, label="PPI\naverage\ndegree"), alpha=0.01, fill="grey", size=2)+ylab("density") ggsave("Mammalian modules average connection in PPI network, permutation.pdf", p2, width = 4, height = 4) p3 <- hubCount %>% mutate(module="MammalianModules") %>% bind_rows(sensetivityhubCount)%>% dplyr::rename(analysis=module)%>%ggplot(aes(x=maxConnection, fill=analysis))+geom_density(alpha=0.5)+xlab("Maximum connection")+theme_bw()+ggtitle("Permutation test")+theme(legend.position = "bottom")+geom_vline(xintercept = median(sensetivityhubCount$maxConnection), color="red", linetype="dashed")+geom_vline(xintercept = nHubForPPI3$maxConnection, color="darkgrey", linetype="dashed")+geom_vline(xintercept = median(hubCount$maxConnection), color="turquoise", linetype="dashed")+geom_label(aes(x=nHubForPPI3$averageConnection, y=10, label="PPI\naverage\nconnection"), alpha=0.01, fill="grey", size=2)+ylab("density") ggsave("Mammalian modules mac connectivity in PPI network, permutation.pdf", p3, width = 4, height = 4) pval <- round(sum(sensetivityhubCount$clusterCoef[which(!is.na(sensetivityhubCount$clusterCoef))]>median(hubCount$clusterCoef, na.rm = T))/nrow(sensetivityhubCount), 3) p4 <- hubCount %>% mutate(module="MammalianModules") %>% bind_rows(sensetivityhubCount)%>% dplyr::rename(analysis=module)%>%ggplot(aes(x=clusterCoef, fill=analysis))+geom_density(alpha=0.5)+xlab("Cluster coefficient")+theme_bw()+ggtitle("Permutation test")+theme(legend.position = "bottom")+geom_vline(xintercept = median(sensetivityhubCount$clusterCoef), color="red", linetype="dashed")+geom_vline(xintercept = median(hubCount$clusterCoef), color="turquoise", linetype="dashed")+ylab("density")+geom_text(aes(x=0.025, y=400, label=paste("pvalue=", pval, sep = "")), alpha=0.01, fill="black", size=4) #+geom_label(aes(x=nHubForPPI3$clusterCoef, y=200, label="PPI\ncluster\nCoef"), alpha=0.01, fill="grey", size=2) #+geom_vline(xintercept = nHubForPPI3$clusterCoef, color="darkgrey", linetype="dashed") ggsave("Mammalian modules cluster coefficient in PPI network, permutation.pdf", p4, width = 4, height = 4) p4 <- hubCount %>% mutate(module="MammalianModules") %>% bind_rows(sensetivityhubCount)%>% dplyr::rename(analysis=module)%>%ggplot(aes(y=clusterCoef,x=analysis , fill=analysis))+geom_boxplot(notch = T)+ylab("Cluster coefficient")+theme_bw()+ggtitle("Permutation test")+theme(legend.position = "none") #+geom_label(aes(x=nHubForPPI3$clusterCoef, y=200, label="PPI\ncluster\nCoef"), alpha=0.01, fill="grey", size=2) #+geom_vline(xintercept = nHubForPPI3$clusterCoef, color="darkgrey", linetype="dashed") ggsave("Mammalian modules cluster coefficient in PPI network, permutation.pdf", p4, width = 4, height = 4) ``` ```{r STRING-WGCNA correlation} genes <- readRDS("STRING gene dictionary.RDS") ## net1_top <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n)) %>% top_n(50, abs(KME))) }) names(net1_top) <- sapply(net1_top, function(x){x$modules[1]}) genes2 <- data.frame(CGid = geneMap$CGid, modules=mergedColors, SYMBOL=geneMap$SYMBOL)%>% filter(CGid%in%rbindlist(net1_top)$CGid) %>% left_join(genes)%>% filter(!is.na(STRING_id))%>% group_by(SYMBOL) %>% summarise(modules=paste(unique(modules), collapse = "; ")) %>% ungroup() %>% left_join(genes) %>% filter(!is.na(STRING_id)) %>% dplyr::rename(color.background=modules) mammalianPPI <- readRDS("mammalianPPI.RDS")%>% left_join(genes2, by=c("from"="STRING_id")) %>% dplyr::rename(source=SYMBOL)%>% left_join(genes2[,c(1,3)], by=c("to"="STRING_id")) %>% dplyr::rename(destination=SYMBOL) %>% dplyr::select(-from, -to)%>% distinct() %>% filter(!is.na(source)|!is.na(destination))%>% mutate(ID=paste(source,destination , sep = "-"))%>% filter(color.background%in%color.background[!grepl("(^NA)|(NA$)", color.background)])%>% mutate(shadow=ifelse(grepl(";", color.background), TRUE, FALSE)) %>% mutate(shape=ifelse(grepl(";", color.background), "ellipse", "circle")) %>% mutate(color.background=ifelse(grepl(";", color.background), str_extract("^\\w+", string = color.background), color.background)) %>% filter(!duplicated(ID)&!is.na(source)&!is.na(destination)) ## genes2 <- data.frame(CGid = geneMap$CGid, modules=mergedColors, SYMBOL=geneMap$SYMBOL)%>% filter(CGid%in%rbindlist(net1_top)$CGid) %>% left_join(genes)%>% filter(!is.na(STRING_id)&!duplicated(CGid)) i <- which(rownames(KMEs) %in% genes2$CGid) disMatrix <- readRDS("WGCNA results/No marsupials/disTOMmatrix net 1.RDS") disMatrix <- disMatrix[i,i] rownames(disMatrix) =colnames(disMatrix)= genes2$CGid net1Adjacency <- reshape2::melt(disMatrix)%>% setnames(new = c("CGid1", "CGid2", "mammalScore")) %>% mutate(mammalScore= -(mammalScore-1))%>% left_join(dplyr::select(.data=genes2, CGid, SYMBOL, modules), by=c("CGid1"="CGid"))%>% left_join(dplyr::select(.data=genes2, CGid, SYMBOL, modules), by=c("CGid2"="CGid"))%>% mutate(ID=paste(SYMBOL.x, SYMBOL.y, sep = "-"))%>% filter(modules.x==modules.y&SYMBOL.x!=SYMBOL.y) combinedAdjacency <- net1Adjacency %>% group_by(ID) %>% summarize(mammalScore=median(mammalScore)) %>% right_join(mammalianPPI) p1 <- combinedAdjacency %>% ggplot(aes(x=mammalScore, y=combined_score, color=color.background))+ggrastr::geom_point_rast()+geom_smooth(method = "lm", color="blue")+ theme_classic(base_size = 15)+ylab("PPI combined score")+xlab("Mammalian co-methylation score\n(median of gene-gene adjacencies\nwithin a module)")+ ggpubr::stat_cor(label.x.npc = 0.5, label.y.npc = 0.99, color="blue", size=5)+ theme(axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 15),legend.position="none", plot.title = element_text(hjust = 0.5, face="bold", size=15), plot.margin = unit(c(0,0,0,0),"cm"))+scale_color_manual(values = combinedAdjacency$color.background)+ggtitle("STRING PPI vs Mammalian Co-methylation\nadjacency correlation within modules") net1Adjacency <- reshape2::melt(disMatrix)%>% setnames(new = c("CGid1", "CGid2", "mammalScore")) %>% mutate(mammalScore= -(mammalScore-1))%>% left_join(dplyr::select(.data=genes2, CGid, SYMBOL, modules), by=c("CGid1"="CGid"))%>% left_join(dplyr::select(.data=genes2, CGid, SYMBOL, modules), by=c("CGid2"="CGid"))%>% mutate(ID=paste(SYMBOL.x, SYMBOL.y, sep = "-"))%>% filter(modules.x!=modules.y&SYMBOL.x!=SYMBOL.y) combinedAdjacency <- net1Adjacency %>% group_by(ID) %>% summarize(mammalScore=median(mammalScore)) %>% right_join(mammalianPPI) p2 <- combinedAdjacency %>% ggplot(aes(x=mammalScore, y=combined_score))+ggrastr::geom_point_rast()+geom_smooth(method = "lm", color="blue")+ theme_classic(base_size = 15)+ylab("PPI combined score")+xlab("Mammalian co-methylation score\n(median of gene-gene adjacencies\noutside the modules)")+ ggpubr::stat_cor(label.x.npc = 0.5, label.y.npc = 0.99, color="blue", size=5)+ theme(axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 15),legend.position="none", plot.title = element_text(hjust = 0.5, face="bold", size=15), plot.margin = unit(c(0,0,0,0),"cm"))+ggtitle("STRING PPI vs Mammalian Co-methylation\nadjacency correlation, outside modules") p <- ggpubr::ggarrange(p1, p2, nrow = 1) ggsave("STRING vs Mammalian network correlation.pdf", p,width = 10, height = 4) ``` ## Chromatin state enrichment ```{r} stackHmmEnrichment <- read_xlsx("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Chip project/full_stack_enrichments_hg19_coord.xlsx")[,-1] states = read.csv('~/Steve Horvath Lab Dropbox/Amin Haghani/MammalianArrayNormalizationTools/ChromatinStatesHaVu/Human/StackHMM, repeat elements, PRCchipseq/mammalian_hg19_fullStack_repeats_PRCchipseq, aggregated.V2.Amin.csv',stringsAsFactors = F) chromHMM <- states %>% group_by(full_stacked_state) %>% summarize(CGs = list(CGid)) %>% dplyr::rename(state=full_stacked_state) prcStates <- states %>% dplyr::select(CGid, PRC1_Amin, PRC2_Amin) %>% setnames(new = c("CGid", "PRC1", "PRC2")) %>% gather(-CGid, key="state", value="bind") %>% filter(bind==1) %>% dplyr::select(-bind) %>% group_by(state) %>% summarize(CGs = list(CGid)) PMDstates <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS")%>% dplyr::select(CGid, PMDstatusDetailed) %>% filter(!is.na(PMDstatusDetailed)) %>% group_by(PMDstatusDetailed) %>% summarize(CGs = list(CGid)) %>% setnames(c("state", "CGs")) PMDstates <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS")%>% dplyr::select(CGid, PMDstatusDetailed) %>% filter(!is.na(PMDstatusDetailed)) %>% group_by(PMDstatusDetailed) %>% summarize(CGs = list(CGid))%>% setnames(c("state", "CGs")) WCGW<- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS")%>% dplyr::select(CGid, WCGW_GenomWide) %>% filter(!is.na(WCGW_GenomWide)) %>% group_by(WCGW_GenomWide) %>% summarize(CGs = list(CGid))%>% setnames(c("state", "CGs")) WCGWcommonPMD<- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS")%>% dplyr::select(CGid, WCGW_inCommonPMDs) %>% filter(!is.na(WCGW_inCommonPMDs)) %>% group_by(WCGW_inCommonPMDs) %>% summarize(CGs = list(CGid))%>% setnames(c("state", "CGs")) %>% mutate(state="WCGW_inCommonPMDs") allStates <- bind_rows(chromHMM, prcStates) allStates <- bind_rows(allStates, PMDstates) allStates <- bind_rows(allStates, WCGW) allStates <- bind_rows(allStates, WCGWcommonPMD) means <- rbindlist(lapply(1:nrow(allStates), function(x){ dat <- bValsNoMars %>% tibble::rownames_to_column(var = "CGid") %>% filter(CGid%in%unlist(allStates[x,][[2]])) %>% dplyr::select(-CGid) res <- data.frame(States=allStates[x,][[1]], mean=mean(as.matrix(dat))) })) ``` ```{r chromatin state enrichment} # this function will do a hypergeometric test between the input and a different CpG sets. source("~/Google drive/My Drive/Amin documents/Steve projects/Research projects/Codes, and protocols/ChromHMM enrichment/ChromHMM enrichment Source Code.R") input <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n))%>% top_n(10000, abs(KME))) }) input <- bind_rows(input) input <-input %>% group_split(modules) names(input) <- sapply(input, function(x){x$modules[1]}) EWAS <- readRDS("EWAS results.RDS") input2 <- EWAS$ALL$targetTop[1:2] nam <- c("Lifespan", "Lifespan.AdjW") input2 <- plyr::llply(1:length(input2), function(x){a <- input2[[x]] %>% group_split(group) names(a) <- sapply(a, function(y){paste(nam[x], y$group[1], sep = " ")}) return(a) }) %>% purrr::flatten(.) geneM <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS") %>% filter(CGid %in% EWAS$ALL$EWAS$`Lifespan ALL`$CGid) chromHMM <- chromHMMenrichment.Amin(input, geneMap) chromHMMewas <- chromHMMenrichment.Amin(input2, geneM) chromHMM <- chromHMM %>% bind_rows(chromHMMewas) %>% mutate(state= factor(state, levels = rev(allStates$state))) %>% mutate(group=factor(group, levels=c(gsub("ME", "", names(input)), names(input2) ))) chromHMM2 <- chromHMM %>% group_by(group) %>% mutate(fdr= p.adjust(pval, method = "fdr")) %>% mutate(pval=ifelse(pval<1e-3, pval, NA)) chromHMM3 <- chromHMM2 %>% filter(!is.na(pval)) write.csv(chromHMM3, "stackHMM results.csv") max <- max(chromHMM2$foldChange) min <- min(chromHMM2$foldChange) mnCount <- min(chromHMM2$nCommon) mxCount <- max(chromHMM2$nCommon) pvalMatrix <- chromHMM2 %>% dplyr::select(state, pval, group) %>% mutate(pval=format.pval(pval, digits = 2)) %>% spread(key = "group", value = "pval") %>% tibble::column_to_rownames(var = "state") pvalMatrix[pvalMatrix!="NA"] <- "*" pvalMatrix[pvalMatrix=="NA"] <- "" pvalMatrix[is.na(pvalMatrix)] <- "" fcMatrix <- chromHMM%>% dplyr::select(state, foldChange, group) %>% mutate(foldChange=log(foldChange)) %>% spread(key = "group", value = "foldChange")%>% tibble::column_to_rownames(var = "state") fcMatrix[fcMatrix=="-Inf"] <- 0 fcMatrix[is.na(fcMatrix)] <- 0 i <- which(rowSums(pvalMatrix=="")==59) pvalMatrix <- pvalMatrix[-i,] fcMatrix <- fcMatrix[-i,] library(ComplexHeatmap) cols <- c(colnames(fcMatrix)[1:55], rep("white",4)) names(cols) <- colnames(fcMatrix) interCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", interventionModules), "maroon", "#E0E0E0")) interCols <- interCols$colors names(interCols) <- colnames(fcMatrix) ageCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c("purple"), "maroon", "#E0E0E0")) ageCols <- ageCols$colors names(ageCols) <- colnames(fcMatrix) maxCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", c(maxLifeModulesMarginal, c("MEgreen", "MEgreenyellow", "MEsteelblue"))), "maroon", "#E0E0E0")) maxCols <- maxCols$colors names(maxCols) <- colnames(fcMatrix) mortalityCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c( "magenta"), "maroon", "#E0E0E0")) mortalityCols <- mortalityCols$colors names(mortalityCols) <- colnames(fcMatrix) dogCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c("skyblue3", "magenta"), "maroon", "#E0E0E0")) dogCols <- dogCols$colors names(dogCols) <- colnames(fcMatrix) weightCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", weightModulesMarg), "maroon", "#E0E0E0")) weightCols <- weightCols$colors names(weightCols) <- colnames(fcMatrix) orderCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", c(orderModules1, monotremeModules, marsupialModules)), "maroon", "#E0E0E0")) orderCols <- orderCols$colors names(orderCols) <- colnames(fcMatrix) sexCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", sexModule), "maroon", "#E0E0E0")) sexCols <- sexCols$colors names(sexCols) <- colnames(fcMatrix) tissueCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", tissueModules$modules), "maroon", "#E0E0E0")) tissueCols <- tissueCols$colors names(tissueCols) <- colnames(fcMatrix) # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(fcMatrix), ageModules=colnames(fcMatrix), InterventionModules= colnames(fcMatrix), maxLifespanModules=colnames(fcMatrix), weightModules=colnames(fcMatrix), humanMortalityModules=colnames(fcMatrix), dogLifespanModules=colnames(fcMatrix), orderModules=colnames(fcMatrix), sexModules=colnames(fcMatrix), tissueModules=colnames(fcMatrix) ,col = list(Modules = cols, ageModules=ageCols, InterventionModules=interCols, maxLifespanModules=maxCols, weightModules=weightCols, humanMortalityModules=mortalityCols, dogLifespanModules=dogCols, orderModules=orderCols, sexModules=sexCols, tissueModules=tissueCols ), show_legend = F) stackHmmEnrichment <- read_xlsx("full_stack_enrichments_hg19_coord.xlsx")[,-1] %>% dplyr::rename(stackHMM = state, enrichment= enrichment_with_probes_hg19) stackHmmColors <- stackHmmEnrichment[, c("stackHMM", "color")] %>% filter(complete.cases(.)) %>% filter(stackHMM!="GapArtf2")%>% mutate(stackHMM2 = gsub("[0-9]", "", stackHMM)) groups <- data.frame(States=rownames(fcMatrix)) %>% mutate(group=c(rep("PMD annotation", 5), rep("PRC TFs", 2), rep("stackHMM", nrow(fcMatrix)-7))) %>% left_join(stackHmmColors, by = c("States"="stackHMM")) %>% mutate(color=ifelse(is.na(color), c(rep("maroon", 5), rep("cyan", 2)), color)) %>% mutate(stackHMM2=ifelse(is.na(stackHMM2), group, stackHMM2)) %>% mutate(color=gsub("#ffffff", "#DCDCDC", color)) %>% left_join(means) col1 <- groups$color names(col1) <- groups$stackHMM2 # row annotation ra =rowAnnotation(States = groups$stackHMM2 , col = list(States =col1), show_annotation_name = FALSE, show_legend = TRUE, meanMethylation = anno_barplot(groups$mean)) colPalette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[60:231] Heatmap <- Heatmap(fcMatrix, col = colPalette, show_column_names = TRUE, name = "log(foldChange)", show_row_names = TRUE, column_dend_reorder = TRUE, cluster_columns = TRUE, cluster_rows = TRUE, row_dend_reorder=TRUE, clustering_distance_rows="pearson", clustering_distance_columns="pearson",clustering_method_rows = "average",clustering_method_columns = "average", cell_fun = function(j, i, x, y, w, h, col) { grid.text(pvalMatrix[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=25))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15), column_title_gp=gpar(fontsize=15, fontface="bold", fill="orange", vjust=0.2),row_split = 3, column_split = 3, column_title = "Meta\nmodule %s", row_title=c("Cluster 1\nmainly TSS,PRC sites", "Cluster 2\nmainly Enhancer states", "Cluster 3\nmainly Transcription states") , row_title_gp=gpar(fontsize=15, fontface="bold", fill="#FFCCE5", vjust=0.2),row_gap = unit(3, "mm"), column_gap = unit(3, "mm"), border = TRUE, row_dend_width = unit(1, "in"), column_dend_height = unit(1, "in")) pdf(file = "Heatmap modules Chromatin states.pdf", width = 15, height = 21) draw(ra+Heatmap, heatmap_legend_side = "right", annotation_legend_side="right", padding = unit(c(2, 10, 10, 10), "mm"), column_title="Enrichment of the modules for different chromatin and genomic states (* p<1e-3)", column_title_gp=gpar(fontsize=17, fontface="bold")) dev.off() # library("dendextend") corMat <- fcMatrix%>% scale(.) %>% cor(use="everything",method = "pearson") dist <- as.dist(1-corMat) clus <- hclust(dist, method="average") dend <- as.dendrogram(clus) metaModules <- as.data.frame(cutree(dend, k=3)) %>% setnames(new = "metaModules") %>% tibble::rownames_to_column(var = "moduleColors") %>% mutate(metaModules=factor(metaModules, levels = c(1:3), labels=c(2, 3,1))) chromHMM3 <-chromHMM2 %>% filter(!is.na(pval)&foldChange>0) %>% left_join(means, by=c("state"="States"))%>% group_by(group) %>% summarize(chromatinStates = paste(state, collapse = ", "), mean=mean(mean)) %>% dplyr::rename(moduleColors=group) %>% right_join(sum2) %>% arrange(X) %>% left_join(metaModules) chromHMM3 %>% group_by(metaModules) %>% summarize(mean=mean(mean, na.rm=T)) write.csv(chromHMM3, "chroms.csv") ``` ```{r heatmap based on lifespan} input <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n))%>% top_n(10000, abs(KME))) }) input <- bind_rows(input) input <-input %>% group_split(modules) names(input) <- sapply(input, function(x){x$modules[1]}) chromHMM <- chromHMMenrichment.Amin(input, geneMap) chromHMM <- chromHMM %>% filter(group=="purple") %>% mutate(group="relativeAgeModule(+)(purple)") EWAS <- readRDS("EWAS results.RDS") input2 <- lapply(EWAS, function(x){x[[2]]}) input2 <- purrr::flatten(input2) input2 <- input2[!grepl("Phylo", names(input2))] rankpValue <- readRDS("rankPvalue EWAS lifespan top hits.RDS") rankpValue$rankPvalue <- rankpValue$rankPvalue %>% mutate(group=factor(dir, levels = c(-1, 1), labels = c("negative", "positive"))) input2 <- append(input2, rankpValue) names(input2)[7] <- "Lifespan rank.pvalue" nam <- gsub(" ", ".",names(input2)) input2 <- plyr::llply(1:length(input2), function(x){a <- input2[[x]] %>% group_split(group) names(a) <- sapply(a, function(y){paste(nam[x], y$group[1], sep = " ")}) return(a) }) %>% purrr::flatten(.) geneM <- readRDS("Human.Homo_sapiens.hg19.Amin.V9.RDS") %>% filter(CGid %in% EWAS$ALL$EWAS$`Lifespan ALL`$CGid) chromHMMewas <- chromHMMenrichment.Amin(input2, geneM) chromHMMewas <- chromHMMewas %>% bind_rows(chromHMM)%>% group_by(group) %>% mutate(fdr= p.adjust(pval, method = "fdr"))%>% mutate(pval=ifelse(pval<1e-3, pval, NA)) chromHMM3 <- chromHMMewas %>% filter(!is.na(pval)) write.csv(chromHMM3, "stackHMM results lifespan.csv") max <- max(chromHMMewas$foldChange) min <- min(chromHMMewas$foldChange) mnCount <- min(chromHMMewas$nCommon) mxCount <- max(chromHMMewas$nCommon) pvalMatrix <- chromHMMewas %>% dplyr::select(state, pval, group) %>% mutate(pval=format.pval(pval, digits = 2)) %>% spread(key = "group", value = "pval") %>% tibble::column_to_rownames(var = "state") pvalMatrix[pvalMatrix=="NA"] <- "" pvalMatrix[is.na(pvalMatrix)] <- "" fcMatrix <- chromHMMewas%>% dplyr::select(state, foldChange,pval,group) %>% mutate(foldChange=ifelse(pval>1e-3, 0,foldChange)) %>% dplyr::select(-pval) %>% mutate(foldChange=log(foldChange)) %>% spread(key = "group", value = "foldChange")%>% tibble::column_to_rownames(var = "state") fcMatrix[fcMatrix=="-Inf"] <- 0 fcMatrix[is.na(fcMatrix)] <- 0 i <- which(rowSums(pvalMatrix=="")==ncol(pvalMatrix)|rownames(pvalMatrix)%in%c("neither")) # pvalMatrix <- pvalMatrix[-i,c(7:12, 1:6, 13)] # fcMatrix <- fcMatrix[-i,c(7:12, 1:6, 13)] pvalMatrix <- pvalMatrix[-i,c(7:10, 13:14,11:12, 1:6, 15)] fcMatrix <- fcMatrix[-i,c(7:10, 13:14,11:12, 1:6, 15)] stackHmmEnrichment <- read_xlsx("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Collaborative projects/Chip project/full_stack_enrichments_hg19_coord.xlsx")[,-1] %>% dplyr::rename(stackHMM = state, enrichment= enrichment_with_probes_hg19) stackHmmColors <- stackHmmEnrichment[, c("stackHMM", "color")] %>% filter(complete.cases(.)) %>% filter(stackHMM!="GapArtf2")%>% mutate(stackHMM2 = gsub("[0-9]", "", stackHMM)) groups <- data.frame(States=rownames(fcMatrix)) %>% mutate(group=c(rep("PMD annotation", 5), rep("PRC TFs", 2), rep("stackHMM", nrow(fcMatrix)-7))) %>% left_join(stackHmmColors, by = c("States"="stackHMM")) %>% mutate(color=ifelse(is.na(color), c(rep("maroon", 5), rep("cyan", 2)), color)) %>% mutate(stackHMM2=ifelse(is.na(stackHMM2), group, stackHMM2)) %>% mutate(color=gsub("#ffffff", "#DCDCDC", color))%>% left_join(means) %>% mutate(stackHMM2=ifelse(stackHMM2%in%c("stackHMM", "PMD annotation", "PRC TFs"), "z.Other", stackHMM2)) # positive j <- grep("positive", names(pvalMatrix)) pvalMatrix2 <- pvalMatrix[,j] fcMatrix2 <- fcMatrix[,j] i <- which(rowSums(pvalMatrix2=="")==ncol(pvalMatrix2)|rownames(pvalMatrix2)%in%c("neither")) pvalMatrix2 <- pvalMatrix2[-i,] fcMatrix2 <- fcMatrix2[-i,] # positive j <- grep("(negative)|(purple)", names(pvalMatrix)) pvalMatrix3 <- pvalMatrix[,j] fcMatrix3 <- fcMatrix[,j] i <- which(rowSums(pvalMatrix3=="")==ncol(pvalMatrix3)|rownames(pvalMatrix3)%in%c("neither")) pvalMatrix3 <- pvalMatrix3[-i,] fcMatrix3 <- fcMatrix3[-i,] # row annotation groups2 <- groups %>% filter(States%in%rownames(fcMatrix2)) groups2 <- groups2[nrow(groups2):1,] pvalMatrix2 <- pvalMatrix2[groups2$States,] fcMatrix2 <- fcMatrix2[groups2$States,] col1 <- groups2$color names(col1) <- groups2$stackHMM2 ra =rowAnnotation(States = groups2$stackHMM2 , col = list(States =col1), show_annotation_name = FALSE, show_legend = TRUE, meanMethylation = anno_barplot(groups2$mean)) colPalette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[125:220] Heatmap<- Heatmap(fcMatrix2, col = colPalette, show_column_names = TRUE, name = "log(foldChange)", show_row_names = TRUE, column_dend_reorder = F, cluster_columns = F, cluster_rows = F, row_dend_reorder=F, clustering_distance_rows="pearson", clustering_distance_columns="pearson",clustering_method_rows = "average",clustering_method_columns = "average", cell_fun = function(j, i, x, y, w, h, col) { grid.text(pvalMatrix2[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=10))}, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15), column_title_gp=gpar(fontsize=15, fontface="bold", fill="orange", vjust=0.2), row_title_gp=gpar(fontsize=15, fontface="bold", fill="#FFCCE5", vjust=0.2), border = TRUE) pdf(file = "Heatmap modules Chromatin states lifespan positive.pdf", width = 8.5, height = 10) draw(ra+Heatmap, heatmap_legend_side = "right", annotation_legend_side="right", padding = unit(c(10, 10, 10, 10), "mm"), column_title="Lifespan positive ", column_title_gp=gpar(fontsize=17, fontface="bold")) dev.off() # row annotation groups3 <- groups %>% filter(States%in%rownames(fcMatrix3)) groups3 <- groups3[nrow(groups3):1,] pvalMatrix3 <- pvalMatrix3[groups3$States,] fcMatrix3 <- fcMatrix3[groups3$States,] col1 <- groups3$color names(col1) <- groups3$stackHMM2 ra =rowAnnotation(States = groups3$stackHMM2 , col = list(States =col1), show_annotation_name = FALSE, show_legend = TRUE, meanMethylation = anno_barplot(groups3$mean)) Heatmap <- Heatmap(fcMatrix3, col = colPalette, show_column_names = TRUE, name = "log(foldChange)", show_row_names = TRUE, column_dend_reorder = F, cluster_columns = F, cluster_rows = F, row_dend_reorder=F, clustering_distance_rows="pearson", clustering_distance_columns="pearson",clustering_method_rows = "average",clustering_method_columns = "average", cell_fun = function(j, i, x, y, w, h, col) { grid.text(pvalMatrix3[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=10))}, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15), column_title_gp=gpar(fontsize=15, fontface="bold", fill="orange", vjust=0.2), row_title_gp=gpar(fontsize=15, fontface="bold", fill="#FFCCE5", vjust=0.2), border = TRUE) pdf(file = "Heatmap modules Chromatin states lifespan negative.pdf", width = 10, height = 10) draw(ra+Heatmap, heatmap_legend_side = "right", annotation_legend_side="right", padding = unit(c(10, 10, 10, 10), "mm"), column_title="Lifespan negative", column_title_gp=gpar(fontsize=17, fontface="bold")) dev.off() ``` ```{r TF enrichment} source("~/Google drive/My Drive/Amin documents/Steve projects/Research projects/Codes, and protocols/ChromHMM enrichment/ChromHMM enrichment Source Code.R") input <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n))%>% top_n(10000, abs(KME))) }) input <- bind_rows(input) input <-input %>% group_split(modules) names(input) <- sapply(input, function(x){x$modules[1]}) EWAS <- readRDS("EWAS results.RDS") input2 <- EWAS$ALL$targetTop[1:2] nam <- c("Lifespan", "Lifespan.AdjW") input2 <- plyr::llply(1:length(input2), function(x){a <- input2[[x]] %>% group_split(group) names(a) <- sapply(a, function(y){paste(nam[x], y$group[1], sep = " ")}) return(a) }) %>% purrr::flatten(.) geneM <- readRDS("Human.Homo_sapiens.hg19.Amin.V9.RDS") %>% filter(CGid %in% EWAS$ALL$EWAS$`Lifespan ALL`$CGid) tf.modules <- regulatoryRegionEnrichment.Amin(input, geneMap) tf.ewas <- regulatoryRegionEnrichment.Amin(input2, geneM) tf.enrichement<- tf.modules %>% bind_rows(tf.ewas) %>% mutate(group=factor(group, levels=c(gsub("ME", "", names(input)), names(input2)), labels = c(gsub("ME", "", names(input)), gsub(" ", "\n",names(input2)))))%>% mutate(stateGroup=ifelse(grepl(".H+[2-4]", x = state), "histones", ifelse(grepl(".DNase", x = state), "ORF", ifelse(grepl("(hg19.)|(mm10.)", x = state), "TF", ifelse(grepl("(common)|(neither)", x = state), "lateReplicatingAnnotation","stackHMM")))))%>% relocate(stateGroup, .after="state")%>% group_by(group) %>% mutate(fdr= p.adjust(pval, method = "fdr")) %>% filter(fdr<0.05&foldChange>1) %>% relocate(fdr, .after="pval") %>% filter(stateGroup=="TF") %>% filter(grepl("(Lifespan)|(midnightblue)|(greenyellow)|(black)|(tan)|(magenta)|(^green)", group)&grepl("hg19", state)) %>% mutate(state=gsub("hg19.", "", state)) %>% ungroup() %>% mutate(state=factor(state, levels = rev(unique(state)[order(log10(fdr))]))) %>% filter(state%in%c("Cmyc", "Cjun", "Jund", "Cfos", "Nanog", "POU5F1")) p <- tf.enrichement %>% ggplot(aes(x=-log10(fdr), y=state, color=foldChange))+geom_segment(aes(xend=-log10(fdr), x=0, yend=state), color="darkgrey")+ geom_point(size=4)+facet_wrap(~group, nrow = 1)+ylab("TF motifs")+theme_classic(base_size = 15)+scale_colour_gradient(low = "#FCBBA1", high ="#67000D" )+theme(strip.text = element_text(size=12)) ggsave("TF motifs.pdf", width = 9, height = 3) # tf.enrichement<- tf.modules %>% bind_rows(tf.ewas) %>% mutate(group=factor(group, levels=c(gsub("ME", "", names(input)), names(input2)), labels = c(gsub("ME", "", names(input)), gsub(" ", "\n",names(input2)))))%>% mutate(stateGroup=ifelse(grepl(".H+[2-4]", x = state), "histones", ifelse(grepl(".DNase", x = state), "ORF", ifelse(grepl("(hg19.)|(mm10.)", x = state), "TF", ifelse(grepl("(common)|(neither)", x = state), "lateReplicatingAnnotation","stackHMM")))))%>% relocate(stateGroup, .after="state")%>% group_by(group) %>% mutate(fdr= p.adjust(pval, method = "fdr")) %>% filter(fdr<0.05&foldChange>1) %>% relocate(fdr, .after="pval") %>% filter(stateGroup=="TF") %>% filter(grepl("(Lifespan)|(midnightblue)|(greenyellow)|(black)|(tan)|(^magenta)|(^green)", group)&grepl("hg19", state)) %>% mutate(state=gsub("hg19.", "", state)) %>% ungroup() %>% mutate(state=factor(state, levels = rev(unique(state)[order(log10(fdr))]))) p <- tf.enrichement %>% ggplot(aes(x=-log10(fdr), y=state, color=foldChange))+geom_segment(aes(xend=-log10(fdr), x=0, yend=state), color="darkgrey")+ geom_point(size=4)+facet_wrap(~group, nrow = 1)+ylab("TF motifs")+theme_classic(base_size = 15)+scale_colour_gradient(low = "#FCBBA1", high ="#67000D" )+theme(strip.text = element_text(size=8)) ggsave("TF motifs.pdf", width = 10, height = 10) # tf.enrichement<- tf.modules %>% bind_rows(tf.ewas) %>% mutate(group=factor(group, levels=c(gsub("ME", "", names(input)), names(input2)), labels = c(gsub("ME", "", names(input)), gsub(" ", "\n",names(input2)))))%>% mutate(stateGroup=ifelse(grepl(".H+[2-4]", x = state), "histones", ifelse(grepl(".DNase", x = state), "ORF", ifelse(grepl("(hg19.)|(mm10.)", x = state), "TF", ifelse(grepl("(common)|(neither)", x = state), "lateReplicatingAnnotation","stackHMM")))))%>% relocate(stateGroup, .after="state")%>% group_by(group) %>% mutate(fdr= p.adjust(pval, method = "fdr")) %>% filter(fdr<0.05&foldChange>1) %>% relocate(fdr, .after="pval") %>% filter(stateGroup=="TF") %>% filter(grepl("(Lifespan)|(midnightblue)|(greenyellow)|(black)|(tan)|(magenta)|(^green)", group)&grepl("hg19", state)) %>% mutate(state=gsub("hg19.", "", state)) %>% ungroup() %>% mutate(state=factor(state, levels = rev(unique(state)[order(log10(fdr))]))) %>% group_by(group) %>% top_n(3, -fdr) tf.enrichement<- tf.modules %>% bind_rows(tf.ewas) %>% mutate(group=factor(group, levels=c(names(input2)[c(2,1,4,3)],gsub("ME", "", names(input))), labels = c(gsub(" ", "\n",names(input2)[c(2,1,4,3)]),gsub("ME", "", names(input)))))%>% mutate(stateGroup=ifelse(grepl(".H+[2-4]", x = state), "histones", ifelse(grepl(".DNase", x = state), "ORF", ifelse(grepl("(hg19.)|(mm10.)", x = state), "TF", ifelse(grepl("(common)|(neither)", x = state), "lateReplicatingAnnotation","stackHMM")))))%>% relocate(stateGroup, .after="state")%>% group_by(group) %>% mutate(fdr= p.adjust(pval, method = "fdr")) %>% filter(fdr<0.05&foldChange>1) %>% relocate(fdr, .after="pval") %>% filter(stateGroup=="TF") %>% filter(grepl("(Lifespan)|(midnightblue)|(greenyellow)|(^black)|(^tan)|(^magenta)|(^green)", group)&grepl("hg19", state)) %>% mutate(state=gsub("hg19.", "", state)) %>% ungroup() %>% filter(state%in%tf.enrichement$state|state%in%c("Cmyc", "Cjun", "Jund", "Cfos", "Nanog", "POU5F1"))%>% mutate(state=factor(state, levels = unique(state)[order(log10(fdr))])) p <- tf.enrichement %>% ggplot(aes(x=-log10(fdr), y=state, color=foldChange))+geom_segment(aes(xend=-log10(fdr), x=0, yend=state), color="darkgrey")+ geom_point(size=4)+facet_wrap(~group, nrow = 1)+ylab("TF motifs")+theme_classic(base_size = 15)+scale_colour_gradient(low = "#FCBBA1", high ="#67000D" )+theme(strip.text = element_text(size=8),panel.spacing = unit(0.1, "lines"))+xlab("-log10(FDR)") ggsave("TF motifs subset.pdf", width = 8, height = 3.5) # ignore direction input2 <- EWAS$ALL$targetTop[1:2] nam <- c("Lifespan", "Lifespan.AdjW") input2 <- plyr::llply(1:length(input2), function(x){a <- input2[[x]] %>% group_by(group) %>% top_n(250, -pval) }) names(input2) <- nam tf.ewas <- regulatoryRegionEnrichment.Amin(input2, geneM) tf.ewas <- tf.ewas %>% mutate(stateGroup=ifelse(grepl(".H+[2-4]", x = state), "histones", ifelse(grepl(".DNase", x = state), "ORF", ifelse(grepl("(hg19.)|(mm10.)", x = state), "TF", ifelse(grepl("(common)|(neither)", x = state), "lateReplicatingAnnotation","stackHMM")))))%>% relocate(stateGroup, .after="state")%>% group_by(group) %>% mutate(fdr= p.adjust(pval, method = "fdr")) %>% filter(fdr<0.05&foldChange>1) %>% relocate(fdr, .after="pval") %>% filter(stateGroup=="TF")%>% filter(grepl("(Lifespan)|(midnightblue)|(greenyellow)|(^black)|(^tan)|(^magenta)|(^green)", group)&grepl("hg19", state)) %>% mutate(state=gsub("hg19.", "", state))%>% mutate(state=factor(state, levels = rev(unique(state)[order(log10(fdr))]))) p <- tf.ewas %>% ggplot(aes(x=-log10(fdr), y=state, color=foldChange))+geom_segment(aes(xend=-log10(fdr), x=0, yend=state), color="darkgrey")+ geom_point(size=4)+facet_wrap(~group, nrow = 1)+ylab("TF motifs")+theme_classic(base_size = 15)+scale_colour_gradient(low = "#FCBBA1", high ="#67000D" )+theme(strip.text = element_text(size=8),panel.spacing = unit(0.1, "lines"))+xlab("-log10(FDR)") ggsave("TF motifs lifespan no direction.pdf", width = 4, height = 3.5) ``` ```{r regulatory region enrichment only modules} source("~/Google drive/My Drive/Amin documents/Steve projects/Research projects/Codes, and protocols/ChromHMM enrichment/ChromHMM enrichment Source Code.R") input <- lapply(1:(ncol(KMEs)-1), function(x){ n <- colnames(KMEs)[x] nModule <- gsub("MM.", "", n) assign(nModule, KMEs %>% dplyr::select(n, modules) %>% tibble::rownames_to_column(var = "CGid") %>% filter(modules%in%nModule) %>% dplyr::rename(KME = paste(n))%>% top_n(10000, abs(KME))) }) input <- bind_rows(input) input <-input %>% group_split(modules) names(input) <- sapply(input, function(x){x$modules[1]}) regRegion <- regulatoryRegionEnrichment.Amin(input, geneMap) regRegion <- regRegion %>% mutate(state= factor(state, levels = rev(regRegions$state))) %>% mutate(group=factor(group, levels=gsub("ME", "", sum2$modules))) regRegion2 <- regRegion %>% mutate(pval=ifelse(pval<1e-5, pval, NA)) %>% filter(!is.na(pval)) max <- max(regRegion2$foldChange) min <- min(regRegion2$foldChange) mnCount <- min(regRegion2$nCommon) mxCount <- max(regRegion2$nCommon) pvalMatrix <- regRegion2 %>% dplyr::select(state, pval, group) %>% mutate(pval=format.pval(pval, digits = 2)) %>% spread(key = "group", value = "pval") %>% tibble::column_to_rownames(var = "state") pvalMatrix[pvalMatrix!="NA"] <- "*" pvalMatrix[pvalMatrix=="NA"] <- "" pvalMatrix[is.na(pvalMatrix)] <- "" fcMatrix <- regRegion2%>% dplyr::select(state, pval, group) %>% mutate(pval=-log10(pval)) %>% spread(key = "group", value = "pval")%>% tibble::column_to_rownames(var = "state") fcMatrix[fcMatrix=="-Inf"] <- 0 fcMatrix[is.na(fcMatrix)] <- 0 library(ComplexHeatmap) cols <- colnames(fcMatrix) names(cols) <- colnames(fcMatrix) interCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", interventionModules), "maroon", "#E0E0E0")) interCols <- interCols$colors names(interCols) <- colnames(fcMatrix) ageCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c("purple"), "maroon", "#E0E0E0")) ageCols <- ageCols$colors names(ageCols) <- colnames(fcMatrix) maxCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", c(maxLifeModulesMarginal, phyloModules)), "maroon", "#E0E0E0")) maxCols <- maxCols$colors names(maxCols) <- colnames(fcMatrix) mortalityCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c( "magenta"), "maroon", "#E0E0E0")) mortalityCols <- mortalityCols$colors names(mortalityCols) <- colnames(fcMatrix) dogCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c("skyblue3", "magenta"), "maroon", "#E0E0E0")) dogCols <- dogCols$colors names(dogCols) <- colnames(fcMatrix) weightCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", weightModulesMarg), "maroon", "#E0E0E0")) weightCols <- weightCols$colors names(weightCols) <- colnames(fcMatrix) orderCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", c(orderModules1, monotremeModules, marsupialModules)), "maroon", "#E0E0E0")) orderCols <- orderCols$colors names(orderCols) <- colnames(fcMatrix) sexCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", sexModule), "maroon", "#E0E0E0")) sexCols <- sexCols$colors names(sexCols) <- colnames(fcMatrix) tissueCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", tissueModules), "maroon", "#E0E0E0")) tissueCols <- tissueCols$colors names(tissueCols) <- colnames(fcMatrix) # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(fcMatrix), ageModules=colnames(fcMatrix), InterventionModules= colnames(fcMatrix), maxLifespanModules=colnames(fcMatrix), weightModules=colnames(fcMatrix), humanMortalityModules=colnames(fcMatrix), dogLifespanModules=colnames(fcMatrix), orderModules=colnames(fcMatrix), sexModules=colnames(fcMatrix), tissueModules=colnames(fcMatrix) ,col = list(Modules = cols, ageModules=ageCols, InterventionModules=interCols, maxLifespanModules=maxCols, weightModules=weightCols, humanMortalityModules=mortalityCols, dogLifespanModules=dogCols, orderModules=orderCols, sexModules=sexCols, tissueModules=tissueCols ), show_legend = F) colPalette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[125:250] Heatmap <- Heatmap(fcMatrix, col = colPalette, show_column_names = TRUE, name = "-log10(pval)", show_row_names = TRUE, column_dend_reorder = TRUE, cluster_columns = TRUE, cluster_rows = TRUE, row_dend_reorder=TRUE, clustering_distance_rows="pearson", clustering_distance_columns="pearson",clustering_method_rows = "average",clustering_method_columns = "complete", cell_fun = function(j, i, x, y, w, h, col) { grid.text(pvalMatrix[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=25))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15), column_title_gp=gpar(fontsize=15, fontface="bold", fill="orange", vjust=0.2) , row_title_gp=gpar(fontsize=15, fontface="bold", fill="#FFCCE5", vjust=0.2),row_gap = unit(3, "mm"), column_gap = unit(3, "mm"), border = TRUE, row_dend_width = unit(1, "in"), column_dend_height = unit(1, "in")) pdf(file = "Heatmap modules Regulatory regions.pdf", width = 15, height = 25) draw(Heatmap, heatmap_legend_side = "right", annotation_legend_side="right", padding = unit(c(2, 10, 10, 10), "mm"), column_title="Enrichment of the modules for TF binding and histon marks (* p<1e-5)", column_title_gp=gpar(fontsize=17, fontface="bold")) dev.off() # top 5 hits pvalMatrix1 <- regRegion2 %>% dplyr::select(state, pval, group) %>% mutate(stateGroup=ifelse(grepl(".H+[2-4]", x = state), "histons", "TFs")) %>% group_by(group, stateGroup) %>% top_n(3, state) %>% ungroup() %>% mutate(id = paste(state, group, sep = " ")) pvalMatrix <- regRegion2 %>% dplyr::select(state, pval, group) %>% mutate(pval=format.pval(pval, digits = 2))%>% mutate(id = paste(state, group, sep = " ")) %>% filter(id%in%pvalMatrix1$id) %>% dplyr::select(-id) %>% spread(key = "group", value = "pval") %>% tibble::column_to_rownames(var = "state") pvalMatrix[pvalMatrix!="NA"] <- "*" pvalMatrix[pvalMatrix=="NA"] <- "" pvalMatrix[is.na(pvalMatrix)] <- "" fcMatrix <- regRegion2%>% dplyr::select(state, pval, group)%>% mutate(id = paste(state, group, sep = " ")) %>% filter(id%in%pvalMatrix1$id) %>% dplyr::select(-id) %>% mutate(pval=-log10(pval)) %>% spread(key = "group", value = "pval")%>% tibble::column_to_rownames(var = "state") fcMatrix[fcMatrix=="-Inf"] <- 0 fcMatrix[is.na(fcMatrix)] <- 0 library(ComplexHeatmap) cols <- colnames(fcMatrix) names(cols) <- colnames(fcMatrix) interCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", interventionModules), "maroon", "#E0E0E0")) interCols <- interCols$colors names(interCols) <- colnames(fcMatrix) ageCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c("purple"), "maroon", "#E0E0E0")) ageCols <- ageCols$colors names(ageCols) <- colnames(fcMatrix) maxCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", c(maxLifeModulesMarginal, phyloModules)), "maroon", "#E0E0E0")) maxCols <- maxCols$colors names(maxCols) <- colnames(fcMatrix) mortalityCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c( "magenta"), "maroon", "#E0E0E0")) mortalityCols <- mortalityCols$colors names(mortalityCols) <- colnames(fcMatrix) dogCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%c("skyblue3", "magenta"), "maroon", "#E0E0E0")) dogCols <- dogCols$colors names(dogCols) <- colnames(fcMatrix) weightCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", weightModulesMarg), "maroon", "#E0E0E0")) weightCols <- weightCols$colors names(weightCols) <- colnames(fcMatrix) orderCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", c(orderModules1, monotremeModules, marsupialModules)), "maroon", "#E0E0E0")) orderCols <- orderCols$colors names(orderCols) <- colnames(fcMatrix) sexCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", sexModule), "maroon", "#E0E0E0")) sexCols <- sexCols$colors names(sexCols) <- colnames(fcMatrix) tissueCols <- data.frame(cols = colnames(fcMatrix)) %>% mutate(colors=ifelse(cols%in%gsub("ME", "", tissueModules), "maroon", "#E0E0E0")) tissueCols <- tissueCols$colors names(tissueCols) <- colnames(fcMatrix) # A bar plot of module frequency ha = HeatmapAnnotation(Modules = colnames(fcMatrix), ageModules=colnames(fcMatrix), InterventionModules= colnames(fcMatrix), maxLifespanModules=colnames(fcMatrix), weightModules=colnames(fcMatrix), humanMortalityModules=colnames(fcMatrix), dogLifespanModules=colnames(fcMatrix), orderModules=colnames(fcMatrix), sexModules=colnames(fcMatrix), tissueModules=colnames(fcMatrix) ,col = list(Modules = cols, ageModules=ageCols, InterventionModules=interCols, maxLifespanModules=maxCols, weightModules=weightCols, humanMortalityModules=mortalityCols, dogLifespanModules=dogCols, orderModules=orderCols, sexModules=sexCols, tissueModules=tissueCols ), show_legend = F) colPalette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[125:250] Heatmap <- Heatmap(fcMatrix, col = colPalette, show_column_names = TRUE, name = "-log10(pval)", show_row_names = TRUE, column_dend_reorder = TRUE, cluster_columns = TRUE, cluster_rows = TRUE, row_dend_reorder=TRUE, clustering_distance_rows="pearson", clustering_distance_columns="pearson",clustering_method_rows = "average",clustering_method_columns = "average", cell_fun = function(j, i, x, y, w, h, col) { grid.text(pvalMatrix[i, j], x, y, vjust = 0.8, gp=gpar(fontsize=25))}, bottom_annotation = ha, rect_gp = gpar(col = "grey", lwd = 1), row_names_gp = gpar(fontsize = 15), column_title_gp=gpar(fontsize=15, fontface="bold", fill="orange", vjust=0.2) , row_title_gp=gpar(fontsize=15, fontface="bold", fill="#FFCCE5", vjust=0.2),row_gap = unit(3, "mm"), column_gap = unit(3, "mm"), border = TRUE, row_dend_width = unit(1, "in"), column_dend_height = unit(1, "in")) pdf(file = "Heatmap modules Regulatory regions top 5.pdf", width = 15, height = 15) draw(Heatmap, heatmap_legend_side = "right", annotation_legend_side="right", padding = unit(c(2, 10, 10, 10), "mm"), column_title="Enrichment of the modules for TF binding and histon marks (* p<1e-5, top 6 per module)", column_title_gp=gpar(fontsize=17, fontface="bold")) dev.off() ``` ## Reprogramming analysis ```{r Reprogramming} miceadds::load.Rdata("NormalizedData/all_probes_sesame_normalized.Rdata", objname = "N55data") N55data <- N55data%>% tibble::column_to_rownames("CGid") N55data <- N55data[rownames(KMEs),] #identical(rownames(N55data), rownames(KMEs)) N55data <- t(N55data) N55MEs = moduleEigengenes(N55data, colors = mergedColors) N55MEs = N55MEs$eigengenes # a <- data.frame(Age = c(0,12,15,20,22, 25, 26), y=0) %>% ggplot(aes(x=Age))+theme_classic(base_size = 15)+scale_x_continuous(breaks = c(0,12,15,20,22, 25, 26), limits=c(0,26), expand = c(0, 0))+xlab("Age (months)")+theme(axis.line.x = element_line(size=1, color="darkblue"), axis.ticks = element_line(size=1, color="darkblue"), axis.text.x = element_text(angle=45, hjust=1, color="darkblue"), axis.title.x = element_text(color="darkblue")) # a <- data.frame(Age = c(0,2,7), y=0) %>% ggplot(aes(x=Age))+theme_classic(base_size = 25)+scale_x_continuous(breaks = c(0,2,7), limits=c(0,7), expand = c(0, 0))+xlab("Days")+theme(axis.line.x = element_line(size=1, color="darkblue"), axis.ticks = element_line(size=1, color="darkblue"), axis.text.x = element_text(angle=45, hjust=1, color="darkblue"), axis.title.x = element_text(color="darkblue")) # # ggsave("x axis.jpeg",a,width = 2, height = 1, units = "in", dpi = 300) N55samples <- read.csv("SampleSheetAgeN55final.csv")%>% filter(CanBeUsedForAgingStudies=="yes")%>% mutate(dosage = as.numeric(as.character(factor(Group, levels=c("6.B6-old", "1.B6+Dox", "2.4F+1mDox", "3.4F+7mDox", "4.4F+10mDox"), labels = c(0,0,1,7,10))))) %>% filter(!is.na(dosage)) %>% filter(Tissue%in%c("Kidney", "Skin")) %>% mutate(Tissue=factor(Tissue, levels = c("Skin","Kidney"))) %>% group_split(Tissue) names(N55samples) <- sapply(N55samples, function(x){x$Tissue[1]}) targetModules <- c("MEpurple", "MEsalmon4", "MEskyblue3", "MEivory","MEroyalblue", "MElavenderblush3", "MEmagenta", "MEblack", "MEmidnightblue", "MEtan") plots <- lapply(1:length(N55samples), function(x){ gdat <- N55MEs %>% tibble::rownames_to_column(var = "Basename") %>% filter(Basename%in%N55samples[[x]]$Basename)%>% right_join(N55samples[[x]], by="Basename") %>% dplyr::select(targetModules, dosage, Age) %>% gather(targetModules, key = "modules", value = "eigengene") %>% mutate(modules = factor(modules, levels = targetModules, labels = gsub("ME", "", targetModules)))%>% group_by(modules) %>% mutate(residuals = residuals(lm(eigengene~Age))) %>% ungroup() p1 <- gdat %>% ggplot(aes(x=dosage, y=residuals))+geom_point_rast()+geom_smooth(method = "lm", color="blue")+ theme_classic(base_size = 15)+ylab("Eigengene\nAge adjusted")+ facet_wrap(.~modules,nrow = 1)+ ggpubr::stat_cor(label.x.npc = 0.1, label.y.npc = 0.99, color="blue", size=3.5)+ggtitle(N55samples[[x]]$Tissue[1])+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), axis.text.y = element_text(size = 15), strip.text = element_text(size=17), legend.position="none", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold", size=20), plot.margin = unit(c(0,0,0,0),"cm"))+ scale_y_continuous(expand = expansion(mult = c(0, .2)))+xlab("4F dosage (months)") if(x==1){ p1 <- p1+xlab("") } return(p1) }) p <- ggpubr::ggarrange(plotlist = plots, nrow = 2) jpeg(file = "N55 experiment.jpeg", width = 15, height = 6, units = "in", res = 300) p dev.off() # N55samps <- rbindlist(N55samples, use.names = F) gdat <- N55MEs %>% tibble::rownames_to_column(var = "Basename") %>% filter(Basename%in%N55samps$Basename)%>% right_join(N55samps, by="Basename") %>% dplyr::select(targetModules, dosage, Age, Tissue) %>% gather(targetModules, key = "modules", value = "eigengene") %>% mutate(modules = factor(modules, levels = targetModules, labels = gsub("ME", "", targetModules)))%>% group_by(modules, Tissue) %>% mutate(residuals = residuals(lm(eigengene~Age))) %>% ungroup() p1 <- gdat %>% ggplot(aes(x=dosage, y=residuals, color=Tissue))+geom_point_rast()+geom_smooth(method = "lm")+ theme_classic(base_size = 15)+ylab("Eigengene\nAge adjusted")+ facet_wrap(.~modules,nrow = 2, ncol=6, scales = "free_x")+ ggpubr::stat_cor(label.x.npc = 0.1, label.y.npc = 0.99, size=3.5, geom = "label")+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), axis.text.y = element_text(size = 15), strip.text = element_text(size=17), legend.position="top", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold", size=20), plot.margin = unit(c(0,0,0,0),"cm"))+ scale_y_continuous(expand = expansion(mult = c(0, .2)))+xlab("4F dosage (months)")+scale_color_manual(values = c("#00688B", "#B22222")) pdf(file = "N55 experiment.pdf", width = 15, height = 6) p1 dev.off() # associations results targetModules2 <- colnames(N55MEs) asssociationResults <- lapply(1:length(targetModules2), function(z){ Results <- as.data.frame(t(sapply(1:length(N55samples), function(y){ samp <- N55samples[[y]] %>% filter(!is.na(DurationTreatment)) gdatN55 <- N55MEs %>% tibble::rownames_to_column(var = "Basename") %>% filter(Basename%in%samp$Basename)%>% right_join(samp, by="Basename") %>% dplyr::select(targetModules2, Tissue, Condition, Age, Female, DurationTreatment, dosage) %>% gather(targetModules2, key = "modules", value = "eigengene") %>%mutate(modules = factor(modules, levels = targetModules2)) %>% filter(modules==targetModules2[z]) a <- summary(lm(eigengene~DurationTreatment+Age, data=gdatN55)) a <- a$coefficients[2,] }))) rownames(Results) <- names(N55samples) return(Results) }) names(asssociationResults) <- targetModules2 asssociationResults <- rbindlist(asssociationResults, idcol = "modules") plots <- lapply(1:length(N55samples), function(y){ tit <- names(N55samples)[y] gdatN55 <- N55MEs %>% tibble::rownames_to_column(var = "Basename") %>% filter(Basename%in%N55samples[[y]]$Basename)%>% right_join(N55samples[[y]], by="Basename") %>% dplyr::select(targetModules, Tissue, Condition, Age, Female) %>% gather(targetModules, key = "modules", value = "eigengene") %>% mutate(modules = factor(modules, levels = targetModules)) p1 <- gdatN55 %>% ggplot(aes(x=Condition, y=eigengene, fill=Condition))+geom_boxplot(notch = T)+ theme_classic(base_size = 15)+ylab("Eigengene")+ scale_fill_manual(values = c("maroon", "darkgreen"))+ facet_wrap(.~Tissue+modules,ncol = 5, scales = "free")+ ggpubr::stat_compare_means(label = "p.signif", method = "t.test", ref.group = "Control", size=6, label.y.npc=0.9, color="red")+ggtitle( paste("Experiment N55", tit, sep = " "))+ theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1), strip.text = element_text(size=15), legend.position="none", legend.key.size = unit(0.3, "cm"), plot.title = element_text(hjust = 0.5, face="bold"), plot.margin = unit(c(1,1,1,1),"cm")) pdf(file = paste("Experiment N55", tit, ".pdf", sep = " "), width = 13, height = 20) print(p1) dev.off() return(p1) }) ``` ## Overlap with EWAS age and lifespan ```{r EWAS results} EWAS_Age <- read.delim("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Ake/UniversalClock/EWAS/final/EWASresult/Metal_pgm5_combine_all_species_tissue_stouffer_step2_1.HG38.txt.gz") %>% dplyr::select("CpG", "Meta.Z") %>% dplyr::rename(CGid = CpG, EWAS_Age = Meta.Z) EWAS_Age_Blood <- read.csv("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Ake/UniversalClock/EWAS/final/EWASresult/Metal_pgm6_combine_all_species_Blood_tissue_stouffer_1.HG38.csv.gz") %>% dplyr::select("CpG", "Meta.Z") %>% dplyr::rename(CGid = CpG, EWAS_Age_blood = Meta.Z) files <- list.files("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Ake/UniversalClock/EWAS/final/EWASresult/") files <- files[grep(".gz$", files)] targetFiles <- c("all_species_tissue", "Blood","Brain","Cortex", "Liver", "Skin", "Muscle") targets <- c("all", "blood","brain","cortex", "liver", "skin", "muscle") all <- read.delim("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Ake/UniversalClock/EWAS/final/EWASresult/Metal_pgm5_combine_all_species_tissue_stouffer_step2_1.HG38.txt.gz") targetList <- lapply(2:length(targetFiles), function(x){ i <- grep(targetFiles[x], files) r <- read.csv(file = paste("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Ake/UniversalClock/EWAS/final/EWASresult/", files[i], sep = "")) }) names(targetList) <- targets[2:7] targetList$all <- all targetList <- targetList[c(7,1:6)] EWAS_Age <- plyr::join_all(lapply(1:length(targetList), function(x){ nam <- paste("EWAS_age", names(targetList)[x], sep = "_") a <- targetList[[x]] %>% dplyr::select("CpG", "Meta.Z") %>% setnames(new= c("CGid", paste(nam))) })) ``` ```{r EWAS max age} ### Use Pvalue and Correlations to get Z values: ## NOTE: change mypath accordingly to your path, and if it's Eutherian path, change them to Eutherian path generic_association = readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allEWAS_correlation.RDS")# corrs file myanalysis = "lifespan" ## if this is phylo results, please change to "phylo", # in the next step, getStouffer function will summarise the columns to those column tissues accordingly ### convert correlation to Z stacked.p = readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allEWAS_pvalue.RDS") # pvalue file generic_association = sign(generic_association) * (- qnorm(stacked.p/2)) # If you want to summarise other columns, e.g. no logmaxAgeCaesar, but just maxAgeCaesar, or only a few tissues, # you can change mycols = c(...) accordingly # Gneeric Tissue columns, (in order): All, blood, skin, liver, brain, muscle. Phylo Tissues: All, Blood, skin, Liver, Brain, Muscle if(myanalysis == "generic") { mycols = c(1, 8, 15, 22, 25, 28) } else { mycols = c(1, 4, 7, 12, 15, 18) } getStouffer <- function(mydf, myweights = NA) { if(!length(myweights) == ncol(mydf)) { stop("Number of weights do not match number of columns") } return(apply(mydf, 1, function(x) return(sum(x * myweights) / sqrt(sum(myweights^2))))) } ns = sapply(strsplit(colnames(generic_association)[mycols], "\\.N"), "[", c(2)) ns = as.numeric(ns)[-1] tissuesStouffer = getStouffer(generic_association[, mycols[-1]], myweights = sqrt(ns)) #stacked = cbind(stacked, tissuesStouffer); colnames(stacked)[ncol(stacked)] = "tissuesStouffer" generic_association <- as.data.frame(generic_association) generic_association$Meta <- tissuesStouffer ## Code produces a new column named "tissuesStouffer" #### Log <- grep("(Log)", colnames(generic_association)) OrderALL <- grep("(OrderALL)|(OrderAll)", colnames(generic_association)) Log <- Log[Log%in%OrderALL] targets <- c("ALL","Blood", "Brain","Liver","Skin","Meta") i <- sapply(targets, function(x){ if(x=="Meta"){ a <- grep("Meta", colnames(generic_association)) } else { a <- Log[which(Log %in% grep(paste("Tissue", x, sep = ""), colnames(generic_association)))] } return(a) }) EWAS_maxAge <- generic_association[,i] %>% setnames(new = paste("EWAS_lifespan", tolower(targets), sep = "_")) %>% tibble::rownames_to_column(var = "CGid") rm(generic_association, stacked.p) ``` ```{r EWAS max age adjW} ## GenericWeightAgj#### ### Use Pvalue and Correlations to get Z values: ## NOTE: change mypath accordingly to your path, and if it's Eutherian path, change them to Eutherian path lifespan_AdjWeight_association = readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allEWAS_weightAdjusted_correlation.RDS")# corrs file myanalysis = "lifespan_Wadj" ## if this is phylo results, please change to "phylo", # in the next step, getStouffer function will summarise the columns to those column tissues accordingly ### convert correlation to Z stacked.p = readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allEWAS_weightAdjusted_pvalue.RDS") # pvalue file lifespan_AdjWeight_association = sign(lifespan_AdjWeight_association) * (- qnorm(stacked.p/2)) # If you want to summarise other columns, e.g. no logmaxAgeCaesar, but just maxAgeCaesar, or only a few tissues, # you can change mycols = c(...) accordingly # Gneeric Tissue columns, (in order): All, blood, skin, liver, brain, muscle. Phylo Tissues: All, Blood, skin, Liver, Brain, Muscle if(myanalysis == "lifespan_Wadj") { mycols = c(1, 8, 15, 22, 25, 28) } else { mycols = c(1, 4, 7, 12, 15, 18) } getStouffer <- function(mydf, myweights = NA) { if(!length(myweights) == ncol(mydf)) { stop("Number of weights do not match number of columns") } return(apply(mydf, 1, function(x) return(sum(x * myweights) / sqrt(sum(myweights^2))))) } ns = sapply(strsplit(colnames(lifespan_AdjWeight_association)[mycols], "\\.N"), "[", c(2)) ns = as.numeric(ns)[-1] tissuesStouffer = getStouffer(lifespan_AdjWeight_association[, mycols[-1]], myweights = sqrt(ns)) #stacked = cbind(stacked, tissuesStouffer); colnames(stacked)[ncol(stacked)] = "tissuesStouffer" lifespan_AdjWeight_association <- as.data.frame(lifespan_AdjWeight_association) lifespan_AdjWeight_association$`Meta analysis` <- tissuesStouffer Log <- grep("(Log)", colnames(lifespan_AdjWeight_association)) OrderALL <- grep("(OrderALL)|(OrderAll)", colnames(lifespan_AdjWeight_association)) Log <- Log[Log%in%OrderALL] targetTissue <- paste("lifespan_AdjWeight", targets, sep = "_") i <- sapply(targets, function(x){ if(x=="Meta"){ a <- grep("Meta", colnames(lifespan_AdjWeight_association)) } else { a <- Log[which(Log %in% grep(paste("Tissue", x, sep = ""), colnames(lifespan_AdjWeight_association)))] } return(a) }) EWAS_maxAge_AdjWeight <- lifespan_AdjWeight_association[,i] %>% setnames(new = paste("EWAS_lifespanAdjW", tolower(targets), sep = "_")) %>% tibble::rownames_to_column(var = "CGid") rm(lifespan_AdjWeight_association, stacked.p) ``` ```{r phylo} ### Use Pvalue and Correlations to get Z values: ## NOTE: change mypath accordingly to your path, and if it's Eutherian path, change them to Eutherian path phylogenetic_association <- readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allPhyloEWAS_zvalue.RDS") # corrs file myanalysis = "phylo" ## if this is phylo results, please change to "phylo", # in the next step, getStouffer function will summarise the columns to those column tissues accordingly # If you want to summarise other columns, e.g. no logmaxAgeCaesar, but just maxAgeCaesar, or only a few tissues, # you can change mycols = c(...) accordingly # Gneeric Tissue columns, (in order): All, blood, skin, liver, brain, muscle. Phylo Tissues: All, Blood, skin, Liver, Brain, Muscle if(myanalysis == "generic") { mycols = c(1, 8, 15, 22, 25, 28) } else { mycols = c(1, 4, 7, 12, 15, 18) } getStouffer <- function(mydf, myweights = NA) { if(!length(myweights) == ncol(mydf)) { stop("Number of weights do not match number of columns") } return(apply(mydf, 1, function(x) return(sum(x * myweights) / sqrt(sum(myweights^2))))) } ns = sapply(strsplit(colnames(phylogenetic_association)[mycols], "\\.N"), "[", c(2)) ns = as.numeric(ns)[-1] tissuesStouffer = getStouffer(phylogenetic_association[, mycols[-1]], myweights = sqrt(ns)) #stacked = cbind(stacked, tissuesStouffer); colnames(stacked)[ncol(stacked)] = "tissuesStouffer" phylogenetic_association <- as.data.frame(phylogenetic_association) phylogenetic_association$Meta <- tissuesStouffer ## Code produces a new column named "tissuesStouffer" #### Log <- grep("(Log)", colnames(phylogenetic_association)) OrderALL <- grep("(OrderALL)|(OrderAll)", colnames(phylogenetic_association)) Log <- Log[Log%in%OrderALL] i <- sapply(targets, function(x){ if(x=="Meta"){ a <- grep("Meta", colnames(phylogenetic_association)) } else { a <- Log[which(Log %in% grep(paste("Tissue", x, sep = ""), colnames(phylogenetic_association)))] } return(a) }) EWAS_maxAgePhylo <- phylogenetic_association[,i] %>% setnames(new = paste("EWAS_lifespanPhylo", tolower(targets), sep = "_")) %>% tibble::rownames_to_column(var = "CGid") rm(phylogenetic_association) ``` ```{r} matchedNetworks <- readRDS("all networks with matched colors.RDS") %>% dplyr::select(CGid, net1)%>% left_join(EWAS_maxAge)%>% left_join(EWAS_maxAge_AdjWeight) %>% left_join(EWAS_maxAgePhylo)%>% gather(-CGid, -net1, key="EWAS", value="z") %>% mutate(z=as.numeric(z))%>% mutate(id=paste(EWAS, net1)) %>% mutate(net1=as.factor(net1)) sum1 <- matchedNetworks %>% group_by(net1, EWAS) %>% summarize(mean=mean(z), median=median(z))%>%filter(abs(median)>2) %>% group_by(net1) %>% add_count() %>% filter(n>5)%>% mutate(id=paste(EWAS, net1)) sum2 <- matchedNetworks %>% filter(!net1%in%sum1$net1)%>% group_by(net1, EWAS) %>% summarize(mean=mean(z), median=median(z)) %>% group_by(EWAS)%>%top_n(5, abs(median)) %>% mutate(id=paste(EWAS, net1)) sum1 <- matchedNetworks %>% group_by(net1, EWAS) %>% summarize(mean=mean(z), median=median(z)) %>% mutate(dir= sign(median)) %>%filter(dir==1)%>% group_by(EWAS)%>%top_n(5, abs(median)) %>% mutate(id=paste(EWAS, net1)) sum2 <- matchedNetworks %>% group_by(net1, EWAS) %>% summarize(mean=mean(z), median=median(z)) %>% mutate(dir= sign(median)) %>%filter(dir== -1)%>% group_by(EWAS)%>%top_n(2, abs(median)) %>% mutate(id=paste(EWAS, net1)) sum <- rbind(sum1, sum2) a <- matchedNetworks%>% filter(id%in%sum$id) %>% mutate(net1=factor(net1, levels=unique(net1)[order(abs(z))])) p <- a%>% ggplot(aes(x=net1, y=z, fill=net1))+geom_boxplot(notch = T)+scale_fill_manual(values=levels(a$net1))+theme_classic(base_size = 15)+facet_wrap(~EWAS, nrow =5 , ncol = 4, scales = "free", )+theme(axis.text.x = element_text(angle=45, hjust = 1), legend.position = "none", plot.title = element_text(size=20, face="bold", hjust=0.5))+geom_hline(yintercept = c(2,-2), linetype="dashed", color="red")+ggtitle("Top 7 modules (5 positive, 2 negative) in each EWAS model")+ylab("Z score of association")+xlab("") ggsave("EWAS results.pdf",p, width = 12, height = 14) ``` ## EWAS of maximum lifespan ```{r module enrichment} source("..//Codes, and protocols/ChromHMM enrichment/ChromHMM enrichment Source Code.R") EWAS <- readRDS("..//Collaborative projects/Caesar analysis/Figure for the paper/caesarResults.RDS") moduleCpGs <- readRDS("all networks with matched colors.RDS") input <- EWAS$ALL$targetTop[1:2] N55Hits <- read.csv("N55 EWAS dosage top hits Adj Age Sex.csv") %>% filter(Tissue%in%c("Skin", "Kidney")) %>% dplyr::select(CGid, Tissue, EWAS_z) %>% left_join(geneMap) library(GeneOverlap) c <- newGeneOverlap(input[[1]]$SYMBOL, input[[2]]$SYMBOL, genome.size = length(unique(geneMap$SYMBOL))) go.obj <- GeneOverlap::testGeneOverlap(c) print(go.obj) geneM <- geneMap %>% filter(CGid%in%moduleCpGs$CGid) moduleEnrichLifespan <- moduleEnrichment.Amin(input, geneM) moduleEnrichLifespan1 <- moduleEnrichLifespan %>% mutate(FDR=p.adjust(pval, method = "fdr")) %>% filter(FDR<0.001)%>% mutate(group=factor(group, levels = c("Lifespan ALL", "Lifespan (AdjWeight) ALL"))) moduleEnrichLifespan2 <- moduleEnrichLifespan%>% mutate(FDR=p.adjust(pval, method = "fdr")) %>% mutate(group=factor(group, levels = c("Lifespan ALL", "Lifespan (AdjWeight) ALL"), labels=c("Lifespan", "LifespanAdjW"))) %>% filter(mammlianModules%in%moduleEnrichLifespan1$mammlianModules)%>% mutate(mammlianModules=factor(mammlianModules, levels = unique(mammlianModules)[order(-pval)])) p <- ggplot(moduleEnrichLifespan2, aes(x=-log10(FDR), y=mammlianModules, fill=mammlianModules))+geom_bar(stat="identity")+facet_wrap(~group)+theme_classic(base_size = 13)+theme(legend.position = "none", plot.title = element_text(face="bold", size=15))+scale_fill_manual(values = levels(moduleEnrichLifespan2$mammlianModules))+ggtitle("Module enrichment\n(FDR<0.001)") ggsave("module enrichemnt lifespan.pdf", p, height = 5, width = 3.5) moduleEnrichLifespan3 <- moduleEnrichLifespan2 %>% filter(group=="Lifespan"&FDR<0.01) %>% droplevels() p2 <- moduleEnrichLifespan3 %>% ggplot(aes(x=-log10(FDR), y=mammlianModules, fill=mammlianModules))+geom_bar(stat="identity")+theme_classic(22)+theme(legend.position = "none", plot.title = element_text(face="bold", size=18))+scale_fill_manual(values = levels(moduleEnrichLifespan3$mammlianModules))+ylab("co-methylation modules") ggsave("module enrichemnt lifespan.pdf", p2, height = 4, width = 4) ``` ```{r manhattan plot} targetModules <- gsub("ME", "", maxLifeModulesMarginal) maxAgeModules <- readRDS("all networks with matched colors.RDS") %>% dplyr::select(CGid, net1) mappability3 <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mappability file. Eutherians and Marsupials.RDS") %>% dplyr::select(CGid, Mouse) mappability4 <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mouse.mm9.Amin.RDS")%>% left_join(mappability3) %>% filter(!is.na(CGstart)&Mouse>=0.8) geneM <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Human.Homo_sapiens.hg19.Amin.V9.RDS") %>% filter(CGid %in% mappability4$CGid) geneM$seqnames <- as.factor(geneM$seqnames) chr <- levels(geneM$seqnames)[c(1,12,16:22,2:11,13:15, 23:24)] geneM$seqnames <- factor(geneM$seqnames, levels = chr, labels = gsub("chr", "", chr)) EWAS <- readRDS("..//Collaborative projects/Caesar analysis/Figure for the paper/caesarResults.RDS") input <- EWAS$ALL$EWAS[1:2] plots <- lapply(1:2, function(x){ label <- "SYMBOL"; highlightTr = 5.756454; IDname="CGid"; annotateTr = 5.756454; annotateTop = 15; maxTextOverlap = 15; stripSize = 20; labelSize = 4 if(x==1){tit="EWAS of Lifespan"} else{tit="EWAS of Lifespan (adjWeight)"} mod <- moduleEnrichLifespan%>% filter(group==names(input)[x]) manh <- geneM %>% dplyr::select(CGid, "seqnames", "CGstart", "GeneRegionID", paste(label)) %>% left_join(input[[x]][,1:3], by="CGid")%>% setNames(c(names(.)[1:5],"Beta", "P")) %>% dplyr::rename(CHR = seqnames, BP=CGstart) %>% filter(complete.cases(.)) %>% droplevels() %>% mutate(BP = as.numeric(BP)) %>% mutate(ID = paste(IDname, GeneRegionID, sep = " : ")) manh <- manh %>% # Compute chromosome size group_by(CHR) %>% summarise(chr_len=max(BP)) %>% # Calculate cumulative position of each chromosome mutate(tot=cumsum(chr_len)-chr_len) %>% dplyr::select(-chr_len) %>% # Add this info to the initial dataset dplyr::left_join(manh, ., by=c("CHR"="CHR")) %>% # Add a cumulative position of each SNP arrange(CHR, BP) %>% mutate( BPcum=BP+tot) %>% # Add highlight and annotation information mutate(is_highlight_positive=ifelse(-log10(P)>highlightTr&Beta>0, "yes", "no")) %>% mutate(is_highlight_negative=ifelse(-log10(P)>highlightTr&Beta<0, "yes", "no")) %>% mutate( is_annotate=ifelse(-log10(P)>annotateTr, "yes", "no")) %>% mutate(Title = "All") annot <- manh %>% filter(is_annotate=="yes") %>% group_by(sign(Beta)) %>% top_n(annotateTop, -P) %>% dplyr::select(paste(IDname), "is_annotate") manh <- manh %>% dplyr::select(-is_annotate) %>% left_join(annot, by=paste(IDname)) axisdf = manh %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) ) / 2 ) nChr <- floor(length(unique(manh$CHR))/2)+2 manh <- manh %>% mutate(P=-log10(P)*sign(Beta)) %>% left_join(maxAgeModules) # plot p1 <- ggplot(manh, aes(x=BPcum, y=P)) + # Show all points ggrastr::geom_point_rast(aes(color=as.factor(CHR)), alpha=0.8, size=1.3)+ geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) + scale_color_manual(values = rep(c("grey", "darkgrey"), nChr )) + # custom X axis: scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + scale_y_continuous(expand = c(0, 0) ) + # remove space between plot area and x axis geom_text_repel( data=subset(manh, is_annotate=="yes"), aes(label=get(paste(label))), size=labelSize, max.overlaps = maxTextOverlap) + # Custom the theme: theme_bw() + theme( legend.position="none", #panel.border = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), strip.text = element_text(size=stripSize), strip.background = element_rect(fill="white"), plot.title = element_text(face="bold", hjust=0.5, size=stripSize) )+ xlab("Chromosome")+ geom_hline(yintercept = c(highlightTr, -highlightTr),linetype="dashed", color = "blue")+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+ggtitle(tit)+ylab("signed log10(pvalue)")+scale_y_continuous(expand = expansion(mult = c(0.1, 0.1))) # Add highlighted points for(i in 1:length(mod$mammlianModules)){ p1 <- p1+ggrastr::geom_point_rast(data=subset(manh, net1==mod$mammlianModules[i]&abs(P)>highlightTr), color=mod$mammlianModules[i], size=2) } return(p1) }) p <- ggpubr::ggarrange(plotlist = plots, nrow=1) ggsave("manhattan.pdf",p,width = 10, height = 4.5) # plots <- lapply(1, function(x){ label <- "SYMBOL"; highlightTr = 5.756454; IDname="CGid"; annotateTr = 5.756454; annotateTop = 1; maxTextOverlap = 6; stripSize = 20; labelSize = 7 if(x==1){tit="EWAS of Lifespan"} else{tit="EWAS of Lifespan (adjWeight)"} mod <- moduleEnrichLifespan%>% filter(group==names(input)[x]) manh <- geneM %>% dplyr::select(CGid, "seqnames", "CGstart", "GeneRegionID", paste(label)) %>% left_join(input[[x]][,1:3], by="CGid")%>% setNames(c(names(.)[1:5],"Beta", "P")) %>% dplyr::rename(CHR = seqnames, BP=CGstart) %>% filter(complete.cases(.)) %>% droplevels() %>% mutate(BP = as.numeric(BP)) %>% mutate(ID = paste(IDname, GeneRegionID, sep = " : ")) manh <- manh %>% # Compute chromosome size group_by(CHR) %>% summarise(chr_len=max(BP)) %>% # Calculate cumulative position of each chromosome mutate(tot=cumsum(chr_len)-chr_len) %>% dplyr::select(-chr_len) %>% # Add this info to the initial dataset dplyr::left_join(manh, ., by=c("CHR"="CHR")) %>% # Add a cumulative position of each SNP arrange(CHR, BP) %>% mutate( BPcum=BP+tot) %>% # Add highlight and annotation information mutate(is_highlight_positive=ifelse(-log10(P)>highlightTr&Beta>0, "yes", "no")) %>% mutate(is_highlight_negative=ifelse(-log10(P)>highlightTr&Beta<0, "yes", "no")) %>% mutate( is_annotate=ifelse(-log10(P)>annotateTr, "yes", "no")) %>% mutate(Title = "All") xlabels <- as.character(sort(unique(manh$CHR))) xlabels[seq(2, length(xlabels), 2)] <- "" annot <- manh %>% filter(is_annotate=="yes") %>% group_by(sign(Beta)) %>% top_n(annotateTop, -P) %>% dplyr::select(paste(IDname), "is_annotate") manh <- manh %>% dplyr::select(-is_annotate) %>% left_join(annot, by=paste(IDname)) axisdf = manh %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) ) / 2 ) nChr <- floor(length(unique(manh$CHR))/2)+2 manh <- manh %>% mutate(P=-log10(P)*sign(Beta)) %>% left_join(maxAgeModules) # plot p1 <- ggplot(manh, aes(x=BPcum, y=P)) + # Show all points ggrastr::geom_point_rast(aes(color=as.factor(CHR)), alpha=0.8, size=1.3)+ ggrastr::geom_point_rast( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) + scale_color_manual(values = rep(c("grey", "darkgrey"), nChr )) + # custom X axis: scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + scale_y_continuous(expand = c(0, 0) ) + # remove space between plot area and x axis geom_label_repel( data=subset(manh, is_annotate=="yes"), aes(label=get(paste(label))), size=labelSize, max.overlaps = maxTextOverlap, alpha=0.8, color="blue") + # Custom the theme: theme_bw(base_size = 20) + theme( legend.position="none", #panel.border = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), strip.text = element_text(size=stripSize), strip.background = element_rect(fill="white"), plot.title = element_text(face="bold", hjust=0.5, size=stripSize) )+ xlab("Chromosome")+ geom_hline(yintercept = c(highlightTr, -highlightTr),linetype="dashed", color = "blue")+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+ggtitle(tit)+ylab("signed log10(pvalue)")+scale_y_continuous(expand = expansion(mult = c(0.1, 0.1)))+scale_x_discrete(labels = xlabels) # Add highlighted points for(i in 1:length(mod$mammlianModules)){ p1 <- p1+ggrastr::geom_point_rast(data=subset(manh, net1==mod$mammlianModules[i]&abs(P)>highlightTr), color=mod$mammlianModules[i], size=2) } return(p1) }) ggsave("manhattan2.pdf",plots[[1]],width = 5, height = 4.5) ``` ```{r volcano plot for lifespan overlap with Gtex} input <- rbindlist(EWAS$ALL$targetTop[1:2], idcol = "EWAS") %>% group_by(EWAS) %>% summarize(SYMBOL=unique(SYMBOL)) rmCor <- readRDS("..//Transcriptome studies/rmCor summary.RDS") %>% filter(SYMBOL%in%input$SYMBOL) volcPlot <- ggplot(rmCor, aes(y=-log10(rmP), x=rmCor))+ggrastr::geom_point_rast(color="grey", alpha=0.5)+geom_text_repel(data = subset(rmCor, rmP<0.005),aes(label=SYMBOL), max.overlaps = 45, size=4)+theme_classic(base_size = 15)+ggrastr::geom_point_rast(data = subset(rmCor, rmP<0.005&rmCor>0), color="red")+ggrastr::geom_point_rast(data = subset(rmCor, rmP<0.005&rmCor<0), color="blue")+ggtitle("Mammalian lifespan genes\nin Human GTEx")+xlab("Age related rmCor")+theme(plot.title = element_text(hjust=0.5, face="bold")) ggsave("rmCor volcanot plot for lifespan.pdf", volcPlot, width = 5, height = 5) ``` ```{r venn diagram } input <- rbindlist(EWAS$ALL$targetTop[1:2], idcol = "model") %>% group_by(model) %>% summarize(SYMBOL=unique(SYMBOL)) rmCor <- readRDS("..//Transcriptome studies/rmCor summary.RDS") %>% filter(SYMBOL%in%geneMap$SYMBOL) %>% mutate(dir=sign(rmCor)) %>% group_by(dir) %>% top_n(500, -rmP) %>% mutate(model="rmCor") rmCor <- readRDS("..//Transcriptome studies/rmCor summary.RDS") %>% filter(rmP<1e-50) %>% mutate(dir=sign(rmCor)) %>% top_n(1000, -rmP) %>% mutate(model="rmCor") input <- bind_rows(input, rmCor) %>% dplyr::select(model, SYMBOL) %>% mutate(group=1)%>% spread(key = "model", value = "group") %>% mutate(across(everything(), ~ifelse(is.na(.), 0, .))) pdf("venn diagram, lifespan gtex.pdf", width = 6, height = 5.5) limma::vennDiagram(input[,c(2:4)], include=c("both"), counts.col=c("maroon"), cex = 1, show.include = FALSE, ) dev.off() input<- plyr::llply(EWAS$ALL$targetTop[1:2], function(x){x <-x %>% summarize(SYMBOL=unique(SYMBOL))}) geneM <- geneMap %>% filter(!duplicated(SYMBOL)) library(GeneOverlap) c <- newGeneOverlap(input[[1]]$SYMBOL, rmCor$SYMBOL, genome.size = length(unique(geneMap$SYMBOL))) go.obj <- GeneOverlap::testGeneOverlap(c) print(go.obj) sel <- readRDS("..//Transcriptome studies/rmCor summary.RDS") %>% filter(SYMBOL%in%geneMap$SYMBOL) %>% mutate(dir=sign(rmCor)) %>% group_by(dir) %>% top_n(500, -rmP) %>% mutate(model="rmCor")%>% filter(SYMBOL%in%input$`Lifespan ALL`$SYMBOL&SYMBOL%in%input$`Lifespan (AdjWeight) ALL`$SYMBOL) %>% arrange(rmP) sel <- readRDS("..//Transcriptome studies/rmCor summary.RDS") %>% filter(rmP<1e-50) %>% mutate(model="rmCor")%>% filter(!SYMBOL%in%input$`Lifespan ALL`$SYMBOL&SYMBOL%in%input$`Lifespan (AdjWeight) ALL`$SYMBOL) %>% arrange(rmP) paste(sel$SYMBOL[which(sel$rmCor>0)], collapse = ", ") ``` ```{r overlap N55} input <- EWAS$ALL$targetTop[1]$`Lifespan ALL` %>% filter(!duplicated(SYMBOL)) N55Hits <- read.csv("../Collaborative projects/Individual mammalian projects/N55 and N84 rejuvenation/N55 EWAS dosage top hits Adj Age Sex.csv") %>% filter(Tissue%in%c("Skin", "Kidney"))%>% dplyr::select(CGid, Tissue) %>% left_join(geneM) %>% group_by(Tissue) %>% filter(!duplicated(SYMBOL)) library(GeneOverlap) c <- newGeneOverlap(input$SYMBOL, N55Hits$SYMBOL[which(N55Hits$Tissue=="Skin")], genome.size = length(unique(geneM$SYMBOL))) go.obj <- GeneOverlap::testGeneOverlap(c) print(go.obj) input <- rbindlist(EWAS$ALL$targetTop[1], idcol = "Tissue") %>% dplyr::select(Tissue, SYMBOL) %>% distinct() input <- bind_rows(input, N55Hits) %>% dplyr::select(Tissue, SYMBOL) %>% distinct()%>% mutate(group=1)%>% spread(key = "Tissue", value = "group") %>% mutate(across(everything(), ~ifelse(is.na(.), 0, .))) pdf("venn diagram, pverlap yamanaka factors.pdf", width = 6, height = 5.5) limma::vennDiagram(input[,c(2,4,3)], include=c("both"), counts.col=c("maroon"), cex = 1, show.include = FALSE, ) dev.off() sel <- input[which(rowSums(input[,-1])==3),] %>% left_join(EWAS$ALL$targetTop[1]$`Lifespan ALL`) %>% group_by(SYMBOL) %>% top_n(1, abs(EWAS_z)) %>% arrange(EWAS_z) paste(sel$SYMBOL[which(sel$EWAS_z<0)], collapse = ", ") ``` ```{r overlap with OSKM} input1 <- rbindlist(EWAS$ALL$targetTop[1], idcol = "Tissue") %>% dplyr::select(Tissue, SYMBOL) %>% distinct() input <- bind_rows(input1, N55Hits) %>% dplyr::select(Tissue, SYMBOL) %>% distinct() %>% filter(Tissue!="Kidney") load("OSKM_top_genes, Qi.RData") OSKM <- data.frame(SYMBOL=gene_ewas_meta, Tissue="human_meta") %>% bind_rows(data.frame(SYMBOL=gene_ewas_k16, Tissue="human_K16")) input <- bind_rows(input, OSKM)%>% mutate(group=1)%>% spread(key = "Tissue", value = "group") %>% mutate(across(everything(), ~ifelse(is.na(.), 0, .))) pdf("venn diagram, overlap yamanaka factors, human mouse.pdf", width = 6, height = 5.5) limma::vennDiagram(input[,c(3,4,5)], include=c("both"), counts.col=c("maroon"), cex = 1, show.include = FALSE, ) dev.off() library(GeneOverlap) c <- newGeneOverlap(input1$SYMBOL, OSKM$SYMBOL[which(OSKM$Tissue=="human_meta")]) go.obj <- GeneOverlap::testGeneOverlap(c) print(go.obj) sel <- input[which(rowSums(input[,c(-1,-2)])==3),] %>% left_join(EWAS$ALL$targetTop[1]$`Lifespan ALL`) %>% group_by(SYMBOL) %>% top_n(1, abs(EWAS_z)) %>% arrange(EWAS_z) paste(sel$SYMBOL[which(sel$EWAS_z>0)], collapse = ", ") ``` ```{r heatmap N55} input <- EWAS$ALL$targetTop[1]$`Lifespan ALL` %>% filter(EWAS_z<0) %>% filter(!duplicated(SYMBOL)) N55Hits <- read.csv("N55 EWAS dosage top hits Adj Age Sex.csv") %>% filter(Tissue%in%c("Skin")) %>% dplyr::select(CGid, Tissue, EWAS_z) %>% left_join(geneM) %>% group_by(Tissue, SYMBOL) %>% top_n(1, abs(EWAS_z)) %>% filter(!is.na(SYMBOL)&SYMBOL%in%sel$SYMBOL) N55samples <- read.csv("SampleSheetAgeN55final.csv") %>% dplyr::rename(groups=Group, dosage=DurationTreatment) %>% filter(CanBeUsedForAgingStudies=="yes"&!is.na(dosage)&Tissue=="Skin") miceadds::load.Rdata("NormalizedData/all_probes_sesame_normalized.Rdata", objname = "N55data") N55data <-N55data%>% filter(CGid %in%N55Hits$CGid) %>% gather(-CGid, key = "Basename", value = "bval") %>% right_join(N55samples)%>% mutate(bval=residuals(lm(bval~Age))) %>% group_by(dosage,CGid) %>% summarize(bval=mean(bval))%>% ungroup() %>% distinct() %>% spread( key = "dosage", value = "bval")%>% left_join(dplyr::select(.data=geneM, CGid, GeneRegionID)) %>% dplyr::select(-CGid) %>% mutate(GeneRegionID=gsub("Intergenic_", "", GeneRegionID)) %>% tibble::column_to_rownames("GeneRegionID") N55data <- as.data.frame(t(apply(N55data, 1, scale))) colnames(N55data) <- c(0,1,7,10) colPalette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[60:231] Heatmap <- Heatmap(N55data, col = colPalette, show_column_names = TRUE, name = "Scaled\nmean DNAm", show_row_names = TRUE, column_dend_reorder = F, cluster_columns = F, cluster_rows = TRUE, row_dend_reorder=TRUE, clustering_distance_rows="pearson", clustering_distance_columns="pearson",clustering_method_rows = "average",clustering_method_columns = "average", row_title_gp=gpar(fontsize=10, fontface="bold", fill="#FFCCE5", vjust=0.2), border = TRUE, row_names_gp = gpar(fontsize = 8)) pdf(file = "Heatmap N55 overlap with lifespan, mouse.pdf", width = 10, height = 10) draw(Heatmap, heatmap_legend_side = "right", annotation_legend_side="right", padding = unit(c(2, 10, 10, 10), "mm"), column_title="DNAm change by 4F duration in mouse skin", column_title_gp=gpar(fontsize=13, fontface="bold")) dev.off() # input <- EWAS$ALL$targetTop[1]$`Lifespan ALL` %>% filter(EWAS_z>0) %>% filter(!duplicated(SYMBOL)) N55Hits <- read.csv("N55 EWAS dosage top hits Adj Age Sex.csv") %>% filter(Tissue%in%c("Skin")) %>% dplyr::select(CGid, Tissue, EWAS_z) %>% left_join(geneM) %>% group_by(Tissue, SYMBOL) %>% top_n(1, abs(EWAS_z)) %>% filter(!is.na(SYMBOL)&SYMBOL%in%input$SYMBOL) N55samples <- read.csv("SampleSheetAgeN55final.csv") %>% dplyr::rename(groups=Group, dosage=DurationTreatment) %>% filter(CanBeUsedForAgingStudies=="yes"&!is.na(dosage)&Tissue=="Skin") miceadds::load.Rdata("NormalizedData/all_probes_sesame_normalized.Rdata", objname = "N55data") N55data <-N55data%>% filter(CGid %in%N55Hits$CGid) %>% gather(-CGid, key = "Basename", value = "bval") %>% right_join(N55samples)%>% mutate(bval=residuals(lm(bval~Age))) %>% group_by(dosage,CGid) %>% summarize(bval=mean(bval))%>% ungroup() %>% distinct() %>% spread( key = "dosage", value = "bval")%>% left_join(dplyr::select(.data=geneM, CGid, GeneRegionID)) %>% dplyr::select(-CGid) %>% mutate(GeneRegionID=gsub("Intergenic_", "", GeneRegionID)) %>% tibble::column_to_rownames("GeneRegionID") N55data <- as.data.frame(t(apply(N55data, 1, scale))) colnames(N55data) <- c(0,1,7,10) colPalette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[60:231] Heatmap2 <- Heatmap(N55data, col = colPalette, show_column_names = TRUE, name = "Scaled\nmean DNAm", show_row_names = TRUE, column_dend_reorder = F, cluster_columns = F, cluster_rows = TRUE, row_dend_reorder=TRUE, clustering_distance_rows="pearson", clustering_distance_columns="pearson",clustering_method_rows = "average",clustering_method_columns = "average", row_title_gp=gpar(fontsize=10, fontface="bold", fill="#FFCCE5", vjust=0.2), border = TRUE, row_names_gp = gpar(fontsize = 8)) pdf(file = "Heatmap N55 overlap with lifespan, positive.pdf", width = 5, height = 12) draw(Heatmap2, heatmap_legend_side = "right", annotation_legend_side="right", padding = unit(c(2, 10, 10, 10), "mm"), column_title="DNAm change by 4F duration\nGenes with positive DNAm relation to lifespan", column_title_gp=gpar(fontsize=13, fontface="bold")) dev.off() ``` ```{r overlap MLS TWAS} geneMouse <- readRDS("~/Google Drive/My Drive/Amin documents/Steve projects/Research projects/Mammalian beadchip annotation/New geneMaps based on QuasR alignment/Latest annotation/Mouse.mm9.Amin.RDS") %>% filter(CGid %in% mappability4$CGid) geneMouse$seqnames <- as.factor(geneMouse$seqnames) chr <- levels(geneMouse$seqnames)[c(1,12,16:22,2:11,13:15, 23:24)] geneMouse$seqnames <- factor(geneMouse$seqnames, levels = chr, labels = gsub("chr", "", chr)) input <- rbindlist(EWAS$ALL$targetTop[1:2], idcol = "EWAS") %>% dplyr::select(EWAS, CGid, EWAS_z) %>% left_join(geneMouse) %>% group_by(EWAS, SYMBOL) %>%top_n(1, abs(EWAS_z)) MLS.vera <- read_xlsx("MLS TWAS vera.xlsx") %>% setnames(new = c("SYMBOL", "cor", "p.adj")) %>% filter(p.adj<0.05&SYMBOL%in%geneMouse$SYMBOL) %>% top_n(370, -p.adj) MLSadjW.vera <- read_xlsx("MLS adjW TWAS vera.xlsx") %>% dplyr::select(1:3)%>% setnames(new = c("SYMBOL", "cor", "p.adj")) %>% filter(p.adj<0.05&SYMBOL%in%geneMouse$SYMBOL)%>% top_n(1000, -p.adj) library(GeneOverlap) c <- newGeneOverlap(input$SYMBOL[which(input$EWAS=="Lifespan ALL")], MLS.vera$SYMBOL, genome.size = length(unique(geneMouse$SYMBOL))) go.obj <- GeneOverlap::testGeneOverlap(c) print(go.obj) library(GeneOverlap) c <- newGeneOverlap(input$SYMBOL[which(input$EWAS=="Lifespan (AdjWeight) ALL")], MLSadjW.vera$SYMBOL, genome.size = length(unique(geneMouse$SYMBOL))) go.obj <- GeneOverlap::testGeneOverlap(c) print(go.obj) # rodentia lifespan_association = readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allEWAS_correlation.RDS") %>% as.data.frame() %>% as.matrix() stacked.p = readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allEWAS_pvalue.RDS")%>% as.data.frame() %>% as.matrix()# pvalue file lifespan_association = as.data.frame(sign(lifespan_association) * (- qnorm(stacked.p/2))) lifespan_association = lifespan_association %>% tibble::rownames_to_column("CGid") %>% gather(-CGid, key = "EWAS", value = "z") %>% mutate(dir=sign(z)) %>% group_by(EWAS,dir) %>%top_n(500, abs(z)) %>% left_join(geneMouse) %>% ungroup() %>% group_split(EWAS) names(lifespan_association) <- sapply(lifespan_association, function(x){x$EWAS[1]}) res <-rbindlist(plyr::llply(lifespan_association, function(x){ dat <-x %>% filter(!duplicated(SYMBOL)) c <- newGeneOverlap(dat$SYMBOL, MLS.vera$SYMBOL, genome.size = length(unique(geneMouse$SYMBOL))) go.obj <- GeneOverlap::testGeneOverlap(c) a <- data.frame(pval = go.obj@pval) }), idcol = "EWAS") # rodentia lifespan_association.adjw = readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allEWAS_weightAdjusted_correlation.RDS") %>% as.data.frame() %>% as.matrix() stacked.p = readRDS("~/Steve Horvath Lab Dropbox/Amin Haghani/HorvathLabCoreMembers/Caesar/ProjectEWASmaxlifespan/allEWAS_maxlifespan/Feb2022/Eutherians_allEWAS_weightAdjusted_pvalue.RDS")%>% as.data.frame() %>% as.matrix()# pvalue file lifespan_association.adjw = as.data.frame(sign(lifespan_association.adjw) * (- qnorm(stacked.p/2))) lifespan_association.adjw = lifespan_association.adjw %>% tibble::rownames_to_column("CGid") %>% gather(-CGid, key = "EWAS", value = "z") %>% mutate(dir=sign(z)) %>% group_by(EWAS,dir) %>%top_n(500, abs(z)) %>% left_join(geneMouse) %>% ungroup() %>% group_split(EWAS) names(lifespan_association.adjw) <- sapply(lifespan_association.adjw, function(x){x$EWAS[1]}) res.adjw <-rbindlist(plyr::llply(lifespan_association.adjw, function(x){ dat <-x %>% filter(!duplicated(SYMBOL)) c <- newGeneOverlap(dat$SYMBOL, MLSadjW.vera$SYMBOL, genome.size = length(unique(geneMouse$SYMBOL))) go.obj <- GeneOverlap::testGeneOverlap(c) a <- data.frame(pval = go.obj@pval) }), idcol = "EWAS") ```