https://github.com/shorvath/MammalianMethylationConsortium
Tip revision: da8c94df7c75063273be48b70db89ec5d7b6f8c8 authored by Amin Haghani on 12 August 2025, 18:58:48 UTC
Update README.md
Update README.md
Tip revision: da8c94d
2- Consensus WGCNA.Rmd
---
Author: "Amin Haghani"
Paper: "DNA Methylation Networks Underlying Mammalian Traits"
Authors: "Amin Haghani1, 2 †*; Caesar Z. Li3, 4 †; Todd R. Robeck5; Joshua Zhang1; Ake T. Lu1, 2; Julia Ablaeva6; Victoria A. Acosta-Rodríguez7; Danielle M. Adams8; Abdulaziz N. Alagaili9, 10; Javier Almunia11; Ajoy Aloysius12; Nabil M.S. Amor13; Reza Ardehali14; Adriana Arneson15, 16; C. Scott Baker17; Gareth Banks18; Katherine Belov19; Nigel C. Bennett20; Peter Black21; Daniel T. Blumstein22, 23; Eleanor K. Bors17; Charles E. Breeze24; Robert T. Brooke25; Janine L. Brown26; Gerald Carter27; Alex Caulton28, 29; Julie M. Cavin30; Lisa Chakrabarti31; Ioulia Chatzistamou32; Andreas S. Chavez27, 33; Hao Chen34; Kaiyang Cheng35; Priscila Chiavellini36; Oi-Wa Choi37, 38; Shannon Clarke28; Joseph A. Cook39; Lisa N. Cooper40; Marie-Laurence Cossette41; Joanna Day42; Joseph DeYoung37, 38; Stacy Dirocco43; Christopher Dold44; Jonathan L. Dunnum39; Erin E. Ehmke45; Candice K. Emmons46; Stephan Emmrich6; Ebru Erbay47, 48, 49; Claire Erlacher-Reid43; Chris G. Faulkes50, 51; Zhe Fei3, 52; Steven H. Ferguson53, 54; Carrie J. Finno55; Jennifer E. Flower56; Jean-Michel Gaillard57; Eva Garde58; Livia Gerber59, 60; Vadim N. Gladyshev61; Rodolfo G. Goya36; Matthew J Grant62; Carla B. Green7; M. Bradley Hanson46; Daniel W. Hart20; Martin Haulena63; Kelsey Herrick64; Andrew N. Hogan65; Carolyn J. Hogg19; Timothy A. Hore66; Taosheng Huang67; Juan Carlos Izpisua Belmonte2; Anna J. Jasinska37, 68, 69; Gareth Jones70; Eve Jourdain71; Olga Kashpur72; Harold Katcher73; Etsuko Katsumata74; Vimala Kaza75; Hippokratis Kiaris76; Michael S. Kobor77; Pawel Kordowitzki78; William R. Koski79; Michael Krützen60; Soo Bin Kwon16, 15; Brenda Larison22, 80; Sang-Goo Lee61; Marianne Lehmann36; Jean-François Lemaître57; Andrew J. Levine81; Xinmin Li82; Cun Li83, 84; Andrea R. Lim1; David T.S. Lin85; Dana M. Lindemann43; Schuyler W. Liphardt86; Thomas J. Little87; Nicholas Macoretta6; Dewey Maddox88; Craig O. Matkin89; Julie A. Mattison90; Matthew McClure91; June Mergl92; Jennifer J. Meudt93; Gisele A. Montano5; Khyobeni Mozhui94; Jason Munshi-South95; William J. Murphy96, 97; Asieh Naderi76; Martina Nagy98; Pritika Narayan62; Peter W. Nathanielsz83, 84; Ngoc B. Nguyen14; Christof Niehrs99, 100; Batsaikhan Nyamsuren101; Justine K. O'Brien42; Perrie O'Tierney Ginn72; Duncan T Odom102, 103; Alexander G. Ophir104; Steve Osborn105; Elaine A. Ostrander65; Kim M. Parsons46; Kimberly C. Paul81; Amy B. Pedersen87; Matteo Pellegrini106; Katharina J. Peters60, 107; Jessica L. Petersen108; Darren W. Pietersen109; Gabriela M. Pinho22; Jocelyn Plassais65; Jesse R. Poganik61; Natalia A. Prado110, 26; Pradeep Reddy111, 2; Benjamin Rey57; Beate R. Ritz112, 113, 81; Jooke Robbins114; Magdalena Rodriguez115; Jennifer Russell105; Elena Rydkina6; Lindsay L. Sailer104; Adam B. Salmon116; Akshay Sanghavi73; Kyle M. Schachtschneider117, 118, 119; Dennis Schmitt120; Todd Schmitt64; Lars Schomacher99; Lawrence B. Schook117, 121; Karen E. Sears22; Ashley W. Seifert12; Aaron B.A. Shafer122; Anastasia V. Shindyapina61; Melanie Simmons45; Kavita Singh123; Ishani Sinha22; Jesse Slone67; Russel G. Snell62; Elham Soltanmohammadi76; Matthew L. Spangler108; Maria Spriggs21; Lydia Staggs43; Nancy Stedman21; Karen J. Steinman124; Donald T Stewart125; Victoria J. Sugrue66; Balazs Szladovits126; Joseph S. Takahashi7, 127; Masaki Takasugi6; Emma C. Teeling128; Michael J. Thompson106; Bill Van Bonn129; Sonja C. Vernes130, 131; Diego Villar132; Harry V. Vinters133; Ha Vu15, 16; Mary C. Wallingford72; Nan Wang37, 38; Gerald S. Wilkinson8; Robert W. Williams134; Qi Yan3, 2; Mingjia Yao3; Brent G. Young54; Bohan Zhang61; Zhihui Zhang6; Yang Zhao6; Peng Zhao14, 135; Wanding Zhou136, 137; Joseph A. Zoller3; Jason Ernst15, 16; Andrei Seluanov138; Vera Gorbunova138; X. William Yang37, 38; Ken Raj139; Steve Horvath1, 2 *"
Date: "07-24-2023"
output: html_document
---
```{r libraries, message=FALSE}
library(easypackages)
libraries("readxl", "psych", "tidyr", "dplyr","gplots", "RColorBrewer","limma", "ggplot2", "metap", "anRichment", "clusterProfiler", "RColorBrewer", "ggstatsplot", "gridExtra", "ggupset", "psych", "corrplot", "limma", "edgeR", "gridExtra", "ggupset", "ggrepel", "data.table", "stringr")
source("~/Google Drive/Amin documents/Steve projects/Research projects/Human unique methylation project/summarizeFunctions.R")
```
Color palattes
```{r colour pallets}
my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231]
my_palette2 <- colorRampPalette(c("green", "white", "red"))(n = 299)
my_palette3 <- brewer.pal(9,"RdYlGn")
my_palette4 <- brewer.pal(8,"Dark2")
my_palette5 <- rev(brewer.pal(9,"BrBG"))
my_palette5 <- rev(brewer.pal(9,"PiYG"))
my_palette6 <- brewer.pal(9,"YlOrBr")[2:6]
my_palette7 <- colorRampPalette(c("yellow", "orange", "red"))(n = 299)
my_palette8 <- colorRampPalette(c("blue", "purple"))(n = 299)
my_palette9 <- brewer.pal(9,"Purples")[2:9]
my_palette10 <- brewer.pal(9,"Oranges")[2:9]
blues <- brewer.pal(9,"Blues")
reds <- brewer.pal(9,"Reds")
my_palette11 <- colorRampPalette(c(rev(blues)[1:3],"grey", "grey" ,"grey","grey", reds[2],reds[2:5]))(n = 299)
my_palette12 <- colorRampPalette(c("blue", "grey", "red"))(n = 299)
my_palette13 <- rev(brewer.pal(9,"Spectral"))
```
```{r import data, message=FALSE, warning=FALSE}
samplesNoMars <- readRDS("samplesNoMars.RDS")
bValsNoMars <- readRDS("DNAmDataNoMarsupials.RDS")
```
```{r}
multiSampels <- samplesNoMars %>% group_split(SpeciesLatinName,Tissue)
names(multiSampels) <- sapply(multiSampels, function(x){paste(x$SpeciesLatinName[1], x$Tissue[1], sep = "_")})
multiSampels <- plyr::compact(plyr::llply(multiSampels, function(x){
if(nrow(x)<50){x <- NULL}
return(x)
}))
multiExpr <- plyr::llply(multiSampels, function(x){
bval <- bValsNoMars %>% dplyr::select(x$Basename)
bval <- list(data = t(bval))
})
#multiExpr <- multiExpr[1:10]
#saveRDS(multiExpr, "multiExpr.RDS")
```
```{r consensus WGCNA}
# Check that the data has the correct format for many functions operating on multiple sets:
exprSize = checkSets(multiExpr)
exprSize
# Define data set dimensions
nGenes = exprSize$nGenes;
nSamples = exprSize$nSamples;
# Get the number of sets in the multiExpr structure.
nSets = checkSets(multiExpr)$nSets
# Choose a set of soft-thresholding powers
powers = c(c(1), seq(from = 2, to=20, by=2));
# Initialize a list to hold the results of scale-free analysis
powerTables = vector(mode = "list", length = nSets);
# Call the network topology analysis function for each set in turn
for (set in 1:nSets)
powerTables[[set]] = list(data = pickSoftThreshold(multiExpr[[set]]$data, powerVector=powers,
verbose = 2)[[2]]);
collectGarbage();
names(powerTables) <- names(multiExpr)
dat <- powerTables[[1]]$data
powers <- rbindlist(lapply(powerTables, function(x){
dat <- x$data
}), idcol = "set", use.names = TRUE)
#saveRDS(powers, "ConsensusPowerTable.RDS")
p1 <- ggplot(powers, aes(x=Power, y=`SFT.R.sq` ))+geom_point(aes(color=as.character(set)))+geom_line(aes(color=as.character(set)))+geom_smooth(method = "loess")
pdf("ConsensusPowerTrend.pdf", width = 4, height = 4)
p1
dev.off()
power <- powers %>% group_by(Power) %>% summarise(medianRsq = median(`SFT.R.sq`)) %>% top_n(1, medianRsq)
powers <- powers %>% group_by(set) %>% top_n(1, `SFT.R.sq`)%>% top_n(1, -Power) %>% ungroup() %>% tibble::column_to_rownames(var = "set")
powers <- powers[names(multiExpr),]
```
```{r soft power}
# Initialize an appropriate array to hold the adjacencies
adjacencies = array(0, dim = c(nSets, nGenes, nGenes));
# Calculate adjacencies in each individual data set
for (set in 1:nSets)
adjacencies[set, , ] = abs(cor(multiExpr[[set]]$data, use = "p"))^powers$Power[set];
# Initialize an appropriate array to hold the TOMs
TOM = array(0, dim = c(nSets, nGenes, nGenes));
# Calculate TOMs in each individual data set
for (set in 1:nSets)
TOM[set, , ] = TOMsimilarity(adjacencies[set, , ]);
```
Since the data is too large, I decided to split them in chunk and create a concensus TOM
```{r split in chunk analysis}
x <- seq_along(1:nSets)
sets <- split(1:nSets, ceiling(x/5))
z <- lapply(1:length(sets), function(x){
adjacencies = array(0, dim = c(length(sets[[x]]), nGenes, nGenes));
# Calculate adjacencies in each individual data set
for (set in 1:length(sets[[x]]))
adjacencies[set, , ] = abs(cor(multiExpr[[set]]$data, use = "p"))^powers$Power[set];
# Initialize an appropriate array to hold the TOMs
TOM = array(0, dim = c(length(sets[[x]]), nGenes, nGenes));
# Calculate TOMs in each individual data set
for (set in 1:length(sets[[x]]))
TOM[set, , ] = TOMsimilarity(adjacencies[set, , ]);
#saveRDS(TOM, paste("TOM_", x, ".RDS" ,sep = ""))
# Define the reference percentile
scaleP = 0.95
# Set RNG seed for reproducibility of sampling
set.seed(12345)
nSet <- dim(TOM)[1]
# Sample sufficiently large number of TOM entries
nSamples = as.integer(1/(1-scaleP) * 1000);
# Choose the sampled TOM entries
scaleSample = sample(nGenes*(nGenes-1)/2, size = nSamples)
TOMScalingSamples = list();
# These are TOM values at reference percentile
scaleQuant = rep(1, nSet)
# Scaling powers to equalize reference TOM values
scalePowers = rep(1, nSet)
# Loop over sets
for (set in 1:nSet){
# Select the sampled TOM entries
TOMScalingSamples[[set]] = as.dist(TOM[set, , ])[scaleSample]
# Calculate the 95th percentile
scaleQuant[set] = quantile(TOMScalingSamples[[set]],
probs = scaleP, type = 8);
assign("scaleQuant1", scaleQuant[1], envir = .GlobalEnv)
# Scale the TOM
if ((x==1&set>1)|x>1) {
scalePowers[set] = log(scaleQuant1)/log(scaleQuant[set]);
TOM[set, ,] = TOM[set, ,]^scalePowers[set]; }
}
z1 <- lapply(1:dim(TOM)[1], function(x){TOM[x, , ]})
consensusTOM <- do.call(pmin,z1)
saveRDS(consensusTOM, paste("TOM_", x, ".RDS" ,sep = ""))
return(consensusTOM)
})
consensusTOM <- do.call(pmin,z)
rm(z)
saveRDS(consensusTOM, "consensusTOM.RDS")
```
```{r scaling topology ovelap}
# Define the reference percentile
scaleP = 0.95
# Set RNG seed for reproducibility of sampling
set.seed(12345)
# Sample sufficiently large number of TOM entries
nSamples = as.integer(1/(1-scaleP) * 1000);
# Choose the sampled TOM entries
scaleSample = sample(nGenes*(nGenes-1)/2, size = nSamples)
TOMScalingSamples = list();
# These are TOM values at reference percentile
scaleQuant = rep(1, nSets)
# Scaling powers to equalize reference TOM values
scalePowers = rep(1, nSets)
# Loop over sets
for (set in 1:nSets){
# Select the sampled TOM entries
TOMScalingSamples[[set]] = as.dist(TOM[set, , ])[scaleSample]
# Calculate the 95th percentile
scaleQuant[set] = quantile(TOMScalingSamples[[set]],
probs = scaleP, type = 8);
# Scale the TOM
if (set>1) {
scalePowers[set] = log(scaleQuant[1])/log(scaleQuant[set]);
TOM[set, ,] = TOM[set, ,]^scalePowers[set]; }
}
```
```{r calculating consensus TOM}
z <- lapply(1:dim(TOM)[1], function(x){TOM[x, , ]})
consensusTOM <- do.call(pmin,z)
rm(z)
```
```{r}
# Clustering
consTree = hclust(as.dist(1-consensusTOM), method = "average");
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 30;
# Module identification using dynamic tree cut:
unmergedLabels = cutreeDynamic(dendro = consTree, distM = 1-consensusTOM,
deepSplit = 2, cutHeight = 0.995, minClusterSize = minModuleSize, pamRespectsDendro = FALSE );
unmergedColors = labels2colors(unmergedLabels)
```
```{r merge modules}
# Calculate module eigengenes
unmergedMEs = multiSetMEs(multiExpr, colors = NULL, universalColors = unmergedColors)
# Calculate consensus dissimilarity of consensus module eigengenes
consMEDiss = consensusMEDissimilarity(unmergedMEs);
# Cluster consensus modules
consMETree = hclust(as.dist(consMEDiss), method = "average"); # Plot the result
sizeGrWindow(7,6)
par(mfrow = c(1,1))
plot(consMETree, main = "Consensus clustering of consensus module eigengenes",
xlab = "", sub = "")
abline(h=0.25, col = "red")
merge = mergeCloseModules(multiExpr, unmergedLabels, cutHeight = 0.25, verbose = 3)
# Numeric module labels
moduleLabels = merge$colors;
# Convert labels to colors
moduleColors = labels2colors(moduleLabels)
# Eigengenes of the new merged modules:
consMEs = merge$newMEs;
save(consMEs, moduleColors, moduleLabels, consTree, file = "Consensus-NetworkConstruction-mammals.RData")
```
```{r plot}
pdf("consensus tree 1.pdf", width = 9, height = 6)
sizeGrWindow(9,6)
plotDendroAndColors(consTree, cbind(unmergedColors, moduleColors),
c("Unmerged", "Merged"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
dev.off()
pdf("consensus tree 2.pdf", width = 9, height = 6)
sizeGrWindow(9,6)
plotDendroAndColors(consTree, cbind(moduleColors),
c("Modules"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
dev.off()
```
## Read consesus WGCNA
```{r}
load("WGCNA results/Consensus WGCNA 57 species tissues/Consensus-NetworkConstruction-mammals.RData")
```
```{r}
datExpr <- t(bValsNoMars)
consMEList = moduleEigengenes(datExpr, colors = matchedCons$consMatched)
consMEs = consMEList$eigengenes
saveRDS(consMEs, "WGCNA results/Consensus WGCNA 57 species tissues/newConsMEsMatchedCols.RDS")
```
