1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275 | ---
Author: "Amin Haghani"
Paper: "DNA Methylation Networks Underlying Mammalian Traits"
Authors: "Amin Haghani1, 2 †*; Caesar Z. Li3, 4 †; Todd R. Robeck5; Joshua Zhang1; Ake T. Lu1, 2; Julia Ablaeva6; Victoria A. Acosta-Rodríguez7; Danielle M. Adams8; Abdulaziz N. Alagaili9, 10; Javier Almunia11; Ajoy Aloysius12; Nabil M.S. Amor13; Reza Ardehali14; Adriana Arneson15, 16; C. Scott Baker17; Gareth Banks18; Katherine Belov19; Nigel C. Bennett20; Peter Black21; Daniel T. Blumstein22, 23; Eleanor K. Bors17; Charles E. Breeze24; Robert T. Brooke25; Janine L. Brown26; Gerald Carter27; Alex Caulton28, 29; Julie M. Cavin30; Lisa Chakrabarti31; Ioulia Chatzistamou32; Andreas S. Chavez27, 33; Hao Chen34; Kaiyang Cheng35; Priscila Chiavellini36; Oi-Wa Choi37, 38; Shannon Clarke28; Joseph A. Cook39; Lisa N. Cooper40; Marie-Laurence Cossette41; Joanna Day42; Joseph DeYoung37, 38; Stacy Dirocco43; Christopher Dold44; Jonathan L. Dunnum39; Erin E. Ehmke45; Candice K. Emmons46; Stephan Emmrich6; Ebru Erbay47, 48, 49; Claire Erlacher-Reid43; Chris G. Faulkes50, 51; Zhe Fei3, 52; Steven H. Ferguson53, 54; Carrie J. Finno55; Jennifer E. Flower56; Jean-Michel Gaillard57; Eva Garde58; Livia Gerber59, 60; Vadim N. Gladyshev61; Rodolfo G. Goya36; Matthew J Grant62; Carla B. Green7; M. Bradley Hanson46; Daniel W. Hart20; Martin Haulena63; Kelsey Herrick64; Andrew N. Hogan65; Carolyn J. Hogg19; Timothy A. Hore66; Taosheng Huang67; Juan Carlos Izpisua Belmonte2; Anna J. Jasinska37, 68, 69; Gareth Jones70; Eve Jourdain71; Olga Kashpur72; Harold Katcher73; Etsuko Katsumata74; Vimala Kaza75; Hippokratis Kiaris76; Michael S. Kobor77; Pawel Kordowitzki78; William R. Koski79; Michael Krützen60; Soo Bin Kwon16, 15; Brenda Larison22, 80; Sang-Goo Lee61; Marianne Lehmann36; Jean-François Lemaître57; Andrew J. Levine81; Xinmin Li82; Cun Li83, 84; Andrea R. Lim1; David T.S. Lin85; Dana M. Lindemann43; Schuyler W. Liphardt86; Thomas J. Little87; Nicholas Macoretta6; Dewey Maddox88; Craig O. Matkin89; Julie A. Mattison90; Matthew McClure91; June Mergl92; Jennifer J. Meudt93; Gisele A. Montano5; Khyobeni Mozhui94; Jason Munshi-South95; William J. Murphy96, 97; Asieh Naderi76; Martina Nagy98; Pritika Narayan62; Peter W. Nathanielsz83, 84; Ngoc B. Nguyen14; Christof Niehrs99, 100; Batsaikhan Nyamsuren101; Justine K. O'Brien42; Perrie O'Tierney Ginn72; Duncan T Odom102, 103; Alexander G. Ophir104; Steve Osborn105; Elaine A. Ostrander65; Kim M. Parsons46; Kimberly C. Paul81; Amy B. Pedersen87; Matteo Pellegrini106; Katharina J. Peters60, 107; Jessica L. Petersen108; Darren W. Pietersen109; Gabriela M. Pinho22; Jocelyn Plassais65; Jesse R. Poganik61; Natalia A. Prado110, 26; Pradeep Reddy111, 2; Benjamin Rey57; Beate R. Ritz112, 113, 81; Jooke Robbins114; Magdalena Rodriguez115; Jennifer Russell105; Elena Rydkina6; Lindsay L. Sailer104; Adam B. Salmon116; Akshay Sanghavi73; Kyle M. Schachtschneider117, 118, 119; Dennis Schmitt120; Todd Schmitt64; Lars Schomacher99; Lawrence B. Schook117, 121; Karen E. Sears22; Ashley W. Seifert12; Aaron B.A. Shafer122; Anastasia V. Shindyapina61; Melanie Simmons45; Kavita Singh123; Ishani Sinha22; Jesse Slone67; Russel G. Snell62; Elham Soltanmohammadi76; Matthew L. Spangler108; Maria Spriggs21; Lydia Staggs43; Nancy Stedman21; Karen J. Steinman124; Donald T Stewart125; Victoria J. Sugrue66; Balazs Szladovits126; Joseph S. Takahashi7, 127; Masaki Takasugi6; Emma C. Teeling128; Michael J. Thompson106; Bill Van Bonn129; Sonja C. Vernes130, 131; Diego Villar132; Harry V. Vinters133; Ha Vu15, 16; Mary C. Wallingford72; Nan Wang37, 38; Gerald S. Wilkinson8; Robert W. Williams134; Qi Yan3, 2; Mingjia Yao3; Brent G. Young54; Bohan Zhang61; Zhihui Zhang6; Yang Zhao6; Peng Zhao14, 135; Wanding Zhou136, 137; Joseph A. Zoller3; Jason Ernst15, 16; Andrei Seluanov138; Vera Gorbunova138; X. William Yang37, 38; Ken Raj139; Steve Horvath1, 2 *"
Date: "07-24-2023"
output: html_document
---
```{r libraries, message=FALSE}
library(easypackages)
libraries("readxl", "psych", "tidyr", "dplyr","gplots", "RColorBrewer","limma", "ggplot2", "metap", "anRichment", "clusterProfiler", "RColorBrewer", "ggstatsplot", "gridExtra", "ggupset", "psych", "corrplot", "limma", "edgeR", "gridExtra", "ggupset", "ggrepel", "data.table", "stringr")
source("~/Google Drive/Amin documents/Steve projects/Research projects/Human unique methylation project/summarizeFunctions.R")
```
Color palattes
```{r colour pallets}
my_palette <- rev(colorRampPalette(brewer.pal(11, "RdBu"))(256))[25:231]
my_palette2 <- colorRampPalette(c("green", "white", "red"))(n = 299)
my_palette3 <- brewer.pal(9,"RdYlGn")
my_palette4 <- brewer.pal(8,"Dark2")
my_palette5 <- rev(brewer.pal(9,"BrBG"))
my_palette5 <- rev(brewer.pal(9,"PiYG"))
my_palette6 <- brewer.pal(9,"YlOrBr")[2:6]
my_palette7 <- colorRampPalette(c("yellow", "orange", "red"))(n = 299)
my_palette8 <- colorRampPalette(c("blue", "purple"))(n = 299)
my_palette9 <- brewer.pal(9,"Purples")[2:9]
my_palette10 <- brewer.pal(9,"Oranges")[2:9]
blues <- brewer.pal(9,"Blues")
reds <- brewer.pal(9,"Reds")
my_palette11 <- colorRampPalette(c(rev(blues)[1:3],"grey", "grey" ,"grey","grey", reds[2],reds[2:5]))(n = 299)
my_palette12 <- colorRampPalette(c("blue", "grey", "red"))(n = 299)
my_palette13 <- rev(brewer.pal(9,"Spectral"))
```
```{r import data, message=FALSE, warning=FALSE}
samplesNoMars <- readRDS("samplesNoMars.RDS")
bValsNoMars <- readRDS("DNAmDataNoMarsupials.RDS")
```
```{r}
multiSampels <- samplesNoMars %>% group_split(SpeciesLatinName,Tissue)
names(multiSampels) <- sapply(multiSampels, function(x){paste(x$SpeciesLatinName[1], x$Tissue[1], sep = "_")})
multiSampels <- plyr::compact(plyr::llply(multiSampels, function(x){
if(nrow(x)<50){x <- NULL}
return(x)
}))
multiExpr <- plyr::llply(multiSampels, function(x){
bval <- bValsNoMars %>% dplyr::select(x$Basename)
bval <- list(data = t(bval))
})
#multiExpr <- multiExpr[1:10]
#saveRDS(multiExpr, "multiExpr.RDS")
```
```{r consensus WGCNA}
# Check that the data has the correct format for many functions operating on multiple sets:
exprSize = checkSets(multiExpr)
exprSize
# Define data set dimensions
nGenes = exprSize$nGenes;
nSamples = exprSize$nSamples;
# Get the number of sets in the multiExpr structure.
nSets = checkSets(multiExpr)$nSets
# Choose a set of soft-thresholding powers
powers = c(c(1), seq(from = 2, to=20, by=2));
# Initialize a list to hold the results of scale-free analysis
powerTables = vector(mode = "list", length = nSets);
# Call the network topology analysis function for each set in turn
for (set in 1:nSets)
powerTables[[set]] = list(data = pickSoftThreshold(multiExpr[[set]]$data, powerVector=powers,
verbose = 2)[[2]]);
collectGarbage();
names(powerTables) <- names(multiExpr)
dat <- powerTables[[1]]$data
powers <- rbindlist(lapply(powerTables, function(x){
dat <- x$data
}), idcol = "set", use.names = TRUE)
#saveRDS(powers, "ConsensusPowerTable.RDS")
p1 <- ggplot(powers, aes(x=Power, y=`SFT.R.sq` ))+geom_point(aes(color=as.character(set)))+geom_line(aes(color=as.character(set)))+geom_smooth(method = "loess")
pdf("ConsensusPowerTrend.pdf", width = 4, height = 4)
p1
dev.off()
power <- powers %>% group_by(Power) %>% summarise(medianRsq = median(`SFT.R.sq`)) %>% top_n(1, medianRsq)
powers <- powers %>% group_by(set) %>% top_n(1, `SFT.R.sq`)%>% top_n(1, -Power) %>% ungroup() %>% tibble::column_to_rownames(var = "set")
powers <- powers[names(multiExpr),]
```
```{r soft power}
# Initialize an appropriate array to hold the adjacencies
adjacencies = array(0, dim = c(nSets, nGenes, nGenes));
# Calculate adjacencies in each individual data set
for (set in 1:nSets)
adjacencies[set, , ] = abs(cor(multiExpr[[set]]$data, use = "p"))^powers$Power[set];
# Initialize an appropriate array to hold the TOMs
TOM = array(0, dim = c(nSets, nGenes, nGenes));
# Calculate TOMs in each individual data set
for (set in 1:nSets)
TOM[set, , ] = TOMsimilarity(adjacencies[set, , ]);
```
Since the data is too large, I decided to split them in chunk and create a concensus TOM
```{r split in chunk analysis}
x <- seq_along(1:nSets)
sets <- split(1:nSets, ceiling(x/5))
z <- lapply(1:length(sets), function(x){
adjacencies = array(0, dim = c(length(sets[[x]]), nGenes, nGenes));
# Calculate adjacencies in each individual data set
for (set in 1:length(sets[[x]]))
adjacencies[set, , ] = abs(cor(multiExpr[[set]]$data, use = "p"))^powers$Power[set];
# Initialize an appropriate array to hold the TOMs
TOM = array(0, dim = c(length(sets[[x]]), nGenes, nGenes));
# Calculate TOMs in each individual data set
for (set in 1:length(sets[[x]]))
TOM[set, , ] = TOMsimilarity(adjacencies[set, , ]);
#saveRDS(TOM, paste("TOM_", x, ".RDS" ,sep = ""))
# Define the reference percentile
scaleP = 0.95
# Set RNG seed for reproducibility of sampling
set.seed(12345)
nSet <- dim(TOM)[1]
# Sample sufficiently large number of TOM entries
nSamples = as.integer(1/(1-scaleP) * 1000);
# Choose the sampled TOM entries
scaleSample = sample(nGenes*(nGenes-1)/2, size = nSamples)
TOMScalingSamples = list();
# These are TOM values at reference percentile
scaleQuant = rep(1, nSet)
# Scaling powers to equalize reference TOM values
scalePowers = rep(1, nSet)
# Loop over sets
for (set in 1:nSet){
# Select the sampled TOM entries
TOMScalingSamples[[set]] = as.dist(TOM[set, , ])[scaleSample]
# Calculate the 95th percentile
scaleQuant[set] = quantile(TOMScalingSamples[[set]],
probs = scaleP, type = 8);
assign("scaleQuant1", scaleQuant[1], envir = .GlobalEnv)
# Scale the TOM
if ((x==1&set>1)|x>1) {
scalePowers[set] = log(scaleQuant1)/log(scaleQuant[set]);
TOM[set, ,] = TOM[set, ,]^scalePowers[set]; }
}
z1 <- lapply(1:dim(TOM)[1], function(x){TOM[x, , ]})
consensusTOM <- do.call(pmin,z1)
saveRDS(consensusTOM, paste("TOM_", x, ".RDS" ,sep = ""))
return(consensusTOM)
})
consensusTOM <- do.call(pmin,z)
rm(z)
saveRDS(consensusTOM, "consensusTOM.RDS")
```
```{r scaling topology ovelap}
# Define the reference percentile
scaleP = 0.95
# Set RNG seed for reproducibility of sampling
set.seed(12345)
# Sample sufficiently large number of TOM entries
nSamples = as.integer(1/(1-scaleP) * 1000);
# Choose the sampled TOM entries
scaleSample = sample(nGenes*(nGenes-1)/2, size = nSamples)
TOMScalingSamples = list();
# These are TOM values at reference percentile
scaleQuant = rep(1, nSets)
# Scaling powers to equalize reference TOM values
scalePowers = rep(1, nSets)
# Loop over sets
for (set in 1:nSets){
# Select the sampled TOM entries
TOMScalingSamples[[set]] = as.dist(TOM[set, , ])[scaleSample]
# Calculate the 95th percentile
scaleQuant[set] = quantile(TOMScalingSamples[[set]],
probs = scaleP, type = 8);
# Scale the TOM
if (set>1) {
scalePowers[set] = log(scaleQuant[1])/log(scaleQuant[set]);
TOM[set, ,] = TOM[set, ,]^scalePowers[set]; }
}
```
```{r calculating consensus TOM}
z <- lapply(1:dim(TOM)[1], function(x){TOM[x, , ]})
consensusTOM <- do.call(pmin,z)
rm(z)
```
```{r}
# Clustering
consTree = hclust(as.dist(1-consensusTOM), method = "average");
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 30;
# Module identification using dynamic tree cut:
unmergedLabels = cutreeDynamic(dendro = consTree, distM = 1-consensusTOM,
deepSplit = 2, cutHeight = 0.995, minClusterSize = minModuleSize, pamRespectsDendro = FALSE );
unmergedColors = labels2colors(unmergedLabels)
```
```{r merge modules}
# Calculate module eigengenes
unmergedMEs = multiSetMEs(multiExpr, colors = NULL, universalColors = unmergedColors)
# Calculate consensus dissimilarity of consensus module eigengenes
consMEDiss = consensusMEDissimilarity(unmergedMEs);
# Cluster consensus modules
consMETree = hclust(as.dist(consMEDiss), method = "average"); # Plot the result
sizeGrWindow(7,6)
par(mfrow = c(1,1))
plot(consMETree, main = "Consensus clustering of consensus module eigengenes",
xlab = "", sub = "")
abline(h=0.25, col = "red")
merge = mergeCloseModules(multiExpr, unmergedLabels, cutHeight = 0.25, verbose = 3)
# Numeric module labels
moduleLabels = merge$colors;
# Convert labels to colors
moduleColors = labels2colors(moduleLabels)
# Eigengenes of the new merged modules:
consMEs = merge$newMEs;
save(consMEs, moduleColors, moduleLabels, consTree, file = "Consensus-NetworkConstruction-mammals.RData")
```
```{r plot}
pdf("consensus tree 1.pdf", width = 9, height = 6)
sizeGrWindow(9,6)
plotDendroAndColors(consTree, cbind(unmergedColors, moduleColors),
c("Unmerged", "Merged"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
dev.off()
pdf("consensus tree 2.pdf", width = 9, height = 6)
sizeGrWindow(9,6)
plotDendroAndColors(consTree, cbind(moduleColors),
c("Modules"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
dev.off()
```
## Read consesus WGCNA
```{r}
load("WGCNA results/Consensus WGCNA 57 species tissues/Consensus-NetworkConstruction-mammals.RData")
```
```{r}
datExpr <- t(bValsNoMars)
consMEList = moduleEigengenes(datExpr, colors = matchedCons$consMatched)
consMEs = consMEList$eigengenes
saveRDS(consMEs, "WGCNA results/Consensus WGCNA 57 species tissues/newConsMEsMatchedCols.RDS")
```
|