Revision b7e3088e28025ac34513778ccb23368246fc532c authored by Victoria Sugrue on 18 June 2021, 08:47:24 UTC, committed by GitHub on 18 June 2021, 08:47:24 UTC
1 parent 65507f1
FigS6BC_CastrationMass.R
####This analysis is to compare ASDMR methylation in young males where we have mass data
library(dplyr)
#install.packages("broom")
library(broom)
options(stringsAsFactors = FALSE)
setwd("~/Dropbox/Sugrue2020_MS/GitHub_FINAL_SUGRUE2021/data/")
#read in ram methylation data and turn into a useful dataframe
data <- read.csv("MASTER_Betas_Minfi_Normalised.csv", stringsAsFactors = F)
datExpr = as.data.frame(t(data[, -c(1:4)]))
colnames(datExpr) <- data$ExternalSampleID
#Split the sex/castration status, remove blood
datExpr[1:10,1:10]
datExpr$Sex
datExpr.ewe <- filter(datExpr, Sex=="Ewe")
datExpr.ram <- filter(datExpr, Sex=="Ram")
datExpr.wether <- filter(datExpr, Sex=="Wether")
#define the ASDMRs and their names
ASDMRs <- read.csv("~/Dropbox/Sugrue2020_MS/Figures_Tables/ELIFE formatted and numbered/Supplementary Tables/Table S3 androgenSensitiveSites.csv", stringsAsFactors = F)
ASDMRs.gene <- c("MKLN1", "ETAA1", "LMO4", "KIAA2026")
ASDMRs.CpG <- c("cg21524116", "cg01822430", "cg15851301", "cg00658920")
#plot the methylation values for the ASDMRS....this is just to check everything is working
par(mfrow = c(2,2))
for (i in 1:length(ASDMRs.gene)){
eval(parse(text=paste("plot(y=as.numeric(datExpr.ewe$",ASDMRs.CpG[i],"), x=as.numeric(datExpr.ewe$Age), ylim=c(0,1), frame.plot=F, col=\"white\", main=\"",ASDMRs.gene[i],"\")", sep="")))
eval(parse(text=paste("points(y=as.vector(datExpr.ram$",ASDMRs.CpG[i],"), x=datExpr.ram$Age, ylim=c(0,1), col=\"blue\")", sep="")))
eval(parse(text=paste("points(y=as.vector(datExpr.wether$",ASDMRs.CpG[i],"), x=datExpr.wether$Age, ylim=c(0,1), col=\"green\")", sep="")))
}
##############repeat analysis but only looking at the young rams, add in mass data############
datExpr <- filter(datExpr, Age=="0.539356605")
#read in the mass data
data.mass <- read.csv("YoungRamWetherInfo_DNAQuantEtc.csv", stringsAsFactors = F)
#order both datasets and then combine, split for Ram and Wether
data.mass <- data.mass[(order(data.mass$Name)),]
datExpr <- datExpr[(order(rownames(datExpr))),]
datExpr <- cbind(datExpr, data.mass)
datExpr.ram <- filter(datExpr, Sex=="Ram")
datExpr.wether <- filter(datExpr, Sex=="Wether")
#plot the methylation vs mass values for the 4 'iconic' asDMRS
par(mfrow = c(1,4))
#for the asDMRs, calculate slope for rams and wethers, and put into Coeffic.table.ram or Coeffic.table.wether.
for (i in 1:length(ASDMRs.gene)){
eval(parse(text=paste("plot(y=datExpr$",ASDMRs.CpG[i],", x=datExpr$Mass..kg., frame.plot = F, xlab = \"Mass (kg)\", ylab = \"",ASDMRs.CpG[i],"_methylation\", main=\"",ASDMRs.gene[i],"\")", sep="")))
eval(parse(text=paste("points(y=datExpr.ram$",ASDMRs.CpG[i],", x=datExpr.ram$Mass..kg., col=\"green\")", sep="")))
eval(parse(text=paste("abline(lm(as.numeric(datExpr.ram$",ASDMRs.CpG[i],") ~ datExpr.ram$Mass..kg.), col=\"green\")", sep="")))
eval(parse(text=paste("print(summary(lm(as.numeric(datExpr.ram$",ASDMRs.CpG[i],") ~ datExpr.ram$Mass..kg.)))", sep="")))
eval(parse(text=paste("points(y=datExpr.wether$",ASDMRs.CpG[i],", x=datExpr.wether$Mass..kg., col=\"blue\")", sep="")))
eval(parse(text=paste("abline(lm(as.numeric(datExpr.wether$",ASDMRs.CpG[i],") ~ datExpr.wether$Mass..kg.), col=\"blue\")", sep="")))
eval(parse(text=paste("print(summary(lm(as.numeric(datExpr.wether$",ASDMRs.CpG[i],") ~ datExpr.wether$Mass..kg.)))", sep="")))
eval(parse(text=paste("print(tidy(summary(lm(as.numeric(datExpr.wether$",ASDMRs.CpG[i],") ~ datExpr.wether$Mass..kg.))))", sep="")))
eval(parse(text=paste("print(confint(lm(as.numeric(datExpr.wether$",ASDMRs.CpG[i],") ~ datExpr.wether$Mass..kg.)))", sep="")))
}
#############################Calculate correlation between ASDMPs windows and lamb mass######
#ttestlist.test
ttestlist.test <- c()
#define a list for slopes
mean.slope.ram.list <- c()
mean.slope.wether.list <- c()
mass.vs.meth <- c()
for (j in 1:450){
#Define overlap and windows (in this case 10 and 100)
ASDMRs.gene <- ASDMRs$Gene.Symbol[((10*j)+1):((10*j)+100)]
ASDMRs.CpG <- ASDMRs$CpG.ID[((10*j)+1):((10*j)+100)]
#Define a table to put slope into
Coeffic.table.ram <- c()
Coeffic.table.wether <- c()
#for the asDMRs, calculate slope for rams and wethers, and put into Coeffic.table.ram or Coeffic.table.wether.
for (i in 1:length(ASDMRs.gene)){
eval(parse(text=paste("Coeffic.table <- tidy(summary(lm(as.numeric(datExpr.ram$",ASDMRs.CpG[i],") ~ datExpr.ram$Mass..kg.)))", sep="")))
Coeffic.table.ram <- rbind(Coeffic.table.ram,Coeffic.table[2,])
eval(parse(text=paste("Coeffic.table <- tidy(summary(lm(as.numeric(datExpr.wether$",ASDMRs.CpG[i],") ~ datExpr.wether$Mass..kg.)))", sep="")))
Coeffic.table.wether <- rbind(Coeffic.table.wether,Coeffic.table[2,])
mean.slope.ram <- mean(mass.vs.meth$ram)
mean.slope.wether <- mean(mass.vs.meth$wether)
}
mass.vs.meth <- list("ram"=Coeffic.table.ram$estimate, "wether"=Coeffic.table.wether$estimate)
t.test.test <- t.test(mass.vs.meth$ram)
ttestlist.test <- c(ttestlist.test,t.test.test$p.value)
print(paste("Replication ",j," is complete", sep = ""))
#make list of slopes
mean.slope.ram.list <- c(mean.slope.ram.list, mean.slope.ram)
mean.slope.wether.list <- c(mean.slope.wether.list, mean.slope.wether)
}
par(mfrow=c(2,1))
#plot slope for each window
plot(mean.slope.ram.list, frame.plot = F, col="green", ylim = c(-0.005,0.005), las=1, ylab="Association", pch=16)
abline(h=0, col="black")
points(mean.slope.wether.list, col="blue", pch=16)
#plot significance
plot(ttestlist.test, log ="y", col="red", frame.plot=F, las=1, ylab="Significance", pch=16)
################Plot the signficance line###############
####### note this works once the "ttestlist.test.control.txt" file has been created, otherwise run "simulation for background" below.....
#read the file ttestlist.test.control
ttestlist.test.control.txt <- read.csv("ttestlist.test.control.txt")
#find 99% quantile and plot it
threshold_control=as.numeric(quantile(ttestlist.test.control,probs = 0.01))
abline(h=threshold_control, col="red")
##########################END FOR FIGURE##############
##############running a simulation for the background###################
#create ttestlist.test.control
ttestlist.test.control <- c()
for (j in 1:10000){
#define random numbers for the sampling
ran.num.ASDMR <- sample(4:37495, 100)
#Use these random numbers to pick out random CpGs
ASDMRs.gene.control <- "test"
ASDMRs.CpG.control <- colnames(datExpr[ran.num.ASDMR])
#Define a control table to put slope into
Coeffic.table.ram.control <- c()
Coeffic.table.wether.control <- c()
#for control asDMRs, calculate slope for rams and wethers, and put into Coeffc.table.ram or Coeffic.table.wether.control
for (i in 1:length(ASDMRs.CpG.control)){
eval(parse(text=paste("Coeffic.table <- tidy(summary(lm(as.numeric(datExpr.ram$",ASDMRs.CpG.control[i],") ~ datExpr.ram$Mass..kg.)))", sep="")))
Coeffic.table.ram.control <- rbind(Coeffic.table.ram.control,Coeffic.table[2,])
eval(parse(text=paste("corr.ram.control <- cor(as.numeric(datExpr.ram$",ASDMRs.CpG.control[i],"), datExpr.ram$Mass..kg.)", sep="")))
eval(parse(text=paste("Coeffic.table <- tidy(summary(lm(as.numeric(datExpr.wether$",ASDMRs.CpG.control[i],") ~ datExpr.wether$Mass..kg.)))", sep="")))
Coeffic.table.wether.control <- rbind(Coeffic.table.wether.control,Coeffic.table[2,])
eval(parse(text=paste("corr.wether.control <- cor(as.numeric(datExpr.wether$",ASDMRs.CpG.control[i],"), datExpr.wether$Mass..kg.)", sep="")))
}
mass.vs.meth <- list("ram"=Coeffic.table.ram$estimate, "wether"=Coeffic.table.wether$estimate, "control.ram"=Coeffic.table.ram.control$estimate, "control.wether"=Coeffic.table.wether.control$estimate)
#t.test.ramvswether <- t.test(mass.vs.meth$ram, mass.vs.meth$wether)
t.test.control <- t.test(mass.vs.meth$control.ram, mass.vs.meth$control.wether)
ttestlist.test.control <- c(ttestlist.test.control,t.test.control$p.value)
print(paste("Replication ",j," is complete", sep = ""))
}
#save the file ttestlist.test.control
write.csv(ttestlist.test.control, "ttestlist.test.control.txt", row.names = F)
################Plot the signficance line###############
####### note this works once the "ttestlist.test.control.txt" file has been created, otherwise run "simulation for background" below.....
#read the file ttestlist.test.control
ttestlist.test.control.txt <- read.csv("ttestlist.test.control.txt")
#find 99% quantile and plot it
threshold_control=as.numeric(quantile(ttestlist.test.control,probs = 0.01))
abline(h=threshold_control, col="red")
##############Plot the top 50 ASDMPs, plus the random samples###################
mass.vs.meth <- list("ram"=Coeffic.table.ram$estimate, "wether"=Coeffic.table.wether$estimate, "control.ram"=Coeffic.table.ram.control$estimate, "control.wether"=Coeffic.table.wether.control$estimate)
dev.off()
stripchart(mass.vs.meth, pch=16, col=c("green", "blue"), vertical=TRUE, frame.plot=F, method="jitter", las=2)
t.test.ramvswether <- t.test(mass.vs.meth$ram, mass.vs.meth$wether)
t.test.control <- t.test(mass.vs.meth$control.ram, mass.vs.meth$control.wether)
Computing file changes ...