https://github.com/ejh243/SCZEWAS/
Tip revision: 006e92b11dbd3eb7e75dcc173853010fa93461a5 authored by ejh243 on 19 November 2020, 09:40:47 UTC
Update README.md
Update README.md
Tip revision: 006e92b
calcDNAmDerivedVariables.r
## this script calculates 1) smoking score, and 2) PhenoAge from matrix of DNA methylation values
## this script was run on each dataset in turn and the estimates added to the phenotype data
## NB DNAmAge and cell composition estimates were calculated using the online epigenetic clock web tool.
## Calculate smoking score
smokingScore<-function(betas){
load("SmokingScoreRefData.rda") ## results taken from Elliot et al Smoking EWAS
#contains Illig_data, Illig_data_up and Illig_data_down
#subsetting own data
#SABRE_data_down
select <- rownames(betas) %in% Illig_data_down$cpgs
A_down <- subset(betas, select =="TRUE")
#SABRE_data_up
select <- rownames(betas) %in% Illig_data_up$cpgs
A_up <- subset(betas, select =="TRUE")
#sort SABRE data by Cpg name
A_up <- A_up[order(rownames(A_up)),]
A_down <- A_down[order(rownames(A_down)),]
#match Illig data by by Cpg name
Illig_data_up<-Illig_data_up[match(rownames(A_up), Illig_data_up$cpgs),]
Illig_data_down<-Illig_data_down[match(rownames(A_down), Illig_data_down$cpgs),]
#as some outliers have been removed and replaced with NAs need to handle missing values
matrix_up_A<- matrix(nrow=nrow(Illig_data_up), ncol=ncol(A_up))
for (i in 1:ncol(A_up)){
matrix_up_A[,i]<- (A_up[,i])-(Illig_data_up$reference_never_median_beta_all)}
colnames(matrix_up_A)<- colnames(A_up)
rownames(matrix_up_A)<- Illig_data_up$cpgs
#Calculate scores - ##UP##
#calculate scores for each individual in the dataset
scores_up_A<- as.numeric(rep(NA,ncol(A_up)))
for (i in 1:ncol(A_up)){
scores_up_A[i]<-sum(matrix_up_A[,i]*Illig_data_up$weights)}
#Calculate diffs between SABRE beta values and the reference for each site - ##DOWN###
matrix_down_A<- matrix(nrow=nrow(Illig_data_down), ncol=ncol(A_down))
for (i in 1:ncol(A_down)){
matrix_down_A[,i]<- (Illig_data_down$reference_never_median_beta_all)-(A_down[,i])}
colnames(matrix_down_A)<- colnames(A_down)
rownames(matrix_down_A)<- Illig_data_down$cpgs
#Calculate scores - ##DOWN##
#calculate scores for each individual in the dataset
scores_down_A<- as.numeric(rep(NA,ncol(A_down)))
for (i in 1:ncol(A_down)){
scores_down_A[i]<-sum(matrix_down_A[,i]*Illig_data_down$weights)}
##combine scores
scores_combined_A <- scores_up_A + scores_down_A
return(scores_combined_A)
}
setwd()
load("Normalised.rdat")
pheno<-sampleSheet
betas<-betas(mset450k.pf)
pheno<-cbind(pheno, smokingScore(betas))
colnames(pheno)[ncol(pheno)]<-"SmokingScore"
## Calculate PhenoAge
calcPhenoAge<-function(betas){
intercept<-phenoAgeCoeff$Weight[1]
coeffs<-phenoAgeCoeff$Weight[-1]
index<-match(phenoAgeCoeff$CpG[-1], rownames(betas))
coeffs<-coeffs[!is.na(index)]
index<-index[!is.na(index)]
predAge<-intercept+coeffs %*% betas[index,]
return(t(predAge))
}
phenoAgeCoeff<-read.csv("PhenoAge/PhenoAgeCoeff.csv", stringsAsFactors = FALSE, fill = TRUE) ## this is a table of coefficinets taken from the orginial PhenoAge manuscript
pheno<-cbind(pheno, calcPhenoAge(betas))
colnames(pheno)[ncol(pheno)]<-"PhenoAge"
save(betas,pheno, file = "Normalised.rdat")