https://github.com/ejh243/SCZEWAS/
Raw File
Tip revision: 006e92b11dbd3eb7e75dcc173853010fa93461a5 authored by ejh243 on 19 November 2020, 09:40:47 UTC
Update README.md
Tip revision: 006e92b
calcDNAmDerivedVariables.r
## this script calculates 1) smoking score, and 2) PhenoAge from matrix of DNA methylation values
## this script was run on each dataset in turn and the estimates added to the phenotype data
## NB DNAmAge and cell composition estimates were calculated using the online epigenetic clock web tool.

## Calculate smoking score
smokingScore<-function(betas){
  
	load("SmokingScoreRefData.rda") ## results taken from Elliot et al Smoking EWAS
	#contains Illig_data, Illig_data_up and Illig_data_down
  
	#subsetting own data 
	#SABRE_data_down
	select <- rownames(betas) %in% Illig_data_down$cpgs
	A_down <- subset(betas, select =="TRUE")
  
	#SABRE_data_up
	select <- rownames(betas) %in% Illig_data_up$cpgs
	A_up <- subset(betas, select =="TRUE")
  
	#sort SABRE data by Cpg name
	A_up <- A_up[order(rownames(A_up)),]
	A_down <- A_down[order(rownames(A_down)),]
		
	#match Illig data by by Cpg name
	Illig_data_up<-Illig_data_up[match(rownames(A_up), Illig_data_up$cpgs),]
	Illig_data_down<-Illig_data_down[match(rownames(A_down), Illig_data_down$cpgs),]
  
	#as some outliers have been removed and replaced with NAs need to handle missing values
	matrix_up_A<- matrix(nrow=nrow(Illig_data_up), ncol=ncol(A_up))
	for (i in 1:ncol(A_up)){
	  matrix_up_A[,i]<- (A_up[,i])-(Illig_data_up$reference_never_median_beta_all)}
	colnames(matrix_up_A)<- colnames(A_up)
	rownames(matrix_up_A)<- Illig_data_up$cpgs

	#Calculate scores - ##UP##
	#calculate scores for each individual in the dataset
	scores_up_A<- as.numeric(rep(NA,ncol(A_up)))
	for (i in 1:ncol(A_up)){
	  scores_up_A[i]<-sum(matrix_up_A[,i]*Illig_data_up$weights)}

	  
	#Calculate diffs between SABRE beta values and the reference for each site - ##DOWN###
	matrix_down_A<- matrix(nrow=nrow(Illig_data_down), ncol=ncol(A_down))
	for (i in 1:ncol(A_down)){
	  matrix_down_A[,i]<- (Illig_data_down$reference_never_median_beta_all)-(A_down[,i])}
	colnames(matrix_down_A)<- colnames(A_down)
	rownames(matrix_down_A)<- Illig_data_down$cpgs

	#Calculate scores - ##DOWN##
	#calculate scores for each individual in the dataset
	scores_down_A<- as.numeric(rep(NA,ncol(A_down)))
	for (i in 1:ncol(A_down)){
	  scores_down_A[i]<-sum(matrix_down_A[,i]*Illig_data_down$weights)}

	##combine scores
	scores_combined_A <- scores_up_A + scores_down_A
  
  return(scores_combined_A)
}

setwd()
load("Normalised.rdat")
pheno<-sampleSheet
betas<-betas(mset450k.pf)

pheno<-cbind(pheno, smokingScore(betas))
colnames(pheno)[ncol(pheno)]<-"SmokingScore"

## Calculate PhenoAge
calcPhenoAge<-function(betas){
	intercept<-phenoAgeCoeff$Weight[1]
	coeffs<-phenoAgeCoeff$Weight[-1]
	index<-match(phenoAgeCoeff$CpG[-1], rownames(betas))
	coeffs<-coeffs[!is.na(index)]
	index<-index[!is.na(index)]
	predAge<-intercept+coeffs %*% betas[index,]
	return(t(predAge))
}

phenoAgeCoeff<-read.csv("PhenoAge/PhenoAgeCoeff.csv", stringsAsFactors = FALSE, fill = TRUE) ## this is a table of coefficinets taken from the orginial PhenoAge manuscript

pheno<-cbind(pheno, calcPhenoAge(betas))
colnames(pheno)[ncol(pheno)]<-"PhenoAge"

save(betas,pheno, file = "Normalised.rdat")
back to top