Content - c67e46a6c3d945c0c8b6580a56aa9aca6e030759 - 0ba921d/Record_of_weights_and_projections_GS

visit type:

https://github.com/DanniGadd/EpiScores-for-protein-levels

17 November 2023, 07:53:22 UTC

Tip revision: a5130fab3895a0d95f0dcc8826aa9fb5e8c0fa86 authored by DanniGadd on 16 February 2022, 08:46:38 UTC
Merge branch 'main' of https://github.com/DanniGadd/EpiScores-for-protein-levels

Tip revision: a5130fa

Record_of_weights_and_projections_GS

### This script is a record of projections and weights used in the study 

# I have collated the files below so that we can check that the projections
# recorded here match those later generated in MethylDetectR

library(tidyverse)
library(readxl)

###############################################################################

# EpiScore projections taken from cox model script EpiScore prep section

###############################################################################

# LBC proxies in GS: read the table of LBC-trained EpiScore projections.
# The first column is treated as the sample identifier column below.
LBC <- read.csv(
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Model_projections/GS_combined_9537_projections.csv"
)

# LBC proteins to keep (those taken forward to the cox models)
list1 <- read.csv(
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/LBC_list_of_proteins_for_cox_models.csv"
)
list1 <- filter(list1, Protein != "IL.12B") # make sure IL.12B is removed here

# Keep the first column plus only those projection columns whose name is in
# the retained protein list. A logical mask with drop = FALSE keeps the result
# a data frame (and keeps the protein's column name) even when only a single
# protein matches, and avoids creating a variable called `names` that would
# mask base::names().
keep_lbc <- colnames(LBC) %in% list1$Protein
keep_lbc[1] <- TRUE
LBC <- LBC[, keep_lbc, drop = FALSE]

# KORA proxies in GS: read the table of KORA-trained EpiScore projections.
# check.names = FALSE leaves the column headers exactly as written in the file
# so they can be matched against list2$SeqId below.
KORA <- read.csv(
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Model_projections/KORA_GS_combined_9537_projections.csv",
  check.names = FALSE
)

# List of KORA EpiScores to keep (identified by SeqId)
list2 <- read.csv(
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/KORA_correlations_in_STRADL_filtered.csv"
)

# Keep the first column plus only those projection columns whose name is in
# the retained SeqId list. drop = FALSE keeps the result a data frame (and
# keeps the SeqId column name) even when only a single SeqId matches.
keep_kora <- colnames(KORA) %in% list2$SeqId
keep_kora[1] <- TRUE
KORA <- KORA[, keep_kora, drop = FALSE]

# Name the first column "ID" so it can be used as the join key
names(KORA)[1] <- "ID"

# Join the two sets of proxies on the shared "ID" column, keeping every row of
# the LBC table. Only KORA's first column is renamed to "ID" in this script, so
# check explicitly that both tables carry the key before joining.
stopifnot("ID" %in% names(LBC), "ID" %in% names(KORA))
pred <- left_join(LBC, KORA, by = "ID")

# Record the combined projections (no row-name column in the output)
write.csv(
  pred,
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/A_record_of_weights_projections/EpiScore_projections_GS_9537.csv",
  row.names = FALSE
)

###############################################################################

# Weights files used in EpiScore projections scripts

###############################################################################

# LBC weights: read with headers left exactly as written in the file, then
# keep only rows whose Predictor is in the retained LBC protein list
# (intended to leave just the 25 episcores taken forward).
cpgs1 <- read.csv(
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Extract_weights/predictors_joint_to_phenotypes_230221.csv",
  check.names = FALSE
)
# %in% never returns NA, so the logical mask can index rows directly
cpgs1 <- cpgs1[cpgs1$Predictor %in% list1$Protein, ]

# KORA weights: keep only rows whose SeqId is in the retained KORA list
# (intended to leave just the 84 episcores taken forward).
# NOTE(review): the input is named as a protein-naming linker file rather than
# a weights file -- confirm it holds the KORA weights that should be recorded.
cpgs2 <- read.csv(
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/Linker_file_protein_naming_corrected_with_annotation.csv"
)
cpgs2 <- cpgs2[cpgs2$SeqId %in% list2$SeqId, ]

# Write out weights files used in study (no row-name column in the output)
write.csv(
  cpgs1,
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/A_record_of_weights_projections/LBC_weights_finalised_120121.csv",
  row.names = FALSE
)
write.csv(
  cpgs2,
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/A_record_of_weights_projections/KORA_weights_finalised_120121.csv",
  row.names = FALSE
)