https://github.com/DanniGadd/EpiScores-for-protein-levels
Raw File
Tip revision: a5130fab3895a0d95f0dcc8826aa9fb5e8c0fa86 authored by DanniGadd on 16 February 2022, 08:46:38 UTC
Merge branch 'main' of https://github.com/DanniGadd/EpiScores-for-protein-levels
Tip revision: a5130fa
15_validation_combine_tables_results_LBC1936.R
Copyright (c) <2022>, <DanniGadd>
All rights reserved.

This source code is licensed under the MIT license found in the
LICENSE file in the root directory.

########################################################################################
########################################################################################
############################ Combining validation results ###############################
########################################################################################
########################################################################################
 
# Combining results from Olink projections into STRADL and LBC1921
# Also adding in a holdout test set performance as an indicator of performance for the 
# 4 episcores that did not have a test set available (as mentioned in the paper)

library(tidyverse)

########################################################################################

### Read in all tables required for combination

########################################################################################

# Metrics from the elnet test/train split in LBC1936 used in the creation of the proxies
LBC36_inflam <- read.csv(
  file = "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/sensitivity_regressing_LBC_pQTLs/inflam_pass_no_pQTLs_regressed.csv"
)
LBC36_neuro <- read.csv(
  file = "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/sensitivity_regressing_LBC_pQTLs/neuro_pass_no_pQTLs_regressed_out.csv"
)

# Olink LBC1921 (n=162) correlations with the proxy projections (neuro only available)
olink_neuro <- read.csv(
  file = "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/LBC_validation_in_LBC1921_neuro.csv"
)

# SomaScan STRADL (n=778) correlations with the proxy projections
soma_inflam <- read.csv(
  file = "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/STRADL_inflam_correlations.csv"
)
soma_neuro <- read.csv(
  file = "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/STRADL_neuro_correlations.csv"
)


########################################################################################

### Combine neuro

########################################################################################

# Combine the LBC1936 and LBC1921 Olink results.
# Only the first three columns of each table are kept; the labels assigned
# below assume they hold protein, r and p in that order (TODO confirm against
# the input files).

LBC36_neuro <- LBC36_neuro[1:3]

olink_neuro <- olink_neuro[1:3]

join <- left_join(LBC36_neuro, olink_neuro, by = "Protein")

# Cohort labels were previously misspelled ("LC1936 r", "LBC193 p", "LC1921 p").
names(join) <- c("Protein", "LBC1936 r", "LBC1936 p", "LBC1921 r", "LBC1921 p")

# Add in the SomaScan (STRADL) results.
# Column 3 of the soma table is moved to the front and used as the "Protein"
# join key.

soma_neuro <- soma_neuro[c(3, 1:2, 4:6)]
names(soma_neuro)[1] <- "Protein"

join2 <- left_join(join, soma_neuro, by = "Protein")

# The join appends the five non-key soma columns at positions 6-10. Dropping
# positions 6-8 leaves only the last two, which are labelled STRADL r / p below.
join2 <- join2[-(6:8)]

# "STRADL p" was previously written as "STRDL p" in the output header.
names(join2) <- c(
  "Protein", "LBC1936 r", "LBC1936 p", "LBC1921 r", "LBC1921 p",
  "STRADL r", "STRADL p"
)

# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv(join2, "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/LBC_NEURO_VALIDATION_JOINT_100321_no_pqtls.csv", row.names = FALSE)


########################################################################################

### Combine inflam

########################################################################################

# Combine the inflam LBC1936 results with the SomaScan (STRADL) results.

# Keep the first three LBC1936 columns and label them.
LBC36_inflam <- LBC36_inflam[1:3]
names(LBC36_inflam) <- c("Protein", "LBC36 r", "LBC36 p")

# Bring column 3 of the soma table to the front as the "Protein" join key.
soma_inflam <- soma_inflam[c(3, 1:2, 4:6)]
names(soma_inflam)[1] <- "Protein"

join3 <- left_join(LBC36_inflam, soma_inflam, by = "Protein")

# Positions 4-6 are the first three soma columns brought in by the join;
# removing them leaves the last two soma columns, labelled STRADL r / p.
join3 <- join3[-(4:6)]
names(join3) <- c("Protein", "LBC36 r", "LBC36 p", "STRADL r", "STRADL p")

write.csv(
  join3,
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/LBC_INFLAM_VALIDATION_JOINT_100321_no_pqtls.csv",
  row.names = FALSE
)

# Read the annotated validation tables, which carry a pass/fail Status column
library(readxl)
neuro <- as.data.frame(
  read_excel("/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/LBC_NEURO_VALIDATION_JOINT_annotated.xlsx")
)
inflam <- as.data.frame(
  read_excel("/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/LBC_INFLAM_VALIDATION_JOINT_annotated.xlsx")
)

# Neuro proteins whose Status is anything other than "fail"
# (filter() also drops rows where Status is missing)
neu <- filter(neuro, Status != "fail")
neu_names <- unique(neu$Protein)
length(neu_names) # 13

# Inflam proteins whose Status is anything other than "fail"
inf <- filter(inflam, Status != "fail")
inf_names <- unique(inf$Protein)
length(inf_names) # 13

# Stack the two name lists (inflam first, then neuro) into a single
# one-column table named "Protein" and write it out
i <- inf_names %>% as.data.frame()
n <- neu_names %>% as.data.frame()
joint <- setNames(rbind(i, n), "Protein")
write.csv(
  joint,
  "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/00_Running_pQTLs_regressed/Correlations_test_sets/LBC_list_of_proteins_for_cox_models.csv",
  row.names = FALSE
)


back to top