https://github.com/DanniGadd/EpiScores-for-protein-levels
Raw File
Tip revision: a5130fab3895a0d95f0dcc8826aa9fb5e8c0fa86 authored by DanniGadd on 16 February 2022, 08:46:38 UTC
Merge branch 'main' of https://github.com/DanniGadd/EpiScores-for-protein-levels
Tip revision: a5130fa
29_covid_hospitalisations_episcores.R
Copyright (c) <2022>, <DanniGadd>
All rights reserved.

This source code is licensed under the MIT license found in the
LICENSE file in the root directory.

####################################################################################

# Covid EpiScores analyses 

####################################################################################

library(tidyverse)
library(readxl)
library(lme4)

# Load covid data from archie
t <- read_excel("/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/COVID_update/2021-09-03 C19 cases Feb.xlsx")
t <- as.data.frame(t)

# Take a look at variables available in this file (including those without DNAm data)
length(unique(t$id)) # 1713 unique individuals 
table(t$covid) # 554 with diagnosis of covid 
table(t$smr) # 31 in hospital 
table(t$icu) # 6 in ICU 

e <- t %>% filter(t$covid == "1") 
names(e)[1] <- "Sample_Name"

# > dim(e)
# [1] 554   8

# Now add in age at covid 
g <- read.csv("/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/COVID_update/C19_test_dates_25Oct2021.csv")
names(g)[1] <- "Sample_Name"

e2 <- merge(e, g, by.x = 'Sample_Name', all.x = TRUE)

hasAntiBD <- !is.na(e2$S3_AntiBD_Year)
hasSwab <- !is.na(e2$S3_SwabDate_Year)
hasLinkedTest <- !is.na(e2$Linked_TestDate)

# Remove individuals who reported having covid in CL1 but not in CL2
Mismatch <- (e2$Had_COVID > 0) & (!e2$S2_Had_COVID > 0)

# Replace NA with 0 in covidLife1And2Mismatch
Mismatch[is.na(Mismatch)] <- 0
e2 <- e2[!Mismatch, ] # only one person has been removed - 553 now 


# Read in appt table to extract all baseline appointment dates (not just those in CovidLife)
apptTable <- read.csv("/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/COVID_update/2021-07-30_appt.csv")

apptToTargetTableIndex <- match(e2$Sample_Name, apptTable$id)
apptDateString <- apptTable[apptToTargetTableIndex, 'appt']

# Extract Date from first half of the timestamp
apptDate <- lapply(apptDateString, function(x) {as.Date(strsplit(x, ' ')[[1]][[1]], '%Y-%m-%d')})

# Use date from the following sources if available: linked test, antibd, swab. Else use an approximate date of 01/01/2021
covidDate <- lapply(1:nrow(e2), function(rowName) {
  row <- e2[rowName, ]
  if (!is.na(row$Linked_TestDate)) {
    as.Date(row$Linked_TestDate, '%Y-%m-%d')
  } else if (!is.na(row$S3_AntiBD_Year)) {
    as.Date(paste(row$S3_AntiBD_Year, row$S3_AntiBD_Month, row$S3_AntiBD_Day, sep = '/'), '%Y/%m/%d')
  } else if (!is.na(row$S3_SwabDate_Year)) {
    as.Date(paste(row$S3_SwabDate_Year, row$S3_SwabDate_Month, row$S3_SwabDate_Day, sep = '/'), '%Y/%m/%d')
  } else {
    as.Date('2021-01-01', '%Y-%m-%d')
  }
})

# Difference between appointment (baseline) date and covid date
covidApptDiff <- sapply(1:length(apptDate), function(i) {as.numeric(covidDate[[i]] - apptDate[[i]]) / 365})

######################################################

# Load the episcores d1 file that is rank transformed 
d1 <- read.csv("/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/COVID_update/d1_080321.csv", check.names = F)

# Join episcores file into those with covid 
d2 <- left_join(e2, d1, by = "Sample_Name")
dim(d2)

# > dim(d2)
# [1] 553 251

# How many individuals have episcore data 
table(is.na(d2$SMPD1))
# 269 people have covid and episcores data 
table(d2$smr)
# 29 cases

######################################################

# Add covid appt difference onto baseline age
d2$covidAge <- d2$Age + covidApptDiff

# Assign difference as extra column
d2$covidDiff <- covidApptDiff

# filter to just smr cases 
d2_smr <- d2[which(d2$smr %in% "1"),] # 29 cases

# calculate mean difference 
mean <- mean(d2_smr$covidDiff, na.rm = T) # 11.864
sd <- sd(d2_smr$covidDiff) # 1.354

# find minimum follow up difference 


######################################################

library(glm2)

d2$smr <- as.factor(d2$smr)
results <- data.frame(episcore = "X", outcome = "X", n = "X", Beta = "X", SE = "X", p = "X")
markers <- colnames(d2)[133:242]
outcome <- "hospitalised covid"

for (i in 1:110){
     name <- as.character(markers[i])
     m <- glm(smr ~ scale(d2[,name]) + scale(covidAge) + factor(Female), data = d2, 
     family = binomial)

     Beta <- coef(summary(m))[2,1]
     SE <- coef(summary(m))[2,2]
     p <- coef(summary(m))[2,4]
     n <- nobs(m)

     results[i,1] <- name
     results[i,2] <- outcome
     results[i,3] <- n
     results[i,4] <- Beta
     results[i,5] <- SE
     results[i,6] <- p

     print(name)
     print(outcome)
}


# Order results by P 
result <- results[order(results$p),]

top <- result[which(result$p < 0.05),] # 6 associations - which dont have the trigger for singular fits 



# Add annotation context 
anno <- read_excel("/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/COVID_update/Annotations_for_reference.xlsx")
anno <- as.data.frame(anno)
anno <- anno[c(1,4,18,13)]
names(anno)[1] <- "episcore"

result <- left_join(result, anno, by = "episcore")

result <- result[-which(result$episcore == "IL.12B"),]

# Write off results file 
write.csv(result, "/Cluster_Filespace/Marioni_Group/Danni/LBC_proteins_Jan2021/COVID_update/results/result_glm_hospitalisations_041121.csv", row.names = F)

back to top