BrainAge_VidalPineiro_UKB_Tests.Rmd
---
title"BrainAge_VidalPineiro_Tests(early life v. aging) (XGB & LASSO)"
author: "dvp"
date: "11/2/2020"
output:
html_document: default
pdf_document: default
---
# Setup
```{r setup, include=FALSE}
#install.packages("~/R/imported_packages/3.6.0/itsadug_2.3.tar.gz", repos = NULL, type = "source")
knitr::opts_chunk$set(echo = TRUE)
library(sgof)
library(tidyverse)
library(magrittr)
library(lme4)
library(mgcv)
library(GGally)
library(itsadug)
library(lavaan)
library(reshape2)
library(lmerTest)
library(MuMIn)
library(fastICA)
data_folder=file.path("./data/BrainAge",paste0("noExcl_scaled"))
pgs_folder="./data-raw/BrainAge/UKB/PGS"
options(bitmapType = "cairo")
# update non-MRI data
raw_folder="./data-raw/BrainAge/UKB/tabulated_data"
```
# Loading data and common Preproc
load data
```{r load}
load(file.path(data_folder, "ResultsBrainAge_XGB_LASSO.Rda"))
df.update = read.csv(file.path(raw_folder, "ID_all_earlylife.csv"))
df.update2 = read.csv(file.path(raw_folder, "ID_all_noMRI2.csv"))
df.update = left_join(df.update, df.update2)
df.update = df.update[,!names(df.update) %in% names(df.Test)[-1]]
df.Test %<>% left_join(., df.update)
tmp = df.Test %>% group_by(eid) %>% summarise(AgeBsl = min(age, na.rm = T))
# compute age at Bsl
df.Test = left_join(df.Test, tmp) %>%
mutate(Time = age - AgeBsl,
BAc = age + BAG_corr_gam, # BA (corrected) # we use GAM_corrected as default
LASSO.BAc = age + LASSO.BAG_corr_gam, # BA (corrected) # we use GAM_corrected as default
Center = UK_Biobank_assessment_centre_c_54.3.0,
BirthWeight_unf = if_else(is.na(Birth_weight_c_20022.1.0),
Birth_weight_c_20022.0.0,
Birth_weight_c_20022.1.0),
MultipleBirth = if_else(is.na(Part_of_a_multiple_birth_c_1777.1.0),
Part_of_a_multiple_birth_c_1777.0.0,
Part_of_a_multiple_birth_c_1777.1.0),
BirthWeightNormal = if_else(BirthWeight_unf < 2.5 | BirthWeight_unf > 4.5,NaN, BirthWeight_unf))
```
# 1) Aging Hypothesis
###Prepare data
```{r pivot_wider}
# Select Variables of Interest
tmp = df.Test %>%
dplyr::select(eid,
age,
wave,
AgeBsl,
sex,
Center.Newcastle,
eICV,
BAG_corr_gam,
LASSO.BAG_corr_gam,
BAG_tp,
BirthWeightNormal
)
df.Test.long = pivot_wider(tmp,
names_from = wave,
values_from = c(age, BAG_corr_gam,LASSO.BAG_corr_gam,BAG_tp,eICV)) %>%
mutate(Time = age_2 - age_1,
eICV = .5*(eICV_1 + eICV_2),
LASSO.BAG_change_T = (LASSO.BAG_corr_gam_2 - LASSO.BAG_corr_gam_1)/(Time),
BAG_change_T = (BAG_corr_gam_2 - BAG_corr_gam_1)/(Time),
LASSO.BAG_mean = .5*(LASSO.BAG_corr_gam_2 + LASSO.BAG_corr_gam_1),
BAG_mean = .5*(BAG_corr_gam_2 + BAG_corr_gam_1),
BAGtp_change_T = (BAG_tp_2 - BAG_tp_1)/(Time),
BAGtp_mean = .5*(BAG_tp_2 + BAG_tp_1))
```
### a) Correlation between T1 and T2 delta
```{r BrainAging with Time}
cor(df.Test.long$BAG_corr_gam_2,df.Test.long$BAG_corr_gam_1)
#cor(df.Test.long$LASSO.BAG_corr_gam_2,df.Test.long$LASSO.BAG_corr_gam_1) # Same for LASSO if wanted
#cor(df.Test$BAG_corr_gam,df.Test$LASSO.BAG_corr_gam) # correlation between LASSO and XGB
ggplot(df.Test.long, aes(BAG_corr_gam_2,BAG_corr_gam_1)) +
geom_point() +
geom_smooth(method = "lm", color = "blue") +
geom_smooth(color = "red") +
geom_abline(intercept = 0,
slope = 1,
colour = "grey60",
linetype = 4)
```
### b) mean Brain aging GAP (GAP) predicts brain aging?
```{r Brain aging to aging2}
#Does the Brain Aging GAP predict Aging?
## BAG (Brain Age Gap bias-corrected at tp1 predicts less Brain Aging Change with time)
lm.pred = lm(BAG_change_T ~ BAG_mean + AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.pred)
#uve
lm.pred_rm = lm(BAG_change_T ~ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.pred)$r.squared - summary(lm.pred_rm)$r.squared
# visually check assumptions
plot(lm.pred)
lm.pred.LASSO = lm(LASSO.BAG_change_T ~ LASSO.BAG_mean + AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.pred.LASSO)
#uve
lm.pred_rm = lm(LASSO.BAG_change_T ~ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.pred.LASSO)$r.squared - summary(lm.pred_rm)$r.squared
# bag as tp1 and tp2 modelled as two different datasets
lm.pred.tp = lm(BAGtp_change_T ~ BAGtp_mean + AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.pred.tp)
```
```{r Plot Figure 2a-b. Brain Age to Brain Aging}
colorscale <- c('#edf8fb','#ccece6','#99d8c9','#66c2a4','#2ca25f','#006d2c')
#colorscale = c('#edf8fb','#bfd3e6','#9ebcda','#8c96c6','#8856a7','#810f7c')
#"#66a61e" "#1b9e77"
# UKB - Extreme Bossting
gs = ggplot(data = df.Test.long,
mapping = aes(x = BAG_mean,
y = BAG_change_T)) +
geom_point(shape = 21, color = "black", fill = "#66a61e", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#1b9e77",
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Brain Age Delta"[long])) +
xlab(expression("Brain Age Delta"[cross])) +
ggtitle("UKB - XGB")
ggsave("figures/BrainAge_XGB.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
# UKB - LASSO
gs = ggplot(data = df.Test.long,
mapping = aes(x = LASSO.BAG_mean,
y = LASSO.BAG_change_T)) +
geom_point(shape = 21, color = "black", fill = "#66a61e", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#1b9e77",
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Brain Age Delta"[long])) +
xlab(expression("Brain Age Delta"[cross])) +
ggtitle("UKB - LASSO")
ggsave("figures/BrainAge_LASSO.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
```
# 2) Early life
### a) main test
```{r lme BirthWeight Normal}
# info with birth weight in the test sample
sum(!is.na(df.Test$BirthWeightNormal))/2
sum(!is.na(df.Test$BirthWeight_unf))/2
lm.main = lmer(BAG_corr_gam ~ Time*BirthWeightNormal + AgeBsl + sex + scale(eICV) + Center.Newcastle + (1 | eid), data = df.Test, REML = T)
summary(lm.main)
# UVE - unique variance explained
lm.main_rm = lmer(BAG_corr_gam ~ Time + AgeBsl + sex + scale(eICV) + Center.Newcastle + (1 | eid), data = df.Test, REML = T)
r.squaredGLMM(lm.main) -r.squaredGLMM(lm.main_rm)
# visually check assumptions
plot(lm.main)
qqnorm(residuals(lm.main))
plot(lm.main@frame$BirthWeightNormal,residuals(lm.main))
lm.no_mb = lmer(BAG_corr_gam ~ Time*BirthWeightNormal + AgeBsl + sex + scale(eICV) + Center.Newcastle + (1 | eid), data = df.Test %>% filter(MultipleBirth == 0), REML = T)
summary(lm.no_mb)
lm.LASSO = lmer(LASSO.BAG_corr_gam ~ Time*BirthWeightNormal + AgeBsl + sex + scale(eICV) + Center.Newcastle + (1 | eid), data = df.Test, REML = T)
summary(lm.LASSO)
# UVE - unique variance explained
lm.main_rm = lmer(LASSO.BAG_corr_gam ~ Time + AgeBsl + sex + scale(eICV) + Center.Newcastle + (1 | eid), data = df.Test, REML = T)
r.squaredGLMM(lm.LASSO) -r.squaredGLMM(lm.main_rm)
# UKB - Extreme Bossting
gs = ggplot(data = df.Test %>% filter(wave == 1),
mapping = aes(x = BirthWeightNormal,
y = LASSO.BAG_corr_gam)) +
geom_point(shape = 21, color = "black", fill = '#810f7c', size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = '#8856a7',
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab("Brain Age Delta") +
xlab("Birth Weight") +
ggtitle("UKB - XGB")
ggsave("figures/BirthWeight_XGB.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
# UKB - LASSO
gs = ggplot(data = df.Test %>% filter(wave == 1),
mapping = aes(x = BirthWeightNormal,
y = LASSO.BAG_corr_gam)) +
geom_point(shape = 21, color = "black", fill = '#810f7c', size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = '#8856a7',
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab("Brain Age Delta") +
xlab("Birth Weight") +
ggtitle("UKB - LASSO")
ggsave("figures/BirthWeight_LASSO.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
```
### b) robust birt weight
```{r Birth Weight grid}
BW.range = df.Test$BirthWeight_unf %>% range(., na.rm = T)
lpf = seq(round(BW.range[1],1), 2.7, by = .025)
hpf = seq(4.2, signif(BW.range[2],2), by = .025)
# horrible code :(
if (!file.exists(file.path(data_folder, "ResultsBirthWeightGrid.Rda"))) {
df.grid = NULL
for (i in lpf) {
for (j in hpf) {
tmp = lmer(BAG_corr_gam ~ Time*BirthWeight_unf + AgeBsl + sex + scale(eICV) + Center.Newcastle + (1 | eid),
data = df.Test %>% filter(BirthWeight_unf > i & BirthWeight_unf < j),
REML = T)
tmp.2 = summary(tmp)$coefficients
df.grid = rbind(df.grid, c(tmp.2[rownames(tmp.2) == "BirthWeight_unf",],lpf=i,hpf=j))
}
}
df.grid %<>% as.data.frame()
names(df.grid)[5] = "p.value"
df.grid %<>% mutate(Estimate.filt = if_else(p.value <.05, Estimate, NaN))
save(df.grid, file = file.path(data_folder, "ResultsBirthWeightGrid.Rda"))
} else {
load(file.path(data_folder, "ResultsBirthWeightGrid.Rda"))
}
gs = ggplot(df.grid, aes(x =lpf, y =hpf)) +
geom_raster(mapping = aes(fill = Estimate))+
scale_fill_viridis_c(limits = c(min(df.grid$Estimate),0)) +
scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(expand = c(0,0)) +
theme_classic() +
theme(axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0),
legend.title = element_text(size = 20),
legend.text = element_text(size =16, hjust = 1)) +
xlab("min Birth Weight") +
ylab("max Birth Weight") +
labs(fill =expression(paste(beta,"eta")))
ggsave("figures/BirthWeight_grid.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
```
### c) delta change with weight - reviewer ammendment
```{r}
gs = ggplot(data = df.Test.long,
mapping = aes(x = BirthWeightNormal,
y = BAG_change_T)) +
geom_point(shape = 21, color = "black", fill = '#810f7c', size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = '#8856a7',
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Brain Age Delta"[long])) +
xlab("Birth Weight") +
ggtitle("UKB - XGB")
ggsave("figures/BirthWeight_XGB_change.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
# UKB - LASSO
gs = ggplot(data = df.Test.long,
mapping = aes(x = BirthWeightNormal,
y = LASSO.BAG_change_T)) +
geom_point(shape = 21, color = "black", fill = '#810f7c', size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = '#8856a7',
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Brain Age Delta"[long])) +
xlab("Birth Weight") +
ggtitle("UKB - LASSO")
ggsave("figures/BirthWeight_LASSO_change.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
```
# 3) PGS data
### merged pgs data to TEST dataset
```{r PGS load}
pgsBA_folder = file.path(pgs_folder, "BrainAge")
df.pgsBA = read.table(file.path(pgsBA_folder,"autosome_pst_eff_a1_b0.5_phi1e-02_BAG_corr_gam.profile"),
header = T)
df.pgsBA.eigenvec = read.table(file.path(pgsBA_folder, "Longi_test_autosome_plink_snpsonly_rmdup.eigenvec"),
header = F)
df.pgsBA %<>% dplyr::select(FID, SCORE) %>%
mutate(SCORE = scale(SCORE))
names(df.pgsBA) = c("eid", "pgsBA")
names(df.pgsBA.eigenvec) = c("eid", "tmp", paste0("pgsBAIC_",str_pad(seq(1:20),2, pad = "0")))
df.pgsBA.eigenvec %<>% dplyr::select(eid, starts_with("pgsBAIC"))
df.Test %<>% left_join(., df.pgsBA) %>% left_join(., df.pgsBA.eigenvec)
```
### is PGS-BA data associated with BAG
```{r PGS}
#n: sum(!is.na(df.Test$pgsBA))/2
lm.main.PGS = lmer(BAG_corr_gam ~ Time*pgsBA +
AgeBsl +
sex +
scale(eICV) +
Center.Newcastle +
scale(pgsBAIC_01) +
scale(pgsBAIC_02) +
scale(pgsBAIC_03) +
scale(pgsBAIC_04) +
scale(pgsBAIC_05) +
scale(pgsBAIC_06) +
scale(pgsBAIC_07) +
scale(pgsBAIC_08) +
scale(pgsBAIC_09) +
scale(pgsBAIC_10) +
(1 | eid),
data = df.Test, REML = T)
summary(lm.main.PGS)
# UVE - unique variance explained
lm.main.PGS_rm = lmer(BAG_corr_gam ~ Time +
AgeBsl +
sex +
scale(eICV) +
Center.Newcastle +
scale(pgsBAIC_01) +
scale(pgsBAIC_02) +
scale(pgsBAIC_03) +
scale(pgsBAIC_04) +
scale(pgsBAIC_05) +
scale(pgsBAIC_06) +
scale(pgsBAIC_07) +
scale(pgsBAIC_08) +
scale(pgsBAIC_09) +
scale(pgsBAIC_10) +
(1 | eid),
data = df.Test, REML = T)
r.squaredGLMM(lm.main.PGS) -r.squaredGLMM(lm.main.PGS_rm)
# PC from UKB
lm.main.PGS.PCA.UKB = lmer(BAG_corr_gam ~ Time*pgsBA +
AgeBsl +
sex +
scale(eICV) +
Center.Newcastle +
scale(Genetic_principal_components_c_22009.0.1) +
scale(Genetic_principal_components_c_22009.0.2) +
scale(Genetic_principal_components_c_22009.0.3) +
scale(Genetic_principal_components_c_22009.0.4) +
scale(Genetic_principal_components_c_22009.0.5) +
scale(Genetic_principal_components_c_22009.0.6) +
scale(Genetic_principal_components_c_22009.0.7) +
scale(Genetic_principal_components_c_22009.0.8) +
scale(Genetic_principal_components_c_22009.0.9) +
scale(Genetic_principal_components_c_22009.0.10) +
(1 | eid),
data = df.Test, REML = T)
summary(lm.main.PGS.PCA.UKB)
gs = ggplot(data = df.Test %>% filter(wave == 1),
mapping = aes(x = pgsBA,
y = BAG_corr_gam)) +
geom_point(shape = 21, color = "black", fill = '#810f7c', size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = '#8856a7',
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab("Brain Age Delta") +
xlab("PGS - BA") +
ggtitle("UKB - XGB")
ggsave("figures/pgs_XGB.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
lm.main.PGS.LASSO = lmer(LASSO.BAG_corr_gam ~ Time*pgsBA +
AgeBsl +
sex +
scale(eICV) +
Center.Newcastle +
scale(pgsBAIC_01) +
scale(pgsBAIC_02) +
scale(pgsBAIC_03) +
scale(pgsBAIC_04) +
scale(pgsBAIC_05) +
scale(pgsBAIC_06) +
scale(pgsBAIC_07) +
scale(pgsBAIC_08) +
scale(pgsBAIC_09) +
scale(pgsBAIC_10) +
(1 | eid),
data = df.Test, REML = T)
summary(lm.main.PGS.LASSO)
# UVE - unique variance explained
lm.main.PGS_rm = lmer(LASSO.BAG_corr_gam ~ Time +
AgeBsl +
sex +
scale(eICV) +
Center.Newcastle +
scale(pgsBAIC_01) +
scale(pgsBAIC_02) +
scale(pgsBAIC_03) +
scale(pgsBAIC_04) +
scale(pgsBAIC_05) +
scale(pgsBAIC_06) +
scale(pgsBAIC_07) +
scale(pgsBAIC_08) +
scale(pgsBAIC_09) +
scale(pgsBAIC_10) +
(1 | eid),
data = df.Test, REML = T)
r.squaredGLMM(lm.main.PGS.LASSO) -r.squaredGLMM(lm.main.PGS_rm)
# PC from UKB
lm.main.PGS.PCA.UKB = lmer(LASSO.BAG_corr_gam ~ Time*pgsBA +
AgeBsl +
sex +
scale(eICV) +
Center.Newcastle +
scale(Genetic_principal_components_c_22009.0.1) +
scale(Genetic_principal_components_c_22009.0.2) +
scale(Genetic_principal_components_c_22009.0.3) +
scale(Genetic_principal_components_c_22009.0.4) +
scale(Genetic_principal_components_c_22009.0.5) +
scale(Genetic_principal_components_c_22009.0.6) +
scale(Genetic_principal_components_c_22009.0.7) +
scale(Genetic_principal_components_c_22009.0.8) +
scale(Genetic_principal_components_c_22009.0.9) +
scale(Genetic_principal_components_c_22009.0.10) +
(1 | eid),
data = df.Test, REML = T)
summary(lm.main.PGS.PCA.UKB)
gs = ggplot(data = df.Test %>% filter(wave == 1),
mapping = aes(x = pgsBA,
y = LASSO.BAG_corr_gam)) +
geom_point(shape = 21, color = "black", fill = '#810f7c', size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = '#8856a7',
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab("Brain Age Delta") +
xlab("PGS - BA") +
ggtitle("UKB - LASSO")
ggsave("figures/pgs_LASSO.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
```
### c) is PGS related to deltalong - reviewer ammendment
```{r}
df.Test.long = left_join(df.Test.long,
df.Test %>%
select(eid, pgsBA) %>%
group_by(eid) %>%
summarize(pgsBA = mean(pgsBA)))
gs = ggplot(data = df.Test.long,
mapping = aes(x = pgsBA,
y = BAG_change_T)) +
geom_point(shape = 21, color = "black", fill = '#810f7c', size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = '#8856a7',
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Brain Age Delta"[long])) +
xlab("PGS - BA") +
ggtitle("UKB - XGB")
ggsave("figures/pgs_XGB_change.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
# UKB - LASSO
gs = ggplot(data = df.Test.long,
mapping = aes(x = BirthWeightNormal,
y = LASSO.BAG_change_T)) +
geom_point(shape = 21, color = "black", fill = '#810f7c', size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = '#8856a7',
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Brain Age Delta"[long])) +
xlab("PGS - BA") +
ggtitle(ggtitle("UKB - LASSO"))
ggsave("figures/pgs_XGB_changelasso.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
```
# 4) PCA/feature change
### a) feature change
```{r PCA_ICA - compute feature change}
# open harmonized features
df.Harmonize = read.csv("data/Harmonize.csv", stringsAsFactors = F) %>%
mutate(feature = UKB,
modality = as.factor(Stats_file)) %>%
dplyr::select(feature, modality)
# open features of interest (if not loaded)
load(file.path(data_folder,"T1w_vars.Rda"))
# compute yearly change in features (not that features are already scaled)
tmp = df.Test %>%
dplyr::select(eid,
wave,
age,
T1w_vars)
df.Test.long.f = pivot_wider(tmp,
names_from = wave,
values_from = c(T1w_vars, "age")) %>%
mutate(Time = age_2 - age_1)
for (i in T1w_vars) {
j = paste0(i,"_2")
k = paste0(i,"_1")
df.Test.long.f %<>% mutate(!!paste0(i) := (!!as.name(j) - !!as.name(k))/Time)
}
# create data.frame with change
tmp = df.Test.long.f %>% dplyr::select(T1w_vars) %>% lapply(., t.test)
df.feature.change = data.frame(feature = names(tmp),
p.value = lapply(tmp, function(x) x$p.value) %>% simplify2array(),
statistic = lapply(tmp, function(x) x$statistic) %>% simplify2array(),
estimate = lapply(tmp, function(x) x$estimate) %>% simplify2array()) %>%
mutate(logp = -log10(p.value),
logp.sig = if_else(estimate > 0, logp, -logp))
bf.thr = -log10(0.05/length(T1w_vars))
sum(df.feature.change$logp > bf.thr)/length(T1w_vars)
# join with harmonize
df.feature.change = left_join(df.feature.change,df.Harmonize)
# preparations for the figure
tmp =df.feature.change %>%
group_by(modality) %>%
summarise(logp =mean(logp)) %>%
mutate(order = -rank(logp)) %>%
dplyr::select(-logp)
df.feature.change = left_join(df.feature.change, tmp) %>%
arrange(order, -logp) %>%
mutate(order2 = 1:length(T1w_vars))
df.feature.change$modality =
plyr::mapvalues(df.feature.change$modality,
from =c("Area_Aparc","GWC_Aparc","Intensity_Aseg","Thickness_Aparc","Volume_Aseg","Volume_Aparc"),
to = c("area (c)", "gwc (c)","intensity (s)","thickness (c)", "volume (s)", "volume (c)"))
colorscale =c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')
gs = ggplot(df.feature.change, aes(x = order2, y = logp, group = modality, fill = modality)) +
geom_point(shape = 21, size = 3, alpha = .1) +
geom_point(data = df.feature.change %>% filter(logp > bf.thr), shape = 21, size = 3) +
geom_hline(yintercept = bf.thr, linetype = 3, color ="grey40",size = 1.5) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(5,"point"),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
xlab("Feature") +
ylab(expression("Longitudinal Change [-log"[10]*"(p)")) +
scale_fill_manual(values = colorscale) +
scale_y_continuous(trans = "sqrt", breaks = c(1.3,6,20,50,100)) +
ggtitle("UKB")
ggsave("figures/feature_change.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
write.csv(df.feature.change, file = "figures/feature_change.csv")
# append change in df.Test.long dataset
df.Test.long %<>% left_join(.,df.Test.long.f %>% dplyr::select(eid, T1w_vars))
```
### b) Feat.v.delta_0
```{r feature and delta}
# compute relationship between brain age delta cross and feature change
mod = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lm(change ~ BAG_mean+ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = .))
# compute rel. with LASSO
mod_lasso = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lm(change ~ LASSO.BAG_mean+ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = .))
# for computing uve
mod_uve = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lm(change ~ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = .))
df.feature2delta_mean = data.frame(feature = mod$feature,
estimate.xgb =lapply(mod$fit, function(x) summary(x)$coefficients["BAG_mean","Estimate"]) %>% simplify2array(),
p.value.xgb =lapply(mod$fit, function(x) summary(x)$coefficients["BAG_mean","Pr(>|t|)"]) %>% simplify2array(),
uve.xgb = lapply(mod$fit, function(x) summary(x)$r.squared) %>% simplify2array() -
lapply(mod_uve$fit, function(x) summary(x)$r.squared) %>% simplify2array(),
estimate.lasso =lapply(mod_lasso$fit, function(x) summary(x)$coefficients["LASSO.BAG_mean","Estimate"]) %>% simplify2array(),
p.value.lasso =lapply(mod_lasso$fit, function(x) summary(x)$coefficients["LASSO.BAG_mean","Pr(>|t|)"]) %>% simplify2array(),
uve.lasso = lapply(mod_lasso$fit, function(x) summary(x)$r.squared) %>% simplify2array() -
lapply(mod_uve$fit, function(x) summary(x)$r.squared) %>% simplify2array()) %>%
mutate(logp.xgb = -log10(p.value.xgb),
logp.sig.xgb = if_else(estimate.xgb > 0, logp.xgb, -logp.xgb),
logp.lasso = -log10(p.value.lasso),
logp.sig.lasso = if_else(estimate.lasso > 0, logp.lasso, -logp.lasso))
bf.thr = -log10(0.05/length(T1w_vars))
sum(df.feature2delta_mean$logp.xgb > bf.thr)
sum(df.feature2delta_mean$logp.lasso > bf.thr)
df.feature2delta_mean = left_join(df.feature.change %>% dplyr::select(feature,
modality,
statistic,
estimate,
logp),
df.feature2delta_mean,
by = "feature")
## prepare plot XGB
tmp =df.feature2delta_mean %>%
group_by(modality) %>%
summarise(logp.xgb =mean(logp.xgb)) %>%
mutate(order = -rank(logp.xgb)) %>%
dplyr::select(-logp.xgb)
df.feature2delta_mean_plot = left_join(df.feature2delta_mean,
tmp) %>%
arrange(order, -logp.sig.xgb) %>%
mutate(order2 = 1:length(T1w_vars))
colorscale =c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')
gs = ggplot(df.feature2delta_mean_plot,
aes(x = order2,
y = logp.sig.xgb,
group = modality,
fill = modality)) +
geom_point(shape = 21,
size = 3,
alpha = .1) +
geom_point(data = df.feature2delta_mean_plot %>% filter(logp.xgb > bf.thr),
shape = 21,
size = 3) +
geom_hline(yintercept = bf.thr,
linetype = 3,
color ="grey40",
size = 1.5) +
geom_hline(yintercept = -bf.thr, linetype = 3, color ="grey40",size = 1.5) +
geom_hline(yintercept = 1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
geom_hline(yintercept = -1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(5,"point"),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
xlab("Feature") +
ylab(expression("feature change vs. delta"[cross]*" [-log"[10]*"(p)")) +
scale_fill_manual(values = colorscale) +
ggtitle("UKB - XGB")
ggsave("figures/feature_change_v_delta_XGB.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
## prepare plot XGB
tmp =df.feature2delta_mean %>%
group_by(modality) %>%
summarise(logp.lasso =mean(logp.lasso)) %>%
mutate(order = -rank(logp.lasso)) %>%
dplyr::select(-logp.lasso)
df.feature2delta_mean_plot = left_join(df.feature2delta_mean,
tmp) %>%
arrange(order, -logp.sig.lasso) %>%
mutate(order2 = 1:length(T1w_vars))
gs = ggplot(df.feature2delta_mean_plot,
aes(x = order2,
y = logp.sig.lasso,
group = modality,
fill = modality)) +
geom_point(shape = 21,
size = 3,
alpha = .1) +
geom_point(data = df.feature2delta_mean_plot %>% filter(logp.lasso > bf.thr),
shape = 21,
size = 3) +
geom_hline(yintercept = bf.thr,
linetype = 3,
color ="grey40",
size = 1.5) +
geom_hline(yintercept = -bf.thr, linetype = 3, color ="grey40",size = 1.5) +
geom_hline(yintercept = 1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
geom_hline(yintercept = -1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(5,"point"),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
xlab("Feature") +
ylab(expression("feature change vs. delta"[cross]*" [-log"[10]*"(p)")) +
scale_fill_manual(values = colorscale) +
ggtitle("UKB - LASSO")
ggsave("figures/feature_change_v_delta_LASSO.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
write.csv(df.feature2delta_mean, file = "figures/feature_change2delta_mean.csv")
```
### c) Feat v. delta change
```{r feature and delta change}
# compute relationship between brain age delta change and feature change
mod = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lm(change ~ BAG_change_T+ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = .))
# compute rel. with LASSO
mod_lasso = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lm(change ~ LASSO.BAG_change_T+ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = .))
# for computing uve
mod_uve = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lm(change ~ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = .))
df.feature2delta_change = data.frame(feature = mod$feature,
estimate.xgb =lapply(mod$fit, function(x) summary(x)$coefficients["BAG_change_T","Estimate"]) %>% simplify2array(),
p.value.xgb =lapply(mod$fit, function(x) summary(x)$coefficients["BAG_change_T","Pr(>|t|)"]) %>% simplify2array(),
uve.xgb = lapply(mod$fit, function(x) summary(x)$r.squared) %>% simplify2array() -
lapply(mod_uve$fit, function(x) summary(x)$r.squared) %>% simplify2array(),
estimate.lasso =lapply(mod_lasso$fit, function(x) summary(x)$coefficients["LASSO.BAG_change_T","Estimate"]) %>% simplify2array(),
p.value.lasso =lapply(mod_lasso$fit, function(x) summary(x)$coefficients["LASSO.BAG_change_T","Pr(>|t|)"]) %>% simplify2array(),
uve.lasso = lapply(mod_lasso$fit, function(x) summary(x)$r.squared) %>% simplify2array() -
lapply(mod_uve$fit, function(x) summary(x)$r.squared) %>% simplify2array()) %>%
mutate(logp.xgb = -log10(p.value.xgb),
logp.sig.xgb = if_else(estimate.xgb > 0, logp.xgb, -logp.xgb),
logp.lasso = -log10(p.value.lasso),
logp.sig.lasso = if_else(estimate.lasso > 0, logp.lasso, -logp.lasso))
bf.thr = -log10(0.05/length(T1w_vars))
sum(df.feature2delta_change$logp.xgb > bf.thr)/length(T1w_vars)
sum(df.feature2delta_change$logp.lasso > bf.thr)/length(T1w_vars)
df.feature2delta_change = left_join(df.feature.change %>% dplyr::select(feature,
modality,
statistic,
estimate,
logp),
df.feature2delta_change,
by = "feature")
## prepare plot XGB
tmp =df.feature2delta_change %>%
group_by(modality) %>%
summarise(logp.xgb =mean(logp.xgb)) %>%
mutate(order = -rank(logp.xgb)) %>%
dplyr::select(-logp.xgb)
df.feature2delta_change_plot = left_join(df.feature2delta_change,
tmp) %>%
arrange(order, -logp.sig.xgb) %>%
mutate(order2 = 1:length(T1w_vars))
colorscale =c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')
gs = ggplot(df.feature2delta_change_plot,
aes(x = order2,
y = logp.sig.xgb,
group = modality,
fill = modality)) +
geom_point(shape = 21,
size = 3,
alpha = .1) +
geom_point(data = df.feature2delta_change_plot %>% filter(logp.xgb > bf.thr),
shape = 21,
size = 3) +
geom_hline(yintercept = bf.thr,
linetype = 3,
color ="grey40",
size = 1.5) +
geom_hline(yintercept = -bf.thr, linetype = 3, color ="grey40",size = 1.5) +
geom_hline(yintercept = 1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
geom_hline(yintercept = -1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(5,"point"),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
xlab("Feature") +
ylab(expression("feature change vs. delta"[long]*" [-log"[10]*"(p)")) +
scale_fill_manual(values = colorscale) +
ggtitle("UKB - XGB")
ggsave("figures/feature_change_v_delta_change_XGB.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
## prepare plot XGB
tmp =df.feature2delta_change %>%
group_by(modality) %>%
summarise(logp.lasso =mean(logp.lasso)) %>%
mutate(order = -rank(logp.lasso)) %>%
dplyr::select(-logp.lasso)
df.feature2delta_change_plot = left_join(df.feature2delta_change,
tmp) %>%
arrange(order, -logp.sig.lasso) %>%
mutate(order2 = 1:length(T1w_vars))
gs = ggplot(df.feature2delta_change_plot,
aes(x = order2,
y = logp.sig.lasso,
group = modality,
fill = modality)) +
geom_point(shape = 21,
size = 3,
alpha = .1) +
geom_point(data = df.feature2delta_change_plot %>% filter(logp.lasso > bf.thr),
shape = 21,
size = 3) +
geom_hline(yintercept = bf.thr,
linetype = 3,
color ="grey40",
size = 1.5) +
geom_hline(yintercept = -bf.thr, linetype = 3, color ="grey40",size = 1.5) +
geom_hline(yintercept = 1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
geom_hline(yintercept = -1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(5,"point"),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
xlab("Feature") +
ylab(expression("feature change vs. delta"[long]*" [-log"[10]*"(p)")) +
scale_fill_manual(values = colorscale) +
ggtitle("UKB - LASSO")
ggsave("figures/feature_change_v_delta_change_LASSO.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
write.csv(df.feature2delta_change, file = "figures/feature_change2delta_change.csv")
```
### d) PCA
```{r PCA on change}
# select only variables with significant change over time
T1w_change = df.feature.change$feature[df.feature.change$logp > bf.thr]
# prcomp
mod.pca = prcomp(df.Test.long[T1w_change], center =F)
df.PCA = data.frame(eid = df.Test.long.f$eid,
PC1 = mod.pca$x[,1]) %>%
mutate(PC1 = if_else(abs(PC1) > 10, NaN, PC1))
summary(mod.pca)$importance[,1:10]
#weights
df.pca.weights = data.frame(
features = rownames(mod.pca$rotation),
weight = mod.pca$rotation[,1])
write.csv(df.pca.weights, file = "figures/pca_weights.csv")
df.Test.long %<>% left_join(., df.PCA)
# basis model
lm.uve = lm(PC1 ~ AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.uve)
# mean xgb
lm.pred = lm(PC1 ~ BAG_mean + AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.pred)
summary(lm.pred)$r.squared - summary(lm.uve)$r.squared
# mean lasso
lm.pred.LASSO = lm(PC1 ~ LASSO.BAG_mean + AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.pred.LASSO)
summary(lm.pred.LASSO)$r.squared - summary(lm.uve)$r.squared
# change xgb
lm.predCh = lm(PC1 ~ BAG_change_T + AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.predCh)
summary(lm.predCh)$r.squared - summary(lm.uve)$r.squared
# change lasso
lm.pred.Ch.LASSO = lm(PC1 ~ LASSO.BAG_change_T + AgeBsl + sex + scale(eICV) + Center.Newcastle, data = df.Test.long)
summary(lm.pred.Ch.LASSO)
summary(lm.pred.Ch.LASSO)$r.squared - summary(lm.uve)$r.squared
```
#### PCA figures
```{r Plot Figure 2a-b. Brain Age to Brain Aging}
#ORANGE SCALE
#yellow scale
# UKB - Extreme Bossting
gs = ggplot(data = df.Test.long,
mapping = aes(x = BAG_mean,
y = PC1)) +
geom_point(shape = 21, color = "black", fill = "#EF820D", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#F05E23",
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Feature change (PC1)")) +
xlab(expression("Brain Age Delta"[cross])) +
ggtitle("UKB - XGB")
ggsave("figures/PC1_BrainAgeCross_XGB.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
# UKB - LASSO
gs = ggplot(data = df.Test.long,
mapping = aes(x = LASSO.BAG_mean,
y = PC1)) +
geom_point(shape = 21, color = "black", fill = "#EF820D", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#F05E23",
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Feature change (PC1)")) +
xlab(expression("Brain Age Delta"[long])) +
ggtitle("UKB - LASSO")
ggsave("figures/PC1_BrainAgeCross_LASSO.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
# UKB - Extreme Bossting
gs = ggplot(data = df.Test.long,
mapping = aes(x = BAG_change_T,
y = PC1)) +
geom_point(shape = 21, color = "black", fill = "#FCF4A3", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#F8DE7E",
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Feature change (PC1)")) +
xlab(expression("Brain Age Delta"[long])) +
ggtitle("UKB - XGB")
ggsave("figures/PC1_BrainAgeLong_UKB.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
# UKB - LASSO
gs = ggplot(data = df.Test.long,
mapping = aes(x = LASSO.BAG_change_T,
y = PC1)) +
geom_point(shape = 21, color = "black", fill = "#FCF4A3", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#F8DE7E",
size = 2.3,
level = .999) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Feature change (PC1)")) +
xlab(expression("Brain Age Delta"[long])) +
ggtitle("UKB - LASSO")
ggsave("figures/PC1_BrainAgeLong_LASSO.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
```
# save raw data to reproduce plots
```{r save raw data for plots}
load(file = file.path(data_folder,"age.effects.Rda"))
remake_plots = list()
remake_plots[["fig1c"]] =
data.frame(age = c(df.Train$age %>% jitter(0.1),
df.Test$age %>% jitter(0.1)),
set = c(length(df.Train$age),
length(df.Test$age)))
remake_plots[["fig1d"]] =
df.age
remake_plots[["fig1e"]] =
data.frame(age = df.Train$age %>% jitter(0.1),
BA = df.Train$BA)
remake_plots[["fig1f"]] =
data.frame(age = df.Test$age %>% jitter(0.1),
BA = df.Test$BA)
remake_plots[["fig2a"]] =
data.frame(
BAG_mean = df.Test.long$BAG_mean,
BAG_change_T = df.Test.long$BAG_change_T
)
remake_plots[["fig2b"]] =
data.frame(
LASSO.BAG_mean = df.Test.long$LASSO.BAG_mean,
LASSO.BAG_change_T = df.Test.long$LASSO.BAG_change_T
)
remake_plots[["fig3a"]] =
data.frame(BirthWeightNormal = df.Test %>% filter(wave == 1) %>% .$BirthWeightNormal,
BAG_corr_gam = df.Test %>% filter(wave == 1) %>% .$BAG_corr_gam)
remake_plots[["fig3b"]] =
data.frame(BirthWeightNormal = df.Test %>% filter(wave == 1) %>% .$BirthWeightNormal,
LASSO.BAG_corr_gam = df.Test %>% filter(wave == 1) %>% .$LASSO.BAG_corr_gam)
remake_plots[["fig3c"]] =
data.frame(
pgsBA = df.Test %>% filter(wave == 1) %>% .$pgsBA,
BAG_corr_gam = df.Test %>% filter(wave == 1) %>% .$BAG_corr_gam)
save(remake_plots,
file = "figures/remake_plots.Rda")
```