BrainAge_VidalPineiro_Lifebrain_Tests.Rmd
---
title: "BrainAge_VidalPineiro_Tests(Lifebrain replication)"
author: "dvp"
date: "11/2/2020"
output:
html_document: default
pdf_document: default
---
# Setup
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(magrittr)
#library(itsadug)
#library(lavaan)
#library(reshape2)
library(broom)
library(lmerTest)
library(MuMIn)
data_folder=file.path("./data/LifeBrain",paste0("noExcl_resCohort_outlierremove"))
options(bitmapType = "cairo")
```
# Loading data and common Preproc
load data
```{r load}
load(file.path(data_folder, "ResultsBrainAge.Rda"))
df.Test = df.Test[[1]]
df.Test %<>% mutate(BAc = age + BAG_corr_gam)
```
# Compute change
Delta considered as the GAM-coefficient corrected GAP
Change in Delta derived from linear models
```{r compute delta change}
# compute interceptect (demeaned) and slope
tmp.BAG_corr = df.Test %>%
group_by(CrossProject_ID) %>%
mutate(Time = Time - mean(Time)) %>%
do(fit = lm(BAG_corr_gam ~ Time, data = .))
tmp.BAG_corr$df = lapply(tmp.BAG_corr$fit, tidy)
df.lm = data.frame(CrossProject_ID = tmp.BAG_corr$CrossProject_ID,
BAG_corr_mean = lapply(tmp.BAG_corr$df,
function(x) x$estimate[[1]]) %>% simplify2array(),
BAG_corr_change = lapply(tmp.BAG_corr$df,
function(x) x$estimate[[2]]) %>% simplify2array())
df.lm2 = df.Test %>%
group_by(CrossProject_ID) %>%
summarise(sex = first(sex),
AgeBsl = first(AgeBsl),
cohort = first(cohort),
eICV = mean(Vol_EstimatedTotalIntraCranial_wb),
AgeGap = first(AgeGap),
n = n())
df.Test.long = left_join(df.lm, df.lm2)
```
# Analysis
### a) Cross/sectional delta predicts brain aging
```{r Brain aging to aging}
## BAG (Brain Age Gap bias-corrected at tp1 predicts less Brain Aging Change with time)
lm.pred = lm(BAG_corr_change ~ BAG_corr_mean, data = df.Test.long) # uncorrected. just to check
summary(lm.pred)
# as in UKB
lm.main = lmer(BAG_corr_change ~ BAG_corr_mean + AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long, REML = T)
summary(lm.main)
# save residuals
df.Test.long$residuals = summary(lm.main)$residuals %>% as.numeric()
# UVE - unique variance explained
lm.main_rm = lmer(BAG_corr_change ~ AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long, REML = T)
r.squaredGLMM(lm.main) -r.squaredGLMM(lm.main_rm)
# visually check assumptions
plot(lm.main)
qqnorm(residuals(lm.main))
plot(lm.main@frame$BAG_corr_mean,residuals(lm.main))
# supplementary analysis controling for age gap
# control for agegap and restrict analysis to 4 years gap
lm.main_agegap1 = lmer(BAG_corr_change ~ BAG_corr_mean + AgeBsl + AgeGap + sex + scale(eICV) + (1 | cohort), data = df.Test.long, REML = T)
summary(lm.main_agegap1)
sum(df.lm2$AgeGap > 4)
lm.main_agegap2 = lmer(BAG_corr_change ~ BAG_corr_mean + AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long %>% filter(AgeGap > 4), REML = T)
summary(lm.main_agegap2)
```
### b) Fig. 2b. Plotting cross to long
```{r Plot cross to long}
# Lifebrain - XGB
gs = ggplot(data = df.Test.long,
mapping = aes(x = BAG_corr_mean,
y = residuals)) +
geom_point(shape = 21, color = "black", fill = "#66a61e", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#1b9e77",
size = 3) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Brain Age Delta"[long])) +
xlab(expression("Brain Age Delta"[cross])) +
ggtitle("Lifebrain - XGB")
ggsave("figures/BrainAge_Lifebrain.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
load("figures/remake_plots.Rda")
remake_plots[["fig2c"]] =
data.frame(
BAG_corr_mean = df.Test.long$BAG_corr_mean,
BAG_corr_change = df.Test.long$BAG_corr_change)
save(remake_plots,
file = "figures/remake_plots.Rda")
```
# 4) PCA/feature change
### a) feature change
```{r PCA_ICA - compute feature change}
# open features of interest (if not loaded)
load(file.path(data_folder,"T1w_vars.Rda"))
# open harmonized features
df.Harmonize = read.csv("data/Harmonize.csv", stringsAsFactors = F) %>%
mutate(feature = ConensusName,
modality = as.factor(Stats_file)) %>%
dplyr::select(feature, modality)
# compute features change
if (!file.exists(file.path(data_folder,"feature_change.Rda"))) {
tmp = df.Test %>%
dplyr::select(CrossProject_ID,
Time,
age,
BAG_corr_gam,
T1w_vars) %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "value") %>%
group_by(CrossProject_ID, feature) %>%
mutate(Time = Time - mean(Time)) %>%
do(fit = lm(value ~ Time, data = .))
change = lapply(tmp$fit, function(x) x$coefficients[["Time"]]) %>% simplify2array()
df.slope.feature = data.frame(CrossProject_ID = tmp$CrossProject_ID,
feature = tmp$feature,
change = change)
rm("tmp")
save(df.slope.feature, file = file.path(data_folder,"feature_change.Rda"))
} else {
load(file.path(data_folder,"feature_change.Rda"))
}
df.feature.change = pivot_wider(df.slope.feature,
names_from =feature,
values_from = change)
df.Test.long.f = left_join(df.Test.long, df.feature.change)
tmp = df.Test.long.f %>% dplyr::select(T1w_vars) %>% lapply(., t.test)
df.feature.change = data.frame(feature = names(tmp),
p.value = lapply(tmp, function(x) x$p.value) %>% simplify2array(),
statistic = lapply(tmp, function(x) x$statistic) %>% simplify2array(),
estimate = lapply(tmp, function(x) x$estimate) %>% simplify2array()) %>%
mutate(logp = -log10(p.value),
logp.sig = if_else(estimate > 0, logp, -logp))
#fdr.thr = -log10(BY(df.feature.change$p.value, alpha = 0.05)$FDR)
bf.thr = -log10(0.05/length(T1w_vars))
sum(df.feature.change$logp > bf.thr)/length(T1w_vars)
df.feature.change = left_join(df.feature.change,df.Harmonize)
tmp =df.feature.change %>%
group_by(modality) %>%
summarise(logp =mean(logp)) %>%
mutate(order = -rank(logp)) %>%
dplyr::select(-logp)
df.feature.change = left_join(df.feature.change, tmp) %>%
arrange(order, -logp) %>%
mutate(order2 = 1:length(T1w_vars))
df.feature.change$modality =
plyr::mapvalues(df.feature.change$modality,
from =c("Area_Aparc","GWC_Aparc","Intensity_Aseg","Thickness_Aparc","Volume_Aseg","Volume_Aparc"),
to = c("area (c)", "gwc (c)","intensity (s)","thickness (c)", "volume (s)", "volume (c)"))
colorscale =c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')
gs = ggplot(df.feature.change, aes(x = order2, y = logp, group = modality, fill = modality)) +
geom_point(shape = 21, size = 3, alpha = .1) +
geom_point(data = df.feature.change %>% filter(logp > bf.thr), shape = 21, size = 3) +
geom_hline(yintercept = bf.thr, linetype = 3, color ="grey40",size = 1.5) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(5,"point"),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
xlab("Feature") +
ylab(expression("Longitudinal Change [-log"[10]*"(p)")) +
scale_fill_manual(values = colorscale) +
scale_y_continuous(trans = "sqrt", breaks = c(1.3,6,20,50,100)) +
ggtitle("Lifebrain")
ggsave("figures/feature_change_Lifebrain.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
write.csv(df.feature.change, file = "figures/feature_change_Lifebrain.csv")
df.Test.long %<>% left_join(.,df.Test.long.f %>% dplyr::select(CrossProject_ID, T1w_vars))
```
### b) feature change v. delta_0 and delta_0 change.
```{r feature and delta}
mod = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lmer(change ~ BAG_corr_mean+ AgeBsl + sex + scale(eICV) + (1 | cohort), data = ., REML = T))
# UVE - unique variance explained
mod_uve = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lmer(change ~ AgeBsl + sex + scale(eICV) + (1 | cohort), data = ., REML = T))
uve = lapply(mod$fit, function(x) r.squaredGLMM(x)[[1]]) %>% simplify2array() -
lapply(mod_uve$fit, function(x) r.squaredGLMM(x)[[1]]) %>% simplify2array()
df.feature2delta_mean = data.frame(feature = mod$feature,
estimate.xgb =lapply(mod$fit, function(x) summary(x)$coefficients["BAG_corr_mean","Estimate"]) %>% simplify2array(),
p.value.xgb =lapply(mod$fit, function(x) summary(x)$coefficients["BAG_corr_mean","Pr(>|t|)"]) %>% simplify2array(),
uve.xgb = uve) %>%
mutate(logp.xgb = -log10(p.value.xgb),
logp.sig.xgb = if_else(estimate.xgb > 0, logp.xgb, -logp.xgb))
bf.thr = -log10(0.05/length(T1w_vars))
sum(df.feature2delta_mean$logp.xgb > bf.thr)
df.feature2delta_mean = left_join(df.feature.change %>% dplyr::select(feature,
modality,
statistic,
estimate,
logp),
df.feature2delta_mean,
by = "feature")
## prepare plot XGB
tmp =df.feature2delta_mean %>%
group_by(modality) %>%
summarise(logp.xgb =mean(logp.xgb)) %>%
mutate(order = -rank(logp.xgb)) %>%
dplyr::select(-logp.xgb)
df.feature2delta_mean_plot = left_join(df.feature2delta_mean,
tmp) %>%
arrange(order, -logp.sig.xgb) %>%
mutate(order2 = 1:length(T1w_vars))
colorscale =c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')
gs = ggplot(df.feature2delta_mean_plot,
aes(x = order2,
y = logp.sig.xgb,
group = modality,
fill = modality)) +
geom_point(shape = 21,
size = 3,
alpha = .1) +
geom_point(data = df.feature2delta_mean_plot %>% filter(logp.xgb > bf.thr),
shape = 21,
size = 3) +
geom_hline(yintercept = bf.thr,
linetype = 3,
color ="grey40",
size = 1.5) +
geom_hline(yintercept = -bf.thr, linetype = 3, color ="grey40",size = 1.5) +
geom_hline(yintercept = 1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
geom_hline(yintercept = -1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(5,"point"),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
xlab("Feature") +
ylab(expression("feature change vs. delta"[cross]*" [-log"[10]*"(p)")) +
scale_fill_manual(values = colorscale) +
ggtitle("Lifebrain - XGB")
ggsave("figures/feature_change_v_delta_Lifebrain.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
write.csv(df.feature2delta_mean, file = "figures/feature_change2delta_mean_Lifebrain.csv")
```
### c) feature change v. delta change
```{r feature and delta change}
mod = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lmer(change ~ BAG_corr_change+ AgeBsl + sex + scale(eICV) + (1 | cohort), data = ., REML = T))
# UVE - unique variance explained
mod_uve = df.Test.long %>%
pivot_longer(T1w_vars,
names_to = "feature",
values_to = "change") %>%
group_by(feature) %>%
do(fit = lmer(change ~ AgeBsl + sex + scale(eICV) + (1 | cohort), data = ., REML = T))
uve = lapply(mod$fit, function(x) r.squaredGLMM(x)[[1]]) %>% simplify2array() -
lapply(mod_uve$fit, function(x) r.squaredGLMM(x)[[1]]) %>% simplify2array()
df.feature2delta_change = data.frame(feature = mod$feature,
estimate.xgb =lapply(mod$fit, function(x) summary(x)$coefficients["BAG_corr_change","Estimate"]) %>% simplify2array(),
p.value.xgb =lapply(mod$fit, function(x) summary(x)$coefficients["BAG_corr_change","Pr(>|t|)"]) %>% simplify2array(),
uve.xgb = uve) %>%
mutate(logp.xgb = -log10(p.value.xgb),
logp.sig.xgb = if_else(estimate.xgb > 0, logp.xgb, -logp.xgb))
bf.thr = -log10(0.05/length(T1w_vars))
sum(df.feature2delta_change$logp.xgb > bf.thr)
df.feature2delta_change = left_join(df.feature.change %>% dplyr::select(feature,
modality,
statistic,
estimate,
logp),
df.feature2delta_change,
by = "feature")
## prepare plot XGB
tmp =df.feature2delta_change %>%
group_by(modality) %>%
summarise(logp.xgb =mean(logp.xgb)) %>%
mutate(order = -rank(logp.xgb)) %>%
dplyr::select(-logp.xgb)
df.feature2delta_change_plot = left_join(df.feature2delta_change,
tmp) %>%
arrange(order, -logp.sig.xgb) %>%
mutate(order2 = 1:length(T1w_vars))
colorscale =c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')
gs = ggplot(df.feature2delta_change_plot,
aes(x = order2,
y = logp.sig.xgb,
group = modality,
fill = modality)) +
geom_point(shape = 21,
size = 3,
alpha = .1) +
geom_point(data = df.feature2delta_change_plot %>% filter(logp.xgb > bf.thr),
shape = 21,
size = 3) +
geom_hline(yintercept = bf.thr,
linetype = 3,
color ="grey40",
size = 1.5) +
geom_hline(yintercept = -bf.thr, linetype = 3, color ="grey40",size = 1.5) +
geom_hline(yintercept = 1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
geom_hline(yintercept = -1.3, linetype = 1, color ="grey40",size = 1.5, alpha =.4) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
axis.title.y = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(5,"point"),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
xlab("Feature") +
ylab(expression("feature change vs. delta"[cross]*" [-log"[10]*"(p)")) +
scale_fill_manual(values = colorscale) +
ggtitle("Lifebrain - XGB")
ggsave("figures/feature_change_v_delta_change_Lifebrain.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
write.csv(df.feature2delta_change, file = "figures/feature_change2delta_change_Lifebrain.csv")
```
### d) PCA
```{r PCA on change}
# select only variables with significant change over time
T1w_change = df.feature.change$feature[df.feature.change$logp > bf.thr]
# prcomp
mod.pca = prcomp(df.Test.long[T1w_change], center =F)
df.PCA = data.frame(CrossProject_ID = df.Test.long$CrossProject_ID,
PC1 = mod.pca$x[,1]) %>%
mutate(PC1 = if_else(abs(PC1) > 10, NaN, PC1))
summary(mod.pca)$importance[,1:10]
#weights
df.pca.weights = data.frame(
features = rownames(mod.pca$rotation),
weight = mod.pca$rotation[,1])
write.csv(df.pca.weights, file = "figures/pca_weights_Lifebrain.csv")
df.Test.long %<>% left_join(., df.PCA)
# basis model
lm.uve = lmer(PC1 ~AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long, REML = T)
summary(lm.uve)
# mean xgb
lm.pred = lmer(PC1 ~ BAG_corr_mean + AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long, REML = T)
summary(lm.pred)
r.squaredGLMM(lm.pred) -r.squaredGLMM(lm.uve)
# change xgb
lm.predCh = lmer(PC1 ~ BAG_corr_change + AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long, REML = T)
summary(lm.predCh)
r.squaredGLMM(lm.predCh) -r.squaredGLMM(lm.uve)
```
#### PCA figures
```{r Plot Figure 2a-b. Brain Age to Brain Aging}
#ORANGE SCALE
#yellow scale
#Lifebrain - Extreme Bossting
gs = ggplot(data = df.Test.long,
mapping = aes(x = BAG_corr_mean,
y = PC1)) +
geom_point(shape = 21, color = "black", fill = "#EF820D", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#F05E23",
size = 3) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title = element_text(size = 20),
#axis.title.y = element_text(size = 12),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Feature change (PC1)")) +
xlab(expression("Brain Age Delta"[cross])) +
ggtitle("Lifebrain - XGB")
ggsave("figures/PC1_BrainAgeCross_Lifebrain.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
# Lifebrain - Extreme Bossting
gs = ggplot(data = df.Test.long,
mapping = aes(x = BAG_corr_change,
y = PC1)) +
geom_point(shape = 21, color = "black", fill = "#FCF4A3", size = 4, stroke = 1.5) +
geom_smooth(method = "lm",
color = "#F8DE7E",
size = 3) +
theme_classic() +
theme(legend.position = 'none',
axis.text = element_text(size = 16),
axis.title.y = element_text(size = 20),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, vjust = 0)) +
ylab(expression("Feature change (PC1)")) +
xlab(expression("Brain Age Delta"[long])) +
ggtitle("Lifebrain - XGB")
ggsave("figures/PC1_BrainAgeLong_Lifebrain.png",
dpi = 500,
plot = gs,
width = 10,
height = 10,
units = "cm")
```
### PCA
```{r PCA on change}
#df.Test.long %<>% left_join(.,df.Test.long.f %>% dplyr::select(eid, T1w_vars))
T1w_change = df.feature.change$feature[df.feature.change$logp > bf.thr]
# prcomp
mod.pca = prcomp(df.Test.long[T1w_change], center =F)
data.frame(mod.pca$rotation[,1]) %>% View() # positive values less decline. note different dirsctionality with delta
df.PCA = data.frame(CrossProject_ID = df.Test.long$CrossProject_ID, mod.pca$x[,1:4])
# pc1 = .20, pc2 = .17, pc3 = .04, pc4 = .03
df.Test.long.test = df.Test.long
df.Test.long.test %<>% left_join(., df.PCA)
## uve
lm.uve = lmer(PC1 ~ AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long.test, REML = T)
lm.pred = lmer(PC1 ~ BAG_corr_mean + AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long.test, REML = T)
summary(lm.pred)
r.squaredGLMM(lm.pred) -r.squaredGLMM(lm.uve)
lm.pred = lmer(PC1 ~ BAG_corr_change + AgeBsl + sex + scale(eICV) + (1 | cohort), data = df.Test.long.test, REML = T)
summary(lm.pred)
```