figure6.R
suppressPackageStartupMessages({
library(Seurat)
library(stringr)
library(ggplot2)
library(ggpubr)
library(future)
require(scales)
library(RColorBrewer)
library("readxl")
library(dplyr)
library(dendextend)
library('conover.test')
library("devtools")
library(ggpmisc)
})
### figure 6 + supplements
# import the Dopaminergic nuclei (mDA) dataset.
sobj <- readRDS("/path/to/dir/mDA.rds")
# set levels by territory, but re-order again based on dendrogram (desired order)
sobj <- SetIdent(sobj, value = "territory")
TER_levels <- c("ML", "Sox6", "Gad2", "Fbn2", ..., )
my.cols = c("vector of customized colors")
sobj$territory <- factor(x = sobj$territory, levels = TER_levels)
tiff(file = "/path/to/dir/plot.tiff",
units="cm", width=15, height=10, res=300)
VlnPlot(sobj, features = "Vulmodule1", cols = my.cols, group.by = "territory", pt.size = 0, combine = TRUE) +
stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
labs(y="avg.Exp.gene.set", title = "vulnerable module mDA territories")
dev.off()
tiff(file = "/path/to/dir/plot.tiff",
units="cm", width=15, height=10, res=300)
VlnPlot(sobj, features = "Resmodule1", cols = my.cols, group.by = "territory", pt.size = 0, combine = TRUE) +
stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
labs(y="avg.Exp.gene.set", title = "resilient module mDA territories")
dev.off()
# set levels by neighborhoods, but re-order again based on dendrogram (desired order)
sobj <- SetIdent(sobj, value = "neighborhood")
NH_levels <- c("ML_NH1", "ML_NH2", "Sox6_NH1", "Sox6_NH2", ..., )
my.cols = c("vector of customized colors")
sobj$neighborhood <- factor(x = sobj$neighborhood, levels = NH_levels)
tiff(file = "/path/to/dir/plot.tiff",
units="cm", width=30, height=10, res=300)
VlnPlot(sobj, features = "Vulmodule1", cols = my.cols, group.by = "neighborhood", pt.size = 0, combine = TRUE) +
stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
labs(y="avg.Exp.gene.set", title = "vulnerable module mDA neighborhoods")
dev.off()
tiff(file = "/path/to/dir/plot.tiff",
units="cm", width=30, height=10, res=300)
VlnPlot(sobj, features = "Resmodule1", cols = my.cols, group.by = "neighborhood", pt.size = 0, combine = TRUE) +
stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
labs(y="avg.Exp.gene.set", title = "resilient module mDA neighborhoods")
dev.off()
# Fig 6 E neighborhood pairwise comparison of Resilience module
# RES = resilience module
UP.tb <- table(sobj@assays$RES@data, sobj$neighborhood)
UP.df <- as.data.frame(UP.tb)
colnames(UP.df)
#[1] "Var1" "Var2" "Freq"
# neighborhoods re-ordered based on dendrogram and only choose mDA neighborhoods.
mda.NH <- c('ML_NH1', 'ML_NH2', 'Sox6_NH1', 'Sox6_NH2', 'Sox6_NH3', 'Sox6_NH4',
'Gad2_NH1', 'Gad2_NH2', 'Fbn2_NH1', 'Fbn2_NH2', 'Pcsk6_NH1', 'Pcsk6_NH2',
'Pdia5_NH1', 'Pdia5_NH2', 'Col24a1', 'Vip', 'Otx2_NH1', 'Otx2_NH2')
# subset the defined neighborhoods in mda.NH from the UP.df
for (i in mda.NH) {
# Subset the dataframe for the current cluster
cluster_df <- UP.df[UP.df$Var2 == i, ]
# Filter the subsetted dataframe to retain only rows where Freq == 1
cluster_df <- cluster_df[cluster_df$Freq == 1, ]
# Assign the filtered dataframe to a new dataframe with the cluster name
assign(i, cluster_df)
}
# merge into a long df:
ldf <- do.call('rbind', list(ML_NH1, ML_NH2, Sox6_NH1, Sox6_NH2, Sox6_NH3, Sox6_NH4,
Gad2_NH1, Gad2_NH2, Fbn2_NH1, Fbn2_NH2, Pcsk6_NH1, Pcsk6_NH2,
Pdia5_NH1, Pdia5_NH2, Col24a1, Vip, Otx2_NH1, Otx2_NH2))
colnames(ldf)
#[1] "Var1" "Var2" "Freq"
# large sample size, use lolcat package for test of normality
install_git("https://mikeburr.visualstudio.com/DefaultCollection/lolcat-public/_git/lolcat")
library(lolcat)
ldf$Var1 <- as.numeric(as.character(ldf$Var1 ))
skewness.test(ldf$Var1)
# D'Agostino Skewness Normality Test
kurtosis.test(ldf$Var1 )
# D'Agostino Kurtosis Normality Test
ggqqplot(ldf$Var1 ) + ggtitle('vulnerable all NHs')
# test of homogeneity of variance
fligner.test(ldf$Var1 ~ ldf$Var2, data = ldf)
# non-parametric test (data is not normally distributed)
kruskal.test(ldf$Var1 ~ ldf$Var2, data = ldf)
# Welch's ANOVA because data shows heteroscedasticity (different groups have different standard deviations)
oneway.test(ldf$Var1 ~ ldf$Var2, data = ldf, var.equal = FALSE)
### conclusion: both tests reject the null hypothesis: there is significant difference among groups
# post hoc pairwise tests: the Conover-Iman test
CI_NH_res <- as.data.frame(conover.test(ldf$Var1, ldf$Var2, method="bh", list = TRUE))
CI_NH_res <- CI_NH_res[, c("comparisons", "P", "P.adjusted", "chi2", "T")]
# visualization of pairwise comparisons for mDA neighborhoods:
CI_NH_res$comparisons <- as.character(CI_NH_res$comparisons)
# remove white space in comparison column
CI_NH_res$comparisons <- gsub('\\s+', '', CI_NH_res$comparisons)
# write the name on the right of the "-" in comparison string as a new column y:
CI_NH_res$y <- gsub(".*-", "", CI_NH_res$comparisons)
# write the name on the left of the "-" in comparison string as a new column x:
CI_NH_res$x <- gsub("\\-.*", "", CI_NH_res$comparisons)
# get the columns needed for visualization:
vis.df <- CI_NH_res[, c("x", "y", "P.adjusted")]
## plot using geom_tile:
## for p <= alpha/2 (0.025)
tiff(file = "/path/to/dir/tilePlot.tiff",
units="cm", width=20, height=20, res=300)
ggplot(vis.df, aes(x, y, fill = P.adjusted)) + geom_tile() +
geom_hline(yintercept = seq_along(vis.df$y), color='grey') +
geom_vline(xintercept = seq_along(vis.df$x), color='grey') +
scale_fill_gradient(low = 'red', high = 'blue', limits=c(0, 0.025)) +
ggtitle('Res module comparison neighborhoods') + coord_fixed() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 14, face = 'bold')) +
theme(axis.text.y = element_text(size = 14, face = 'bold'))
dev.off()
# Fig Supplement 6 B
# visualization of post hoc pairwise test: CI = Conover-Iman test
# CI.clusters derived from cell_loss.R script
# remove white space in comparison column
CI.clusters$comparisons <- gsub('\\s+', '', CI.clusters$comparisons)
# write the name on the right of the "-" in comparison string as a new column y:
CI.clusters$y <- gsub(".*-", "", CI.clusters$comparisons)
# write the name on the left of the "-" in comparison string as a new column x:
CI.clusters$x <- gsub("\\-.*", "", CI.clusters$comparisons)
# get the columns needed for visualization:
vis.df <- CI.clusters[, c("x", "y", "P.adjusted")]
## for p <= alpha/2 (0.025)
ggplot(vis.df, aes(x, y, fill = P.adjusted)) + geom_tile() +
geom_hline(yintercept = seq_along(vis.df$y), color='grey') +
geom_vline(xintercept = seq_along(vis.df$x), color='grey') +
scale_fill_gradient(low = 'red', high = 'blue', limits=c(0, 0.025)) +
ggtitle('normalized cell loss across mDA clusters')
### figure supplement 6 C & D
# set cell loss as the dependent variable lm(Y~X) : Y: dependent X:independent (predictor)
# set VUL (with DAT) or newVUL (without DAT) as predictor
# calculate regression (linear model) fit linear regression models
# AverageExpression() Returns averaged expression values for each identity class
# Returns a matrix with genes as rows, identity classes as columns
vul <- as.data.frame(AverageExpression(
object = sobj,
assays = 'VUL',
features = NULL,
return.seurat = FALSE,
group.by = "kmeans71",
slot = "data",
verbose = TRUE
))
newvul <- as.data.frame(AverageExpression(
object = sobj,
assays = 'newVUL',
features = NULL,
return.seurat = FALSE,
group.by = "kmeans71",
slot = "data",
verbose = TRUE
))
## to drop the "vul" and "newVUL." from the column names
colnames(vul) <- gsub("VUL.", "", colnames(vul))
colnames(newvul) <- gsub("newVUL.", "", colnames(newvul))
# remove non-mDA clusters, ML and unassigned clusters 12, 26, 51
keepers <- c('40', '52', '17', '28', '27', '33', '23', '29', '66', '9', '67', '31', '44',
'22', '46', '14', '11', '30', '1', '38', '41', '61', '50', '19', '42', '4', '10',
'2', '39', '60', '5', '43', '37', '53')
vulmda <- vul[colnames(vul) %in% keepers]
newvulmda <- newvul[colnames(newvul) %in% keepers]
vulmda <- as.data.frame(t(vulmda))
newvulmda <- as.data.frame(t(newvulmda))
colnames(vulmda)[1] <- 'vul'
colnames(newvulmda)[1] <- 'newvul'
dfmda <- cbind.data.frame(vulmda, newvulmda)
dfmda$clusters <- rownames(dfmda)
dfmda <- dfmda[, c(3, 1, 2)]
### sqrt() moderate transformation to meet the normality assumption of linear models
shapiro.test(sqrt(dfmda$vul))
# Shapiro-Wilk normality test
# data: sqrt(dfmda$vul)
# W = 0.96427, p-value = 0.3224
shapiro.test(sqrt(dfmda$newvul))
# Shapiro-Wilk normality test
# data: sqrt(dfmda$newvul)
# W = 0.96016, p-value = 0.2458
# from mDA.R script: load the normalized cell loss per mDA sub-clusters
df <- read.csv(file = "path/to/file/cell_loss_km71subclusters.csv", header = TRUE, sep = ",", dec = ".")
# average cell loss per cluster
new_df <- df %>%
group_by(cluster) %>%
summarise(mean_normalized_loss = mean(normalized_loss))
# Get the order of clusters in dfmda (above)
order_clusters <- dfmda$clusters
# Reorder the rows of new_df to match the order of clusters in dfmda
new_df_reordered <- new_df[match(order_clusters, new_df$cluster), ]
new_df_reordered <- as.data.frame(new_df_reordered)
# create a new df for linear models
lmdf <- cbind.data.frame(dfmda, new_df_reordered)
lmdf <- lmdf[, c(1:3, 5)]
### fit linear regression model for Vul (with DAT)
lm1 <- lm(mean_normalized_loss ~ sqrt(vul), data = lmdf)
summary(lm1)
# calculate a 95% confidence interval for the regression coefficient
confint(lm1, 'sqrt(vul)', level = 0.95)
# 2.5 % 97.5 %
# sqrt(vul) 0.3468961 0.5536449
p1 <- ggplot(lm1, aes(mean_normalized_loss, lm1$model$`sqrt(vul)`)) + geom_point() +
ggtitle("formula = avg.norm.loss ~ sqrt(Vul)") + stat_poly_eq() +
geom_smooth(method="lm", col="red") + stat_regline_equation(label.x = 0, label.y = 1.25) +
theme_classic()
p1 <- LabelPoints(plot = p1, points = colnames(lm1$residuals ),
size = 8, color='blue', repel = T, xnudge = 0, ynudge = 0)
plot(p1)
### fit linear regression model for newVul (without DAT)
lm2 <- lm(mean_normalized_loss ~ sqrt(newvul), data = lmdf)
summary(lm2)
# calculate a 95% confidence interval for the regression coefficient
confint(lm2, 'sqrt(newvul)', level = 0.95)
# 2.5 % 97.5 %
# sqrt(newvul) 0.3592504 0.5927673
p2 <- ggplot(lm2, aes(mean_normalized_loss, lm2$model$`sqrt(newvul)`)) + geom_point() +
ggtitle("formula = avg.norm.loss ~ sqrt(newVul)") + stat_poly_eq() +
geom_smooth(method="lm", col="red") + stat_regline_equation(label.x = 0, label.y = 1.25) +
theme_classic()
p2 <- LabelPoints(plot = p2, points = colnames(lm2$residuals ),
size = 8, color='blue', repel = T, xnudge = 0, ynudge = 0)
plot(p2)
sessionInfo()