Content - 5c355ea5a0703150f0fc534d338be861997c8dd7 - 9692782/figure6.R

visit type:
Tip revision: 44f72a1e27db1bafefb159cdc26bc71f3691bf54 authored by Behzad Yaghmaeian Salmani on 07 March 2024, 16:41:19 UTC
First commit
Tip revision: 44f72a1
figure6.R
suppressPackageStartupMessages({
  library(Seurat)
  library(stringr)
  library(ggplot2)
  library(ggpubr)
  library(future)
  require(scales)
  library(RColorBrewer)
  library("readxl")
  library(dplyr)
  library(dendextend)
  library('conover.test')
  library("devtools")
  library(ggpmisc)
})

### figure 6 + supplements 

# import the Dopaminergic nuclei (mDA) dataset.
sobj <- readRDS("/path/to/dir/mDA.rds")

# set levels by territory, but re-order again based on dendrogram (desired order) 
sobj <- SetIdent(sobj, value = "territory")

TER_levels <- c("ML", "Sox6", "Gad2", "Fbn2", ..., )

my.cols = c("vector of customized colors")

sobj$territory <- factor(x = sobj$territory, levels = TER_levels)

tiff(file = "/path/to/dir/plot.tiff", 
     units="cm", width=15, height=10, res=300)

VlnPlot(sobj, features = "Vulmodule1", cols = my.cols, group.by = "territory", pt.size = 0, combine = TRUE) +
  stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
  labs(y="avg.Exp.gene.set", title = "vulnerable module mDA territories")

dev.off()

tiff(file = "/path/to/dir/plot.tiff", 
     units="cm", width=15, height=10, res=300)

VlnPlot(sobj, features = "Resmodule1", cols = my.cols, group.by = "territory", pt.size = 0, combine = TRUE) +
  stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
  labs(y="avg.Exp.gene.set", title = "resilient module mDA territories")

dev.off()

# set levels by neighborhoods, but re-order again based on dendrogram (desired order) 
sobj <- SetIdent(sobj, value = "neighborhood")

NH_levels <- c("ML_NH1", "ML_NH2", "Sox6_NH1", "Sox6_NH2", ..., )

my.cols = c("vector of customized colors")

sobj$neighborhood <- factor(x = sobj$neighborhood, levels = NH_levels)

tiff(file = "/path/to/dir/plot.tiff", 
     units="cm", width=30, height=10, res=300)

VlnPlot(sobj, features = "Vulmodule1", cols = my.cols, group.by = "neighborhood", pt.size = 0, combine = TRUE) +
  stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
  labs(y="avg.Exp.gene.set", title = "vulnerable module mDA neighborhoods")

dev.off()

tiff(file = "/path/to/dir/plot.tiff", 
     units="cm", width=30, height=10, res=300)

VlnPlot(sobj, features = "Resmodule1", cols = my.cols, group.by = "neighborhood", pt.size = 0, combine = TRUE) +
  stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
  labs(y="avg.Exp.gene.set", title = "resilient module mDA neighborhoods")

dev.off()


# Fig 6 E neighborhood pairwise comparison of Resilience module
# RES = resilience module 

UP.tb <- table(sobj@assays$RES@data, sobj$neighborhood)

UP.df <- as.data.frame(UP.tb)  

colnames(UP.df)
#[1] "Var1" "Var2" "Freq"

# neighborhoods re-ordered based on dendrogram and only choose mDA neighborhoods. 

mda.NH <- c('ML_NH1', 'ML_NH2', 'Sox6_NH1', 'Sox6_NH2', 'Sox6_NH3', 'Sox6_NH4', 
            'Gad2_NH1', 'Gad2_NH2', 'Fbn2_NH1', 'Fbn2_NH2', 'Pcsk6_NH1', 'Pcsk6_NH2', 
            'Pdia5_NH1', 'Pdia5_NH2', 'Col24a1', 'Vip', 'Otx2_NH1', 'Otx2_NH2')

# subset the defined neighborhoods in mda.NH from the UP.df 

for (i in mda.NH) {
  # Subset the dataframe for the current cluster
  cluster_df <- UP.df[UP.df$Var2 == i, ]
  
  # Filter the subsetted dataframe to retain only rows where Freq == 1
  cluster_df <- cluster_df[cluster_df$Freq == 1, ]
  
  # Assign the filtered dataframe to a new dataframe with the cluster name
  assign(i, cluster_df)
}

# merge into a long df:
ldf <- do.call('rbind', list(ML_NH1, ML_NH2, Sox6_NH1, Sox6_NH2, Sox6_NH3, Sox6_NH4, 
                             Gad2_NH1, Gad2_NH2, Fbn2_NH1, Fbn2_NH2, Pcsk6_NH1, Pcsk6_NH2, 
                             Pdia5_NH1, Pdia5_NH2, Col24a1, Vip, Otx2_NH1, Otx2_NH2))

colnames(ldf)
#[1] "Var1" "Var2" "Freq"

# large sample size, use lolcat package for test of normality
install_git("https://mikeburr.visualstudio.com/DefaultCollection/lolcat-public/_git/lolcat")

library(lolcat)

ldf$Var1 <- as.numeric(as.character(ldf$Var1 )) 

skewness.test(ldf$Var1)
# D'Agostino Skewness Normality Test

kurtosis.test(ldf$Var1 )
# D'Agostino Kurtosis Normality Test

ggqqplot(ldf$Var1 ) + ggtitle('vulnerable all NHs')

# test of homogeneity of variance
fligner.test(ldf$Var1 ~ ldf$Var2, data = ldf)

# non-parametric test (data is not normally distributed)

kruskal.test(ldf$Var1 ~ ldf$Var2, data = ldf)

# Welch's ANOVA because data shows heteroscedasticity (different groups have different standard deviations)

oneway.test(ldf$Var1 ~ ldf$Var2, data = ldf, var.equal = FALSE)

### conclusion: both tests reject the null hypothesis: there is significant difference among groups 

# post hoc pairwise tests: the Conover-Iman test
CI_NH_res <- as.data.frame(conover.test(ldf$Var1, ldf$Var2, method="bh", list = TRUE)) 

CI_NH_res <- CI_NH_res[, c("comparisons", "P", "P.adjusted", "chi2", "T")]

# visualization of pairwise comparisons for mDA neighborhoods: 
CI_NH_res$comparisons <- as.character(CI_NH_res$comparisons)

# remove white space in comparison column 
CI_NH_res$comparisons <- gsub('\\s+', '', CI_NH_res$comparisons)

# write the name on the right of the "-" in comparison string as a new column y:
CI_NH_res$y <- gsub(".*-", "", CI_NH_res$comparisons)

# write the name on the left of the "-" in comparison string as a new column x:
CI_NH_res$x <- gsub("\\-.*", "", CI_NH_res$comparisons)

# get the columns needed for visualization:

vis.df <- CI_NH_res[, c("x", "y", "P.adjusted")]

##  plot using geom_tile:
## for p <= alpha/2 (0.025)

tiff(file = "/path/to/dir/tilePlot.tiff", 
     units="cm", width=20, height=20, res=300)

ggplot(vis.df, aes(x, y, fill = P.adjusted)) + geom_tile() + 
  geom_hline(yintercept = seq_along(vis.df$y), color='grey') + 
  geom_vline(xintercept = seq_along(vis.df$x), color='grey') + 
  scale_fill_gradient(low = 'red', high = 'blue', limits=c(0, 0.025)) + 
  ggtitle('Res module comparison neighborhoods') + coord_fixed() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 14, face = 'bold')) +
  theme(axis.text.y = element_text(size = 14, face = 'bold')) 

dev.off()


# Fig Supplement 6 B  
# visualization of post hoc pairwise test: CI = Conover-Iman test
# CI.clusters derived from cell_loss.R script 

# remove white space in comparison column 
CI.clusters$comparisons <- gsub('\\s+', '', CI.clusters$comparisons)

# write the name on the right of the "-" in comparison string as a new column y:
CI.clusters$y <- gsub(".*-", "", CI.clusters$comparisons)

# write the name on the left of the "-" in comparison string as a new column x:
CI.clusters$x <- gsub("\\-.*", "", CI.clusters$comparisons)

# get the columns needed for visualization:

vis.df <- CI.clusters[, c("x", "y", "P.adjusted")]

## for p <= alpha/2 (0.025)
ggplot(vis.df, aes(x, y, fill = P.adjusted)) + geom_tile() + 
  geom_hline(yintercept = seq_along(vis.df$y), color='grey') + 
  geom_vline(xintercept = seq_along(vis.df$x), color='grey') + 
  scale_fill_gradient(low = 'red', high = 'blue', limits=c(0, 0.025)) + 
  ggtitle('normalized cell loss across mDA clusters')



### figure supplement 6 C & D 

# set cell loss as the dependent variable lm(Y~X) : Y: dependent   X:independent (predictor)
# set VUL (with DAT) or newVUL (without DAT) as predictor 
# calculate regression (linear model) fit linear regression models

# AverageExpression() Returns averaged expression values for each identity class
# Returns a matrix with genes as rows, identity classes as columns

vul <- as.data.frame(AverageExpression(
  object = sobj,
  assays = 'VUL',
  features = NULL,
  return.seurat = FALSE,
  group.by = "kmeans71",
  slot = "data",
  verbose = TRUE
))

newvul <- as.data.frame(AverageExpression(
  object = sobj,
  assays = 'newVUL',
  features = NULL,
  return.seurat = FALSE,
  group.by = "kmeans71",
  slot = "data",
  verbose = TRUE
))


## to drop the "vul" and "newVUL." from the column names

colnames(vul) <- gsub("VUL.", "", colnames(vul))

colnames(newvul) <- gsub("newVUL.", "", colnames(newvul))

# remove  non-mDA clusters, ML and unassigned clusters 12, 26, 51 
keepers <- c('40', '52', '17', '28', '27', '33', '23', '29', '66', '9', '67', '31', '44', 
              '22', '46', '14', '11', '30', '1', '38', '41', '61', '50', '19', '42', '4', '10', 
              '2', '39', '60', '5', '43', '37', '53')

vulmda <- vul[colnames(vul) %in% keepers] 

newvulmda <- newvul[colnames(newvul) %in% keepers] 

vulmda <- as.data.frame(t(vulmda))
newvulmda <- as.data.frame(t(newvulmda))

colnames(vulmda)[1] <- 'vul'
colnames(newvulmda)[1] <- 'newvul'

dfmda <- cbind.data.frame(vulmda, newvulmda)
dfmda$clusters <- rownames(dfmda)
dfmda <- dfmda[, c(3, 1, 2)]

### sqrt() moderate transformation to meet the normality assumption of linear models 

shapiro.test(sqrt(dfmda$vul))
# Shapiro-Wilk normality test
# data:  sqrt(dfmda$vul)
# W = 0.96427, p-value = 0.3224

shapiro.test(sqrt(dfmda$newvul))
# Shapiro-Wilk normality test
# data:  sqrt(dfmda$newvul)
# W = 0.96016, p-value = 0.2458

# from mDA.R script: load the normalized cell loss per mDA sub-clusters 
df <- read.csv(file = "path/to/file/cell_loss_km71subclusters.csv", header = TRUE, sep = ",", dec = ".")

# average cell loss per cluster
new_df <- df %>%
  group_by(cluster) %>%
  summarise(mean_normalized_loss = mean(normalized_loss))

# Get the order of clusters in dfmda (above)
order_clusters <- dfmda$clusters

# Reorder the rows of new_df to match the order of clusters in dfmda
new_df_reordered <- new_df[match(order_clusters, new_df$cluster), ]

new_df_reordered <- as.data.frame(new_df_reordered)

# create a new df for linear models 
lmdf <- cbind.data.frame(dfmda, new_df_reordered)

lmdf <- lmdf[, c(1:3, 5)]


### fit linear regression model for Vul (with DAT)

lm1 <- lm(mean_normalized_loss ~ sqrt(vul), data = lmdf)
summary(lm1)

# calculate a 95% confidence interval for the regression coefficient 

confint(lm1, 'sqrt(vul)', level = 0.95)
#                    2.5 %        97.5 %
#   sqrt(vul)    0.3468961     0.5536449


p1 <- ggplot(lm1, aes(mean_normalized_loss, lm1$model$`sqrt(vul)`)) + geom_point() +
  ggtitle("formula = avg.norm.loss ~ sqrt(Vul)") + stat_poly_eq() +
  geom_smooth(method="lm", col="red") + stat_regline_equation(label.x = 0, label.y = 1.25) +
  theme_classic()

p1 <- LabelPoints(plot = p1, points = colnames(lm1$residuals ),
                  size = 8,  color='blue', repel = T, xnudge = 0, ynudge = 0)
plot(p1)

### fit linear regression model for newVul (without DAT)

lm2 <- lm(mean_normalized_loss ~ sqrt(newvul), data = lmdf)
summary(lm2)

# calculate a 95% confidence interval for the regression coefficient

confint(lm2, 'sqrt(newvul)', level = 0.95)
#                    2.5 %     97.5 %
#   sqrt(newvul) 0.3592504  0.5927673


p2 <- ggplot(lm2, aes(mean_normalized_loss, lm2$model$`sqrt(newvul)`)) + geom_point() +
  ggtitle("formula = avg.norm.loss ~ sqrt(newVul)") + stat_poly_eq() +
  geom_smooth(method="lm", col="red") + stat_regline_equation(label.x = 0, label.y = 1.25) +
  theme_classic()

p2 <- LabelPoints(plot = p2, points = colnames(lm2$residuals ),
                  size = 8,  color='blue', repel = T, xnudge = 0, ynudge = 0)
plot(p2)



sessionInfo()
Browse the archive

https://github.com/behyag/perlmann_lab_eLife2024