Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
content badge
swh:1:cnt:5c355ea5a0703150f0fc534d338be861997c8dd7

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
suppressPackageStartupMessages({
  library(Seurat)
  library(stringr)
  library(ggplot2)
  library(ggpubr)
  library(future)
  require(scales)
  library(RColorBrewer)
  library("readxl")
  library(dplyr)
  library(dendextend)
  library('conover.test')
  library("devtools")
  library(ggpmisc)
})

### figure 6 + supplements 

# import the Dopaminergic nuclei (mDA) dataset.
sobj <- readRDS("/path/to/dir/mDA.rds")

# set levels by territory, but re-order again based on dendrogram (desired order) 
sobj <- SetIdent(sobj, value = "territory")

TER_levels <- c("ML", "Sox6", "Gad2", "Fbn2", ..., )

my.cols = c("vector of customized colors")

sobj$territory <- factor(x = sobj$territory, levels = TER_levels)

tiff(file = "/path/to/dir/plot.tiff", 
     units="cm", width=15, height=10, res=300)

VlnPlot(sobj, features = "Vulmodule1", cols = my.cols, group.by = "territory", pt.size = 0, combine = TRUE) +
  stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
  labs(y="avg.Exp.gene.set", title = "vulnerable module mDA territories")

dev.off()

tiff(file = "/path/to/dir/plot.tiff", 
     units="cm", width=15, height=10, res=300)

VlnPlot(sobj, features = "Resmodule1", cols = my.cols, group.by = "territory", pt.size = 0, combine = TRUE) +
  stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
  labs(y="avg.Exp.gene.set", title = "resilient module mDA territories")

dev.off()

# set levels by neighborhoods, but re-order again based on dendrogram (desired order) 
sobj <- SetIdent(sobj, value = "neighborhood")

NH_levels <- c("ML_NH1", "ML_NH2", "Sox6_NH1", "Sox6_NH2", ..., )

my.cols = c("vector of customized colors")

sobj$neighborhood <- factor(x = sobj$neighborhood, levels = NH_levels)

tiff(file = "/path/to/dir/plot.tiff", 
     units="cm", width=30, height=10, res=300)

VlnPlot(sobj, features = "Vulmodule1", cols = my.cols, group.by = "neighborhood", pt.size = 0, combine = TRUE) +
  stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
  labs(y="avg.Exp.gene.set", title = "vulnerable module mDA neighborhoods")

dev.off()

tiff(file = "/path/to/dir/plot.tiff", 
     units="cm", width=30, height=10, res=300)

VlnPlot(sobj, features = "Resmodule1", cols = my.cols, group.by = "neighborhood", pt.size = 0, combine = TRUE) +
  stat_summary(fun = mean, geom='point', size = 8, colour = "black", shape=95) + NoLegend() +
  labs(y="avg.Exp.gene.set", title = "resilient module mDA neighborhoods")

dev.off()


# Fig 6 E neighborhood pairwise comparison of Resilience module
# RES = resilience module 

UP.tb <- table(sobj@assays$RES@data, sobj$neighborhood)

UP.df <- as.data.frame(UP.tb)  

colnames(UP.df)
#[1] "Var1" "Var2" "Freq"

# neighborhoods re-ordered based on dendrogram and only choose mDA neighborhoods. 

mda.NH <- c('ML_NH1', 'ML_NH2', 'Sox6_NH1', 'Sox6_NH2', 'Sox6_NH3', 'Sox6_NH4', 
            'Gad2_NH1', 'Gad2_NH2', 'Fbn2_NH1', 'Fbn2_NH2', 'Pcsk6_NH1', 'Pcsk6_NH2', 
            'Pdia5_NH1', 'Pdia5_NH2', 'Col24a1', 'Vip', 'Otx2_NH1', 'Otx2_NH2')

# subset the defined neighborhoods in mda.NH from the UP.df 

for (i in mda.NH) {
  # Subset the dataframe for the current cluster
  cluster_df <- UP.df[UP.df$Var2 == i, ]
  
  # Filter the subsetted dataframe to retain only rows where Freq == 1
  cluster_df <- cluster_df[cluster_df$Freq == 1, ]
  
  # Assign the filtered dataframe to a new dataframe with the cluster name
  assign(i, cluster_df)
}

# merge into a long df:
ldf <- do.call('rbind', list(ML_NH1, ML_NH2, Sox6_NH1, Sox6_NH2, Sox6_NH3, Sox6_NH4, 
                             Gad2_NH1, Gad2_NH2, Fbn2_NH1, Fbn2_NH2, Pcsk6_NH1, Pcsk6_NH2, 
                             Pdia5_NH1, Pdia5_NH2, Col24a1, Vip, Otx2_NH1, Otx2_NH2))

colnames(ldf)
#[1] "Var1" "Var2" "Freq"

# large sample size, use lolcat package for test of normality
install_git("https://mikeburr.visualstudio.com/DefaultCollection/lolcat-public/_git/lolcat")

library(lolcat)

ldf$Var1 <- as.numeric(as.character(ldf$Var1 )) 

skewness.test(ldf$Var1)
# D'Agostino Skewness Normality Test

kurtosis.test(ldf$Var1 )
# D'Agostino Kurtosis Normality Test

ggqqplot(ldf$Var1 ) + ggtitle('vulnerable all NHs')

# test of homogeneity of variance
fligner.test(ldf$Var1 ~ ldf$Var2, data = ldf)

# non-parametric test (data is not normally distributed)

kruskal.test(ldf$Var1 ~ ldf$Var2, data = ldf)

# Welch's ANOVA because data shows heteroscedasticity (different groups have different standard deviations)

oneway.test(ldf$Var1 ~ ldf$Var2, data = ldf, var.equal = FALSE)

### conclusion: both tests reject the null hypothesis: there is significant difference among groups 

# post hoc pairwise tests: the Conover-Iman test
CI_NH_res <- as.data.frame(conover.test(ldf$Var1, ldf$Var2, method="bh", list = TRUE)) 

CI_NH_res <- CI_NH_res[, c("comparisons", "P", "P.adjusted", "chi2", "T")]

# visualization of pairwise comparisons for mDA neighborhoods: 
CI_NH_res$comparisons <- as.character(CI_NH_res$comparisons)

# remove white space in comparison column 
CI_NH_res$comparisons <- gsub('\\s+', '', CI_NH_res$comparisons)

# write the name on the right of the "-" in comparison string as a new column y:
CI_NH_res$y <- gsub(".*-", "", CI_NH_res$comparisons)

# write the name on the left of the "-" in comparison string as a new column x:
CI_NH_res$x <- gsub("\\-.*", "", CI_NH_res$comparisons)

# get the columns needed for visualization:

vis.df <- CI_NH_res[, c("x", "y", "P.adjusted")]

##  plot using geom_tile:
## for p <= alpha/2 (0.025)

tiff(file = "/path/to/dir/tilePlot.tiff", 
     units="cm", width=20, height=20, res=300)

ggplot(vis.df, aes(x, y, fill = P.adjusted)) + geom_tile() + 
  geom_hline(yintercept = seq_along(vis.df$y), color='grey') + 
  geom_vline(xintercept = seq_along(vis.df$x), color='grey') + 
  scale_fill_gradient(low = 'red', high = 'blue', limits=c(0, 0.025)) + 
  ggtitle('Res module comparison neighborhoods') + coord_fixed() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 14, face = 'bold')) +
  theme(axis.text.y = element_text(size = 14, face = 'bold')) 

dev.off()


# Fig Supplement 6 B  
# visualization of post hoc pairwise test: CI = Conover-Iman test
# CI.clusters derived from cell_loss.R script 

# remove white space in comparison column 
CI.clusters$comparisons <- gsub('\\s+', '', CI.clusters$comparisons)

# write the name on the right of the "-" in comparison string as a new column y:
CI.clusters$y <- gsub(".*-", "", CI.clusters$comparisons)

# write the name on the left of the "-" in comparison string as a new column x:
CI.clusters$x <- gsub("\\-.*", "", CI.clusters$comparisons)

# get the columns needed for visualization:

vis.df <- CI.clusters[, c("x", "y", "P.adjusted")]

## for p <= alpha/2 (0.025)
ggplot(vis.df, aes(x, y, fill = P.adjusted)) + geom_tile() + 
  geom_hline(yintercept = seq_along(vis.df$y), color='grey') + 
  geom_vline(xintercept = seq_along(vis.df$x), color='grey') + 
  scale_fill_gradient(low = 'red', high = 'blue', limits=c(0, 0.025)) + 
  ggtitle('normalized cell loss across mDA clusters')



### figure supplement 6 C & D 

# set cell loss as the dependent variable lm(Y~X) : Y: dependent   X:independent (predictor)
# set VUL (with DAT) or newVUL (without DAT) as predictor 
# calculate regression (linear model) fit linear regression models

# AverageExpression() Returns averaged expression values for each identity class
# Returns a matrix with genes as rows, identity classes as columns

vul <- as.data.frame(AverageExpression(
  object = sobj,
  assays = 'VUL',
  features = NULL,
  return.seurat = FALSE,
  group.by = "kmeans71",
  slot = "data",
  verbose = TRUE
))

newvul <- as.data.frame(AverageExpression(
  object = sobj,
  assays = 'newVUL',
  features = NULL,
  return.seurat = FALSE,
  group.by = "kmeans71",
  slot = "data",
  verbose = TRUE
))


## to drop the "vul" and "newVUL." from the column names

colnames(vul) <- gsub("VUL.", "", colnames(vul))

colnames(newvul) <- gsub("newVUL.", "", colnames(newvul))

# remove  non-mDA clusters, ML and unassigned clusters 12, 26, 51 
keepers <- c('40', '52', '17', '28', '27', '33', '23', '29', '66', '9', '67', '31', '44', 
              '22', '46', '14', '11', '30', '1', '38', '41', '61', '50', '19', '42', '4', '10', 
              '2', '39', '60', '5', '43', '37', '53')

vulmda <- vul[colnames(vul) %in% keepers] 

newvulmda <- newvul[colnames(newvul) %in% keepers] 

vulmda <- as.data.frame(t(vulmda))
newvulmda <- as.data.frame(t(newvulmda))

colnames(vulmda)[1] <- 'vul'
colnames(newvulmda)[1] <- 'newvul'

dfmda <- cbind.data.frame(vulmda, newvulmda)
dfmda$clusters <- rownames(dfmda)
dfmda <- dfmda[, c(3, 1, 2)]

### sqrt() moderate transformation to meet the normality assumption of linear models 

shapiro.test(sqrt(dfmda$vul))
# Shapiro-Wilk normality test
# data:  sqrt(dfmda$vul)
# W = 0.96427, p-value = 0.3224

shapiro.test(sqrt(dfmda$newvul))
# Shapiro-Wilk normality test
# data:  sqrt(dfmda$newvul)
# W = 0.96016, p-value = 0.2458

# from mDA.R script: load the normalized cell loss per mDA sub-clusters 
df <- read.csv(file = "path/to/file/cell_loss_km71subclusters.csv", header = TRUE, sep = ",", dec = ".")

# average cell loss per cluster
new_df <- df %>%
  group_by(cluster) %>%
  summarise(mean_normalized_loss = mean(normalized_loss))

# Get the order of clusters in dfmda (above)
order_clusters <- dfmda$clusters

# Reorder the rows of new_df to match the order of clusters in dfmda
new_df_reordered <- new_df[match(order_clusters, new_df$cluster), ]

new_df_reordered <- as.data.frame(new_df_reordered)

# create a new df for linear models 
lmdf <- cbind.data.frame(dfmda, new_df_reordered)

lmdf <- lmdf[, c(1:3, 5)]


### fit linear regression model for Vul (with DAT)

lm1 <- lm(mean_normalized_loss ~ sqrt(vul), data = lmdf)
summary(lm1)

# calculate a 95% confidence interval for the regression coefficient 

confint(lm1, 'sqrt(vul)', level = 0.95)
#                    2.5 %        97.5 %
#   sqrt(vul)    0.3468961     0.5536449


p1 <- ggplot(lm1, aes(mean_normalized_loss, lm1$model$`sqrt(vul)`)) + geom_point() +
  ggtitle("formula = avg.norm.loss ~ sqrt(Vul)") + stat_poly_eq() +
  geom_smooth(method="lm", col="red") + stat_regline_equation(label.x = 0, label.y = 1.25) +
  theme_classic()

p1 <- LabelPoints(plot = p1, points = colnames(lm1$residuals ),
                  size = 8,  color='blue', repel = T, xnudge = 0, ynudge = 0)
plot(p1)

### fit linear regression model for newVul (without DAT)

lm2 <- lm(mean_normalized_loss ~ sqrt(newvul), data = lmdf)
summary(lm2)

# calculate a 95% confidence interval for the regression coefficient

confint(lm2, 'sqrt(newvul)', level = 0.95)
#                    2.5 %     97.5 %
#   sqrt(newvul) 0.3592504  0.5927673


p2 <- ggplot(lm2, aes(mean_normalized_loss, lm2$model$`sqrt(newvul)`)) + geom_point() +
  ggtitle("formula = avg.norm.loss ~ sqrt(newVul)") + stat_poly_eq() +
  geom_smooth(method="lm", col="red") + stat_regline_equation(label.x = 0, label.y = 1.25) +
  theme_classic()

p2 <- LabelPoints(plot = p2, points = colnames(lm2$residuals ),
                  size = 8,  color='blue', repel = T, xnudge = 0, ynudge = 0)
plot(p2)



sessionInfo()


back to top

Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API