Revision 195fe7dd0bf388ad4cd6f01db052f45d8b8fa67e authored by UribeLabRice on 14 December 2020, 17:23:35 UTC, committed by GitHub on 14 December 2020, 17:23:35 UTC
1 parent 5911f2e
Raw File
Uribe Lab_Seurat_69HPF.R
# Set up the 69hpf data for processing. -----------------------------------
library(dplyr)
library(Seurat)
library(reticulate)
rm(list = ls())

nc2.data <- Read10X(data.dir = "C:/Users/agh8/Desktop/RU_10XGenSC42/Sox10Enriched-69h_analysis/outs/filtered_gene_bc_matrices/DanioGRCz10/")
#nc2.data <- Read10X(data.dir = "C:/Users/agh8/Desktop/RU_10XGenSC42/Sox10Enriched-69h_analysis/outs/Ensb_filtered_gene_bc_matrices/DanioGRCz10/")
ProjectTitle <- "69HPF"
nc2 <- CreateSeuratObject(counts = nc2.data, project = ProjectTitle, min.cells = 3, min.features = 200)
nc2

pcnum <- 20
res <- 1.8
imagesize<- 4000
filepath<- "C:/Users/agh8/Desktop/Rice University/Uribe lab/Transcriptome/R programming/Seurat Plots/"

# NC2 Quality Control ---------------------------------------------------------
# Here we will filter cells that above 2500 UMI and under 200 UMI
#Also will remove cells with >5% Mitochondrial genes (indicates dying or damaged cell)

#Add Mitocohondrial QC to Seurat object
nc2[["percent.mt"]] <- PercentageFeatureSet(nc2, pattern = "^MT-")

#Add Cell Cycle QC to Seurat Object
nc2[["CellCycle"]] <- PercentageFeatureSet(nc2)

#Visualize QC metrics prior to thresholding
VlnPlot(nc2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

plot1<- FeatureScatter(nc2, feature1= "nCount_RNA", feature2 = "percent.mt")
plot2<- FeatureScatter(nc2, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1,plot2))
nc2 <- subset(nc2, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)

# NC2 Normilization, scaling, and feature selection of Data -------------------

#normalizaing the data
nc2 <- NormalizeData(nc2, normalization.method = "LogNormalize", scale.factor = 10000)


#find variable features (eg. id most significant dimensions)
nc2 <- FindVariableFeatures(nc2, selection.method = "vst", nfeatures = 2000)

#Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(nc2),10)

#plot variable features with and without labels
plot1 <- VariableFeaturePlot(nc2)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))

#scaling data
all.genes <- rownames(nc2)
nc2 <- ScaleData(nc2, features = all.genes)

#Remove unwanted sources of variation
# nc2 <- ScaleData(nc2, vars.to.regress = "percent.mt") or refer to the Seurat V3 vignette on sctransform

#linear dimensional reduction (i.e. find the PCs)
nc2 <- RunPCA(nc2, features = VariableFeatures(object =nc2))
#Print the  top PC gene lists
print(nc2[["pca"]],dims = 1:pcnum, nfeatures = 5)

nc2qc.pcaVis <- VizDimLoadings(nc2,dims = 1:pcnum, reduction = "pca")
fname = paste(filepath,"QC PLOTS/","69hpf_PCAViz_PC",pcnum,"+res",res,".tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(VizDimLoadings(nc2,dims = 1:pcnum, reduction = "pca"))
dev.off()


nc2qc.pcaHM %<a-%{ DimHeatmap(nc2, dims = 1:pcnum, cells = 100, balanced = TRUE, fast = TRUE)}
fname = paste(filepath,"/QC PLOTS/","69hpf_PCAHM_PC",pcnum,"+res",res,".tiff",sep = "")
tiff(filename=fname,width = imagesize*4, height = imagesize*5,units = "px",res = 300,pointsize = 24)
nc2qc.pcaHM
p1.save <-recordPlot()
dev.off()

nc2 <- JackStraw(nc2, reduction = "pca", num.replicate = 100,dims = pcnum)
nc2 <- ScoreJackStraw(nc2, dims = 1:pcnum)
nc2qc.jk<-JackStrawPlot(nc2, dims = 1:pcnum)
fname = paste(filepath,"QC PLOTS/","69hpf_JackStraw_PC",pcnum,"+res",res,".tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(nc2qc.jk)
dev.off()

nc2qc.EP<-ElbowPlot(nc2, ndims = pcnum)
fname = paste(filepath,"QC PLOTS/","69hpf_ElbowPlot_PC",pcnum,"+res",res,".tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(nc2qc.EP)
dev.off()


# NC2 Clustering of Cells -----------------------------------------------------


nc2 <- FindNeighbors(nc2, dims = 1:pcnum)
nc2 <- FindClusters(nc2, resolution = res)

head(Idents(nc2),5)


# NC2 Run TSEN ----------------------------------------------------------------
nc2 <- RunTSNE(object = nc2, dims = 1:pcnum, do.fast = TRUE )
nc2_tsne <- TSNEPlot(nc2, label = FALSE)
fname = paste(filepath,"69hpf_tsne_PC",pcnum,"+res",res,".tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(nc2_tsne)
dev.off()

nc2_tsne <- TSNEPlot(nc2, label = TRUE)
fname = paste(filepath,"69hpf_tsne_PC",pcnum,"+res",res,"_Labled.tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(nc2_tsne)
dev.off()


#proposed cluster labels
new.cluster.ids <- (c("cl0:Chondrogenic differentiating mesenchyme 1",
                      "cl1:Fin bud",
                      "cl2:Mesenchyme Migratory",                   
                      "cl3:Neural progenitor" ,
                      "cl4:Melanophore",
                      "cl5:Sympatho-enteric progenitor" ,
                      "cl6:Chondrogenic differentiating mesenchyme 2" ,
                      "cl7:Fin bud",
                      "cl8:Chondrogenic proliferative mesenchyme",
                      "cl9:Pigmented muscle" ,
                      "cl10:Neural progenitor",
                      "cl11:Otic epithelium" ,
                      "cl12:Enteric neuron"   ,                       
                      "cl13:Pigment Progenitor",
                      "cl14:Schwann Cells" ,
                      "cl15:Xanthophore",                              
                      "cl16:Iridiophore" ,
                      "cl17:Differentiating mesenchyme 1",
                      "cl18:Proliferative melanophores",      
                      "cl19:Muscle"                     ,              
                      "cl20:Chondrogenic mesenchyme migratory",         
                      "cl21:Unidentified"                      ,       
                      "cl22:Differentiating mesenchyme 2"))
names(new.cluster.ids) <- levels(nc2)
nc2 <- RenameIdents(nc2,new.cluster.ids)
nc2$CellType <- Idents(nc2)
FP<-DimPlot(nc2,reduction = "tsne", label = TRUE, pt.size = 0.5, repel = TRUE, group.by = "CellType")+NoLegend()
fname = paste(filepath,ProjectTitle,"_tsne_PC",pcnum,"+res",res,"_CellTypeLabels.tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(FP)
dev.off()


#Print Dendrogram
nc2<- BuildClusterTree(object = nc2)
PlotClusterTree(nc2)
fname = paste(filepath,ProjectTitle,"_",pcnum,"+res",res,"ClusterDendrogram.tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(PlotClusterTree(nc2))
dev.off()

nc2.tsne_markers <- FindAllMarkers(object = nc2, only.pos = TRUE, min.pct = 0.25,thresh.use = 0.25)
nc2.tsne_markers %>% group_by(cluster) %>% top_n(2, avg_logFC)

top10 <- nc2.tsne_markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
top10HM <- DoHeatmap(nc2,group.by = "seurat_clusters") + NoLegend()
fname = paste(filepath,ProjectTitle,"_Top10MarkerHeatMap_PC",pcnum,"+res",res,".tiff",sep = "")
tiff(filename=fname,width = imagesize+2000, height = imagesize+1000,units = "px",res = 300,pointsize = 14)
print(top10HM)
dev.off()



# write.csv(nc2.tsne_markers, "69hpf_tsne_markers.csv")

# # cluster1.markers <- FindMarkers(object = nc2, ident.1 = 0, thresh.use = 0.25,
# #                                 test.use = "roc", only.pos = TRUE)
# VlnPlot(nc2, features = c("dlx4a", "lect1"))
# VlnPlot(nc2, features = c("sox10", "lect1"), slot = "counts", log = TRUE)
# 
# FeaturePlot(nc2, features = c("sox10", "lect1", "dlx4a", "fabp7a",
#                                             "phox2bb", "foxc1a", "mitfa", "foxd3", "mycn"))
# 
# top100 <- nc2.tsne_markers %>% group_by(cluster) %>% top_n(n = 100, wt = avg_logFC)
# DoHeatmap(nc2, features = top10$gene) + NoLegend()
# 
#write.csv(top100, "69hpf_tsne_top100markers.csv")

# NC2 UMAP --------------------------------------------------------------------

nc2 <- RunUMAP(nc2,dims = 1:pcnum)
nc2_umap <- DimPlot(nc2,reduction = "umap",label = FALSE)
fname = paste(filepath,"69hpf_umap_PC",pcnum,"+res",res,".tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(nc2_umap)
dev.off()

nc2_umap <- DimPlot(nc2,reduction = "umap",label = TRUE,repel = TRUE, group.by = "CellType")
fname = paste(filepath,"69hpf_umap_PC",pcnum,"+res",res,"._Labeled.tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(nc2_umap)
dev.off()

FP<-DimPlot(nc2,reduction = "umap", label = TRUE, pt.size = 0.5, repel = TRUE, group.by = "CellType")+NoLegend()
fname = paste(filepath,ProjectTitle,"_umap_PC",pcnum,"+res",res,"_CellTypeLabels.tiff",sep = "")
tiff(filename=fname,width = imagesize, height = imagesize,units = "px",res = 300,pointsize = 12)
print(FP)
dev.off()

#find all unique cluster biomarkers
#nc2.umap_markers <- FindAllMarkers(nc2, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
#nc2.umap_markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
#write.csv(nc2.umap_markers, "69hpf_umap_markers.csv")
# 
# top10<- nc2.markers %>% group_by(cluster) %>% top_n(n=pcnum, wt = avg_logFC)
# DoHeatmap(nc2,features = top10$gene) + NoLegend()

saveRDS(nc2, file = paste(filepath,ProjectTitle,"_PC",pcnum,"+res",res,".rds",sep = ""))
back to top