Content - 3b22529027713c7aa8fde311d82bb9b46785bc89 - 2f09a1f/Demux_Preprocess_Downsample.Rmd

visit type:
Tip revision: 1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f authored by Terkild on 13 March 2021, 20:04:44 UTC
Add figures for review
Tip revision: 1c7fcab
Demux_Preprocess_Downsample.Rmd
---
title: "CITE-seq optimization - Demux, Pre-process and downsample"
author: "Terkild Brink Buus"
date: "30/3/2020"
output: github_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
options(stringsAsFactors=FALSE)
```

## Load utilities

Including libraries, plotting and color settings and custom utility functions

```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
set.seed(114)
require("Seurat", quietly=T)
require("tidyverse", quietly=T)
library("Matrix", quietly=T)
library("DropletUtils", quietly=T)

## Load ggplot theme and defaults
source("R/ggplot_settings.R")

## Load helper functions
source("R/Utilities.R")

## Load predefined color schemes
source("R/color.R")

read_kallisto_data <- function(file.path){
  ## Load mtx and transpose it
  res_mat <- as(t(readMM(file.path(file.path,"cells_x_genes.mtx"))), 'CsparseMatrix') 
  ## Attach genes
  rownames(res_mat) <- read.csv(file.path(file.path,"cells_x_genes.genes.txt"), sep = '\t', header = F)[,1]
  ## Attach barcodes
  colnames(res_mat) <- read.csv(file.path(file.path,"cells_x_genes.barcodes.txt"), header = F, sep = '\t')[,1]
  
  return(res_mat)
}

data.drive <- "F:/"
data.project.dir <- "Projects/ECCITE-seq/TotalSeqC_TitrationA"
outdir <- "figures"
t2g.file <- file.path(data.drive,data.project.dir,"/kallisto/t2g_cellranger.txt")

kallistobusDir <- file.path(data.drive,data.project.dir,"kallisto/gex/c1/counts_unfiltered")
kallistobusDirADT <- file.path(data.drive,data.project.dir,"kallisto/features/A1_S5.ADT_15/counts_unfiltered")
kallistobusDirHTO <- file.path(data.drive,data.project.dir,"kallisto/features/H1_S6.HTO_A_13/counts_unfiltered")

data.abpanel <- "data/Supplementary_Table_1.xlsx"

```

## Load data

From kallisto-bustools output. Modified from https://github.com/Sarah145/scRNA_pre_process 

```{r loadGEX}
raw_mtx <- read_kallisto_data(kallistobusDir)

t2g <- unique(read.csv(t2g.file, sep = '\t', header=F)[,2:3]) # load t2g file
t2g <- data.frame(t2g[,2], row.names = t2g[,1])
gene_sym <- t2g[as.character(rownames(raw_mtx)),1] # get symbols for gene ids

# Which rows have same gene symbol (but different Ensembl gene id)
gene_sym.duplicated <- which(gene_sym %in% gene_sym[which(duplicated(gene_sym))])

# Which genes are have duplicated entries
gene_sym.duplicated.unique <- unique(gene_sym[gene_sym.duplicated])

# Make placeholder matrix for duplicate gene symbols
raw_mtx_dedup <- Matrix(data=0,nrow=length(gene_sym.duplicated.unique),ncol=ncol(raw_mtx))
rownames(raw_mtx_dedup) <- gene_sym.duplicated.unique
colnames(raw_mtx_dedup) <- colnames(raw_mtx)

# Combine counts from genes with same gene symbol (but different Ensembl gene id)
for(i in seq_along(gene_sym.duplicated)){
  curGene <- gene_sym[gene_sym.duplicated[i]]
  curRow <- gene_sym.duplicated.unique == curGene
  raw_mtx_dedup[curRow,] <- raw_mtx_dedup[curRow,] + raw_mtx[gene_sym.duplicated[i],]
}

# Merged combined counts duplicate gene symbol with matrix of unique gene symbol counts
raw_mtx <- raw_mtx[-gene_sym.duplicated,]
rownames(raw_mtx) <- gene_sym[-gene_sym.duplicated]
raw_mtx <- rbind(raw_mtx,raw_mtx_dedup)

tot_counts <- Matrix::colSums(raw_mtx)
bc_rank <- DropletUtils::barcodeRanks(raw_mtx, lower = 10)

GEX.knee_plot <- knee_plot(bc_rank)

kallisto.GEX <- raw_mtx
```

# Load Kallisto HTO data

```{r loadHTO}
HTO.res_mat <- read_kallisto_data(kallistobusDirHTO)

HTO.tot_counts <- Matrix::colSums(HTO.res_mat)
HTO.bc_rank <- DropletUtils::barcodeRanks(HTO.res_mat, lower = 10)

HTO.knee_plot <- knee_plot(HTO.bc_rank)
kallisto.HTO <- HTO.res_mat
```

# Load Kallisto ADT data

```{r loadADT}
ADT.res_mat <- read_kallisto_data(kallistobusDirADT)

ADT.tot_counts <- Matrix::colSums(ADT.res_mat)
ADT.bc_rank <- DropletUtils::barcodeRanks(ADT.res_mat, lower = 10)

ADT.knee_plot <- knee_plot(ADT.bc_rank)
kallisto.ADT <- ADT.res_mat
```

Plot Barcode-rank plots

```{r, fig.height=2.5, fig.width=7}
cowplot::plot_grid(GEX.knee_plot, HTO.knee_plot, ADT.knee_plot, nrow=1, labels=c("GEX","HTO","ADT"))
```

## Demultiplex by HTO

Use Seurat MULTIseqDemux to demultiplex samples (by their hashing antibody signal = HTO)

```{r demux}
object <- CreateSeuratObject(counts = kallisto.HTO, assay="HTO.kallisto")
object <- NormalizeData(object, assay = "HTO.kallisto", normalization.method = "CLR")

## Assure the matrices are in the same barcode-space
commonDrops <- Reduce("intersect",x=list(colnames(kallisto.HTO),colnames(kallisto.ADT),colnames(kallisto.GEX)))

length(commonDrops)

object <- subset(object, cells=commonDrops)
object[["ADT.kallisto"]] <- CreateAssayObject(counts=kallisto.ADT[,commonDrops])
object[["RNA.kallisto"]] <- CreateAssayObject(counts=kallisto.GEX[,commonDrops])
Key(object[["RNA.kallisto"]]) <- "rna_"
Key(object[["ADT.kallisto"]]) <- "adt_"
Key(object[["HTO.kallisto"]]) <- "hto_"


## MULTIseqDemux seems better when using unfiltered input (including empty droplets)
object <- MULTIseqDemux(object, assay="HTO.kallisto")

RidgePlot(object, assay = "HTO.kallisto", features = rownames(object[["HTO.kallisto"]]))
table(object$MULTI_ID)

object$sampleID <- object$MULTI_ID

object <- CalculateBarcodeInflections(object,barcode.column="nCount_HTO.kallisto",group.column="sampleID",threshold.low=1000)
Seurat::BarcodeInflectionsPlot(object) + scale_x_continuous(trans="log10")

object.empty <- subset(object, subset=sampleID == "Negative")
object <- subset(object, subset=sampleID %in% c(1:6))
```

## Assign annotation to each cell

```{r annotation}
## Rename groups to meaningful names
groups <- c("PBMC_50ul_1_1000k","PBMC_50ul_4_1000k","PBMC_25ul_4_1000k","PBMC_25ul_4_200k","Lung_50ul_1_500k","Lung_50ul_4_500k","Doublet","Negative")
object$group <- object$sampleID

## Keep ordering of groups for best plotting
levels(object$group) <- groups

object$tissue <- factor(c("PBMC","PBMC","PBMC","PBMC","Lung","Lung","Doublet","Negative")[object$sampleID],levels=c("PBMC","Lung"))
object$volume <- factor(c("50µl","50µl","25µl","25µl","50µl","50µl","Doublet","Negative")[object$sampleID], levels=c("50µl","25µl","Doublet","Negative"))
object$dilution <- factor(c("DF1","DF4","DF4","DF4","DF1","DF4","Doublet","Negative")[object$sampleID], levels=c("DF1","DF4","Doublet","Negative"))
object$cellsAtStaining <- factor(c("1000k","1000k","1000k","200k","500k","500k","Doublet","Negative")[object$sampleID], levels=c("1000k","500k","200k","Doublet","Negative"))
```

## Filter dead/dying cells

Based on mitochondrial reads and number of detected genes. Cutoff set to 15% MT and at least 60 expressed genes by visual inspection. The number of expressed genes is low due to the low depth of the GEX sequencing (5000 reads/cell)

```{r filter}
DefaultAssay(object) <- "RNA.kallisto"
object[["percent.mt"]] <- PercentageFeatureSet(object, pattern = "^MT-")

cutoff.percent.mt <- 15
cutoff.nFeature <- 60
FeatureScatter(object, feature1 = "percent.mt", feature2 = "nFeature_RNA.kallisto") + ggplot2::geom_vline(xintercept=cutoff.percent.mt,linetype="dashed") + scale_y_continuous(trans="log10") + ggplot2::geom_hline(yintercept=cutoff.nFeature,linetype="dashed")

table(object[["percent.mt"]]<cutoff.percent.mt & object[["nFeature_RNA.kallisto"]]>cutoff.nFeature, object$sampleID)

object <- subset(object, subset = percent.mt <= cutoff.percent.mt & nFeature_RNA.kallisto > cutoff.nFeature)
```

## Filter doublets

Doublet rate calculated from: https://satijalab.org/costpercell at 18,000 cells and 6 multiplexed samples (3.27% after HTO demux)

```{r}
library(scDblFinder)
DefaultAssay(object) <- "RNA.kallisto"

sce <- as.SingleCellExperiment(object)
sce <- scDblFinder(sce, dbr=0.0327, samples="sampleID")

table(sce$scDblFinder.class, sce$sampleID)

identical(colnames(object),colnames(sce))

object$scDblFinder.class <- sce$scDblFinder.class
object$scDblFinder.score <- sce$scDblFinder.score
object$scDblFinder.ratio <- sce$scDblFinder.ratio
object$scDblFinder.weighted <- sce$scDblFinder.weighted
rm(sce)

object$scDblFinder.class <- factor(object$scDblFinder.class, levels=c("singlet","doublet"))
  
FeatureScatter(object, feature1 = "nCount_RNA.kallisto", feature2 = "nFeature_RNA.kallisto", group.by="scDblFinder.class", cols=c(alpha("blue",0.01),alpha("red",0.5))) + scale_y_continuous(trans="log10") + scale_x_continuous(trans="log10")

object <- subset(object, subset=scDblFinder.class=="singlet")
```


## Make DSB normalization

Normalize ADT counts using the "Denoised and Scaled by Background" method (https://mattpm.github.io/dsb/). This method utilizes the ADT signal in non-cell-containing droplets and signal form isotype controls to make normalized values. The normalized values correspond to number of standard deviations from the background median.

```{r dsbnorm, fig.height=8, fig.width=10}
#devtools::install_github(repo = 'MattPM/dsb')
library(dsb)

neg_adt_matrix = GetAssayData(object.empty, assay = "ADT.kallisto", slot = 'counts') %>% as.matrix()
positive_adt_matrix = GetAssayData(object, assay = "ADT.kallisto", slot = 'counts') %>% as.matrix()
isotypes = c("IgG2A","IgG1")

normalized_matrix_individual <- positive_adt_matrix

for(i in c(1:6)){
  curCells <- which(object$sampleID == i)
  normalized_matrix_individual[,curCells] = DSBNormalizeProtein(cell_protein_matrix = positive_adt_matrix[,curCells],
                                        empty_drop_matrix = neg_adt_matrix,
                                        use.isotype.control = TRUE,
                                        isotype.control.name.vec = isotypes)
}

object = SetAssayData(object=object, assay="ADT.kallisto", slot="data", new.data = normalized_matrix_individual)

plotData <- as.data.frame(normalized_matrix_individual)
plotData$Marker <- rownames(plotData)
plotData <- reshape2::melt(plotData, id.vars=c("Marker"))
colnames(plotData) <- c("Marker","Cell","value")
ggplot(plotData,aes(x=value,y=Marker,fill=object$group[Cell])) + ggridges::geom_density_ridges(alpha=0.5, scale=3, rel_min_height = 0.01) + xlim(-5,30) + facet_grid(~object$group[Cell])

```

## Preprocess data

Run standard Seurat preprocessing on RNA modality.

```{r preprocessRNA}

object <- NormalizeData(object)
object <- FindVariableFeatures(object)
object <- ScaleData(object)
object <- RunPCA(object, verbose = FALSE)
object <- FindNeighbors(object, dims = 1:30)
object <- FindClusters(object, resolution = 0.3)
object <- RunTSNE(object,dims=1:30)
object <- RunUMAP(object,dims=1:30)

DimPlot(object, group.by="tissue", reduction="tsne")
DimPlot(object, group.by="group", reduction="tsne")
DimPlot(object, label=TRUE, reduction="tsne")

```


## Label and merge clusters into "superclusters"

To make the poulations less complex and for easier visualization, we merged the clusters into major cell types.

```{r superclustering, fig.width=8, fig.height=7}
## LINEAGE MARKERS FOR CLUSTERLABELLING
ADTplots <- FeaturePlot(object, features=c("adt_CD1a","adt_CD3","adt_CD4","adt_CD8","adt_CD11b","adt_CD14","adt_CD19","adt_CD56","adt_HLA-DR","adt_EpCAM"), label=TRUE, reduction="tsne", min.cutoff=4, col=c("lightgrey","red"), combine=FALSE)
ADTplots <- lapply(ADTplots,FUN=function(x)x+NoLegend())
CombinePlots(ADTplots,ncol=5)

library("dplyr")
cluster.markers <- FindAllMarkers(object, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top5 <- cluster.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
DoHeatmap(object, features = top5$gene, slot = "data") + NoLegend() + ggplot2::scale_fill_gradientn(colors = c("blue", "white", "red"))
```

Combine clusters into superclusters (corresponding roughly to cell types)

```{r}
## COMBINE CLUSTERS TO SUPERCLUSTERS
superclusters <- c("0"="MO/MØ/DC",
                   "1"="T/NK cells",
                   "2"="T/NK cells",
                   "3"="T/NK cells",
                   "4"="B/Plasma cells",
                   "5"="T/NK cells",
                   "6"="B/Plasma cells",
                   "7"="MO/MØ/DC",
                   "8"="MO/MØ/DC",
                   "9"="B/Plasma cells",
                   "10"="MO/MØ/DC",
                   "11"="T/NK cells",
                   "12"="Other",
                   "13"="Other",
                   "14"="B/Plasma cells",
                   "15"="Other",
                   "16"="MO/MØ/DC")

object$supercluster <- factor(superclusters[as.character(Idents(object))],levels=superclusters.levels)

DimPlot(object, group.by="supercluster", reduction="tsne")
DimPlot(object, group.by="supercluster", reduction="umap")
table <- t(table(object$supercluster,object$group))
round(table/rowSums(table)*100,2)
```

## Make fine-grained clustering

```{r fineClusters}
object <- FindClusters(object, resolution = 1.2)

object$fineCluster <- Idents(object)

DimPlot(object, reduction = "tsne", label = TRUE) + NoLegend()
DimPlot(object, reduction = "umap", label = TRUE) + NoLegend()
```

## Downsampling

To better compare UMI counts between samples, we downsample within each tissue so that each sample have the same number of cells from each fine grained cluster. Most clusters based on different tissue still have a few cells assigned to it. To avoid "expressing" clusters to be based on less than 10 cells, we remove cells belonging to clusters that have less than 10 cells within a given tissue after equal downsampling.

```{r downsample}
data.fineClusters <- FetchData(object, vars=c("tissue","sampleID","fineCluster"))

## Get number of cells in each cluster for each sample
data.fineClusters <- data.fineClusters %>% group_by(tissue, sampleID, fineCluster) %>% summarize(count=length(sampleID))

## Calculate minimum cells within each cluster for each tissue and remove clusters that are represented by less than 10 cells winin a sample of a given tissue
data.fineClusters.min <- data.fineClusters %>% group_by(tissue, fineCluster) %>% summarize(count.min=min(count)) %>% filter(count.min >= 10)

downsampled.index <- c()
for(i in 1:nrow(data.fineClusters.min)){
  curCluster <- data.fineClusters.min[i,]
  
  samples <- unique(object$sampleID[object$tissue == curCluster$tissue])
  
  for(j in seq_along(samples)){
    sample <- samples[j]
    
    ## Extract indices for cells in current sample and cluster
    cellsInCluster <- which(object$sampleID == sample & object$fineCluster == curCluster$fineCluster)
    
    ## Get random subsample according to minimum for the current cluster and tissue
    addToIndex <- cellsInCluster[sample(length(cellsInCluster),curCluster$count.min)]
    
    downsampled.index <- append(downsampled.index,addToIndex)
  }
}

table(object$fineCluster,object$sampleID)
table(object$fineCluster[downsampled.index],object$sampleID[downsampled.index])

object.downsampled <- subset(object, cells=downsampled.index[sample(length(downsampled.index),length(downsampled.index))])
table(object.downsampled$sampleID, object.downsampled$orig.ident)

DimPlot(object.downsampled, split.by="sampleID", reduction="tsne", label=TRUE , ncol=4) + NoLegend()
```

## Determine gating values for each marker

While DSB normalization should center negative populations around 0, their variance makes it necessary to make small adjustments per marker to split between negative and positive cells. This done by visual inspection.

```{r}
abpanel <- data.frame(readxl::read_excel(data.abpanel))
rownames(abpanel) <- abpanel$Marker
abpanel$marker <- abpanel$Marker
abpanel$DSB.cutoff <- 7

## Setting gating thresholds based on DSB normalized values by visual inspection
abpanel[c("EpCAM"),"DSB.cutoff"] <- 15
abpanel[c("CD2","CD31"),"DSB.cutoff"] <- 12
abpanel[c("CD26","CD3","CD39","CD11b"),"DSB.cutoff"] <- 8
abpanel[c("CD127","CD1a","CD223","CD25","CD62L"),"DSB.cutoff"] <- 6
abpanel[c("CD24","CD30","TCRab","CD70"),"DSB.cutoff"] <- 5.5
abpanel[c("CD134","CD138","CD152","CD194","IgG1","IgG2A","CD28","CD80"),"DSB.cutoff"] <- 5
abpanel[c("CD366"),"DSB.cutoff"] <- 4.5
abpanel[c("TCRgd","CD183","CD197"),"DSB.cutoff"] <- 4
abpanel[c("CD86","CD279"),"DSB.cutoff"] <- 3.5
abpanel[c("TCRgd"),"DSB.cutoff"] <- 3

data.ADT.DSB <- GetAssayData(object.downsampled, assay="ADT.kallisto", slot="data")
data.meta <- FetchData(object.downsampled, vars=c("fineCluster","supercluster","dilution","tissue"))

data.ADT.DSB.pivot <- as.data.frame(data.ADT.DSB) %>% 
                          mutate(marker=rownames(.)) %>% 
                          pivot_longer(-marker) %>% 
                          filter(data.meta[name,"dilution"]=="DF1")

## Calculate percent positive (within each supercluster)
data.ADT.DSB.pivot.positive.bySupercluster <- data.ADT.DSB.pivot %>% group_by(tissue=data.meta[name,"tissue"], supercluster=data.meta[name,"supercluster"], marker) %>% summarize(positive=sum(value >= abpanel[marker,"DSB.cutoff"]), count=length(name)) %>% mutate(pct=round(positive/count*100,2))

## Calculate percent positive (within each tissue and supercluster)
data.ADT.DSB.pivot.positive.byTissue <- data.ADT.DSB.pivot %>% group_by(tissue=data.meta[name,"tissue"], marker) %>% summarize(positive=sum(value >= abpanel[marker,"DSB.cutoff"]), count=length(name)) %>% mutate(pct=round(positive/count*100,2))

## Remove negative "outliers" from the visualization as it drastically skews the axes making it hard to interpret the plots.
data.ADT.DSB.pivot.filtered <- data.ADT.DSB.pivot %>% filter(value >= -5)
```

Plot the gating values and ADT distribution within major cell types

```{r, fig.height=8, fig.width=7, message=FALSE, warning=FALSE}
p.ADT.histograms <- ggplot(data.ADT.DSB.pivot.filtered, aes(y=data.meta[name,"supercluster"], fill=data.meta[name,"supercluster"], x=value, linetype=data.meta[name,"tissue"], color=data.meta[name,"tissue"])) + 
  ggridges::geom_density_ridges(alpha=0.5, show.legend=FALSE) + 
  ## A bit of a hack to get the "right" legend symbols
  geom_point(alpha=0, aes(color=NA, linetype=NA)) + 
  geom_line(alpha=0, aes(fill=NA)) + 
  geom_vline(data=abpanel,aes(xintercept=DSB.cutoff)) + 
  geom_text(data=data.ADT.DSB.pivot.positive.bySupercluster, 
            aes(x=Inf, y=as.integer(supercluster)+(3.3-as.integer(tissue))*0.40, color=tissue, fill=NA, linetype=NA, label=paste0(round(pct,1),"%")), hjust=1, vjust=1, size=2, show.legend=FALSE) + 
  #geom_point(data=data.ADT.DSB.pivot.positive.bySupercluster, 
            #aes(x=1, y=1, fill=supercluster, color="black", linetype=21, alpha=0)) + 
  facet_wrap(~marker, scales="free_x", ncol=6) + 
  scale_fill_manual(values=color.supercluster) + 
  scale_color_manual(values=sapply(color.tissue,function(x)alpha(x,0.5))) + 
  scale_linetype_manual(values=c("Lung"="dashed","PBMC"="solid")) + 
  scale_y_discrete(expand = c(0,0,0.65,0)) + 
  labs(fill="Cell type", linetype="Tissue", color="Tissue") +
  guides(fill=guide_legend(ncol=2, override.aes=list(alpha=1, pch=21, size=3), reverse=TRUE),
         color=guide_legend(override.aes=list(linetype=c("solid","dashed"), pch=NA, alpha=1, size=0.75),
                            keywidth=unit(8,"mm"), reverse=TRUE), 
         linetype=FALSE) + 
  theme(legend.position=c(1,0), 
        legend.justification=c(1,0),
        legend.direction="horizontal",
        legend.key.size=unit(3,"mm"),
        strip.text=element_text(vjust=-1),
        panel.spacing.y=unit(0,"lines"),
        axis.title=element_blank())


png(file=file.path(outdir,"Supplementary Figure S1.png"), width=figure.width.full, height=10, units = figure.unit, res=figure.resolution, antialias=figure.antialias)

  p.ADT.histograms

dev.off()

p.ADT.histograms
```

## Find "ADT expressing cluster" for each tissue

We have tried different approaches. But the one that came closest to manual inspecition was using the cluster that had the highest value at the 90th percentile. To make it less sensitive to outliers within very small clusters, we use the median value if the 90th percentile "rank" is less than 10 threshhold. This effectively makes sure that the value used for expression cannot be from within the top 1-3 cells within a cluster.

```{r ADTExpressingCluster}
## Get data from DF1 samples stained in 50µl (as this is likely to have highest signal)
ADT.matrix <- data.frame(GetAssayData(object.downsampled[,object.downsampled$volume == "50µl"], assay="ADT.kallisto", slot="counts"))
ADT.matrix <- ADT.matrix %>% mutate(marker=rownames(ADT.matrix)) %>% pivot_longer(c(-marker))

## Get annotation
cell.annotation <- FetchData(object.downsampled, vars=c("tissue", "fineCluster", "dilution"))

## Calculate summary statistics for each fineCluster
ADT.matrix.agg <- ADT.matrix %>% group_by(tissue=cell.annotation[name,"tissue"], fineCluster=cell.annotation[name,"fineCluster"], marker) %>% summarise(nCells=length(value), UMIsum=sum(value), nth=nth(value), median=median(value), f90=quantile(value,probs=0.9))

marker.sum <- ADT.matrix %>% group_by(tissue=cell.annotation[name,"tissue"], dilution=cell.annotation[name,"dilution"], marker) %>% summarise(UMItotal=sum(value)) %>% filter(dilution=="DF1")

## Remove dilution factor column (necessary for joining)
marker.sum <- marker.sum[,-2]

## Determine which cluster has "highest expression" based on the highest nth value
Cluster.max <- ADT.matrix.agg %>% group_by(marker, tissue) %>% summarize(fineCluster=fineCluster[which.max(nth)])

ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg) %>% left_join(marker.sum) %>% left_join(abpanel, by=c("marker"="Marker")) %>% left_join(data.ADT.DSB.pivot.positive.byTissue)

write.table(ADT.matrix.aggByClusterMax,"data/markerByClusterStats.tsv")
```

## Save Seurat object

```{r save}
saveRDS(object,file="data/5P-CITE-seq_Titration_full.rds")
saveRDS(object.downsampled,file="data/5P-CITE-seq_Titration.rds")
```
Browse the archive

https://github.com/Terkild/CITE-seq_optimization