https://github.com/satijalab/seurat
Tip revision: 49a1be0427f2f26a531eb468ba93eeb18d8a2edb authored by satijalab on 13 December 2019, 20:42:25 UTC
Merge pull request #2416 from satijalab/develop
Merge pull request #2416 from satijalab/develop
Tip revision: 49a1be0
FindMarkers.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generics.R, R/differential_expression.R
\name{FindMarkers}
\alias{FindMarkers}
\alias{FindMarkersNode}
\alias{FindMarkers.default}
\alias{FindMarkers.Seurat}
\title{Gene expression markers of identity classes}
\usage{
FindMarkers(object, ...)
\method{FindMarkers}{default}(
object,
slot = "data",
counts = numeric(),
cells.1 = NULL,
cells.2 = NULL,
features = NULL,
reduction = NULL,
logfc.threshold = 0.25,
test.use = "wilcox",
min.pct = 0.1,
min.diff.pct = -Inf,
verbose = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
pseudocount.use = 1,
...
)
\method{FindMarkers}{Seurat}(
object,
ident.1 = NULL,
ident.2 = NULL,
group.by = NULL,
subset.ident = NULL,
assay = NULL,
slot = "data",
reduction = NULL,
features = NULL,
logfc.threshold = 0.25,
test.use = "wilcox",
min.pct = 0.1,
min.diff.pct = -Inf,
verbose = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
pseudocount.use = 1,
...
)
}
\arguments{
\item{object}{An object}
\item{...}{Arguments passed to other methods and to specific DE methods}
\item{slot}{Slot to pull data from; note that if \code{test.use} is "negbinom", "poisson", or "DESeq2",
\code{slot} will be set to "counts"}
\item{counts}{Count matrix if using scale.data for DE tests. This is used for
computing pct.1 and pct.2 and for filtering features based on fraction
expressing}
\item{cells.1}{Vector of cell names belonging to group 1}
\item{cells.2}{Vector of cell names belonging to group 2}
\item{features}{Genes to test. Default is to use all genes}
\item{reduction}{Reduction to use in differential expression testing - will test for DE on cell embeddings}
\item{logfc.threshold}{Limit testing to genes which show, on average, at least
X-fold difference (log-scale) between the two groups of cells. Default is 0.25.
Increasing logfc.threshold speeds up the function, but can miss weaker signals.}
\item{test.use}{Denotes which test to use. Available options are:
\itemize{
\item{"wilcox"} : Identifies differentially expressed genes between two
groups of cells using a Wilcoxon Rank Sum test (default)
\item{"bimod"} : Likelihood-ratio test for single cell gene expression,
(McDavid et al., Bioinformatics, 2013)
\item{"roc"} : Identifies 'markers' of gene expression using ROC analysis.
For each gene, evaluates (using AUC) a classifier built on that gene alone,
to classify between two groups of cells. An AUC value of 1 means that
expression values for this gene alone can perfectly classify the two
groupings (i.e. each of the cells in cells.1 exhibits a higher level than
each of the cells in cells.2). An AUC value of 0 also means there is perfect
classification, but in the other direction. A value of 0.5 implies that
the gene has no predictive power to classify the two groups. Returns a
'predictive power' (abs(AUC-0.5) * 2) ranked matrix of putative differentially
expressed genes.
\item{"t"} : Identify differentially expressed genes between two groups of
cells using the Student's t-test.
\item{"negbinom"} : Identifies differentially expressed genes between two
groups of cells using a negative binomial generalized linear model.
Use only for UMI-based datasets
\item{"poisson"} : Identifies differentially expressed genes between two
groups of cells using a Poisson generalized linear model.
Use only for UMI-based datasets
\item{"LR"} : Uses a logistic regression framework to determine differentially
expressed genes. Constructs a logistic regression model predicting group
membership based on each feature individually and compares this to a null
model with a likelihood ratio test.
\item{"MAST"} : Identifies differentially expressed genes between two groups
of cells using a hurdle model tailored to scRNA-seq data. Utilizes the MAST
package to run the DE testing.
\item{"DESeq2"} : Identifies differentially expressed genes between two groups
of cells based on a model using DESeq2 which uses a negative binomial
distribution (Love et al, Genome Biology, 2014). This test does not support
pre-filtering of genes based on average difference (or percent detection rate)
between cell groups. However, genes may be pre-filtered based on their
minimum detection rate (min.pct) across both cell groups. To use this method,
please install DESeq2, using the instructions at
https://bioconductor.org/packages/release/bioc/html/DESeq2.html
}}
\item{min.pct}{only test genes that are detected in a minimum fraction of
min.pct cells in either of the two populations. Meant to speed up the function
by not testing genes that are very infrequently expressed. Default is 0.1}
\item{min.diff.pct}{only test genes that show a minimum difference in the
fraction of detection between the two groups. Set to -Inf by default}
\item{verbose}{Print a progress bar once expression testing begins}
\item{only.pos}{Only return positive markers (FALSE by default)}
\item{max.cells.per.ident}{Down sample each identity class to a max number.
Default is Inf (no downsampling)}
\item{random.seed}{Random seed for downsampling}
\item{latent.vars}{Variables to test, used only when \code{test.use} is one of
'LR', 'negbinom', 'poisson', or 'MAST'}
\item{min.cells.feature}{Minimum number of cells expressing the feature in at least one
of the two groups, currently only used for poisson and negative binomial tests}
\item{min.cells.group}{Minimum number of cells in one of the groups}
\item{pseudocount.use}{Pseudocount to add to averaged expression values when
calculating logFC. 1 by default.}
\item{ident.1}{Identity class to define markers for; pass an object of class
\code{phylo} or 'clustertree' to find markers for a node in a cluster tree;
passing 'clustertree' requires \code{\link{BuildClusterTree}} to have been run}
\item{ident.2}{A second identity class for comparison; if \code{NULL},
use all other cells for comparison; if an object of class \code{phylo} or
'clustertree' is passed to \code{ident.1}, must pass a node to find markers for}
\item{group.by}{Regroup cells into a different identity class prior to performing differential expression (see example)}
\item{subset.ident}{Subset a particular identity class prior to regrouping. Only relevant if group.by is set (see example)}
\item{assay}{Assay to use in differential expression testing}
}
\value{
data.frame with a ranked list of putative markers as rows, and associated
statistics as columns (p-values, ROC score, etc., depending on the test used (\code{test.use})). The following columns are always present:
\itemize{
\item \code{avg_logFC}: log fold-change of the average expression between the two groups. Positive values indicate that the gene is more highly expressed in the first group
\item \code{pct.1}: The percentage of cells where the gene is detected in the first group
\item \code{pct.2}: The percentage of cells where the gene is detected in the second group
\item \code{p_val_adj}: Adjusted p-value, based on Bonferroni correction using all genes in the dataset
}
}
\description{
Finds markers (differentially expressed genes) for identity classes
}
\details{
p-value adjustment is performed using Bonferroni correction based on
the total number of genes in the dataset. Other correction methods are not
recommended, as Seurat pre-filters genes using the arguments above, reducing
the number of tests performed. Lastly, as Aaron Lun has pointed out, p-values
should be interpreted cautiously, as the genes used for clustering are the
same genes tested for differential expression.
}
\examples{
# Find markers for cluster 2
markers <- FindMarkers(object = pbmc_small, ident.1 = 2)
head(x = markers)
# Take all cells in cluster 2, and find markers that separate cells in the 'g1' group (metadata
# variable 'groups')
markers <- FindMarkers(pbmc_small, ident.1 = "g1", group.by = 'groups', subset.ident = "2")
head(x = markers)
# Pass 'clustertree' or an object of class phylo to ident.1 and
# a node to ident.2 as a replacement for FindMarkersNode
pbmc_small <- BuildClusterTree(object = pbmc_small)
markers <- FindMarkers(object = pbmc_small, ident.1 = 'clustertree', ident.2 = 5)
head(x = markers)
}
\references{
McDavid A, Finak G, Chattopadhyay PK, et al. Data exploration,
quality control and testing in single-cell qPCR-based gene expression experiments.
Bioinformatics. 2013;29(4):461-467. doi:10.1093/bioinformatics/bts714
Trapnell C, et al. The dynamics and regulators of cell fate
decisions are revealed by pseudotemporal ordering of single cells. Nature
Biotechnology volume 32, pages 381-386 (2014)
Andrew McDavid, Greg Finak and Masanao Yajima (2017). MAST: Model-based
Analysis of Single Cell Transcriptomics. R package version 1.2.1.
https://github.com/RGLab/MAST/
Love MI, Huber W and Anders S (2014). "Moderated estimation of
fold change and dispersion for RNA-seq data with DESeq2." Genome Biology.
https://bioconductor.org/packages/release/bioc/html/DESeq2.html
}