https://github.com/satijalab/seurat
Raw File
Tip revision: 49a1be0427f2f26a531eb468ba93eeb18d8a2edb authored by satijalab on 13 December 2019, 20:42:25 UTC
Merge pull request #2416 from satijalab/develop
Tip revision: 49a1be0
FindMarkers.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generics.R, R/differential_expression.R
\name{FindMarkers}
\alias{FindMarkers}
\alias{FindMarkersNode}
\alias{FindMarkers.default}
\alias{FindMarkers.Seurat}
\title{Gene expression markers of identity classes}
\usage{
FindMarkers(object, ...)

\method{FindMarkers}{default}(
  object,
  slot = "data",
  counts = numeric(),
  cells.1 = NULL,
  cells.2 = NULL,
  features = NULL,
  reduction = NULL,
  logfc.threshold = 0.25,
  test.use = "wilcox",
  min.pct = 0.1,
  min.diff.pct = -Inf,
  verbose = TRUE,
  only.pos = FALSE,
  max.cells.per.ident = Inf,
  random.seed = 1,
  latent.vars = NULL,
  min.cells.feature = 3,
  min.cells.group = 3,
  pseudocount.use = 1,
  ...
)

\method{FindMarkers}{Seurat}(
  object,
  ident.1 = NULL,
  ident.2 = NULL,
  group.by = NULL,
  subset.ident = NULL,
  assay = NULL,
  slot = "data",
  reduction = NULL,
  features = NULL,
  logfc.threshold = 0.25,
  test.use = "wilcox",
  min.pct = 0.1,
  min.diff.pct = -Inf,
  verbose = TRUE,
  only.pos = FALSE,
  max.cells.per.ident = Inf,
  random.seed = 1,
  latent.vars = NULL,
  min.cells.feature = 3,
  min.cells.group = 3,
  pseudocount.use = 1,
  ...
)
}
\arguments{
\item{object}{An object}

\item{...}{Arguments passed to other methods and to specific DE methods}

\item{slot}{Slot to pull data from; note that if \code{test.use} is "negbinom", "poisson", or "DESeq2",
\code{slot} will be set to "counts"}

\item{counts}{Count matrix if using scale.data for DE tests. This is used for
computing pct.1 and pct.2 and for filtering features based on fraction
expressing}

\item{cells.1}{Vector of cell names belonging to group 1}

\item{cells.2}{Vector of cell names belonging to group 2}

\item{features}{Genes to test. Default is to use all genes}

\item{reduction}{Reduction to use in differential expression testing - will test for DE on cell embeddings}

\item{logfc.threshold}{Limit testing to genes which show, on average, at least
X-fold difference (log-scale) between the two groups of cells. Default is 0.25.
Increasing logfc.threshold speeds up the function, but can miss weaker signals.}

\item{test.use}{Denotes which test to use. Available options are:
\itemize{
 \item{"wilcox"} : Identifies differentially expressed genes between two
 groups of cells using a Wilcoxon Rank Sum test (default)
 \item{"bimod"} : Likelihood-ratio test for single cell gene expression,
 (McDavid et al., Bioinformatics, 2013)
 \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis.
 For each gene, evaluates (using AUC) a classifier built on that gene alone,
 to classify between two groups of cells. An AUC value of 1 means that
 expression values for this gene alone can perfectly classify the two
 groupings (i.e. each of the cells in cells.1 exhibits a higher level than
 each of the cells in cells.2). An AUC value of 0 also means there is perfect
 classification, but in the other direction. A value of 0.5 implies that
 the gene has no predictive power to classify the two groups. Returns a
 'predictive power' (abs(AUC-0.5) * 2) ranked matrix of putative differentially
 expressed genes.
 \item{"t"} : Identify differentially expressed genes between two groups of
 cells using the Student's t-test.
 \item{"negbinom"} : Identifies differentially expressed genes between two
  groups of cells using a negative binomial generalized linear model.
  Use only for UMI-based datasets
 \item{"poisson"} : Identifies differentially expressed genes between two
  groups of cells using a poisson generalized linear model.
  Use only for UMI-based datasets
 \item{"LR"} : Uses a logistic regression framework to determine differentially
 expressed genes. Constructs a logistic regression model predicting group
 membership based on each feature individually and compares this to a null
 model with a likelihood ratio test.
 \item{"MAST"} : Identifies differentially expressed genes between two groups
 of cells using a hurdle model tailored to scRNA-seq data. Utilizes the MAST
 package to run the DE testing.
 \item{"DESeq2"} : Identifies differentially expressed genes between two groups
 of cells based on a model using DESeq2 which uses a negative binomial
 distribution (Love et al, Genome Biology, 2014). This test does not support
 pre-filtering of genes based on average difference (or percent detection rate)
 between cell groups. However, genes may be pre-filtered based on their
 minimum detection rate (min.pct) across both cell groups. To use this method,
 please install DESeq2, using the instructions at
 https://bioconductor.org/packages/release/bioc/html/DESeq2.html
}}

\item{min.pct}{only test genes that are detected in a minimum fraction of
min.pct cells in either of the two populations. Meant to speed up the function
by not testing genes that are very infrequently expressed. Default is 0.1}

\item{min.diff.pct}{only test genes that show a minimum difference in the
fraction of detection between the two groups. Set to -Inf by default}

\item{verbose}{Print a progress bar once expression testing begins}

\item{only.pos}{Only return positive markers (FALSE by default)}

\item{max.cells.per.ident}{Downsample each identity class to a maximum number
of cells. Default is no downsampling (set to Inf)}

\item{random.seed}{Random seed for downsampling}

\item{latent.vars}{Variables to test, used only when \code{test.use} is one of
'LR', 'negbinom', 'poisson', or 'MAST'}

\item{min.cells.feature}{Minimum number of cells expressing the feature in at least one
of the two groups, currently only used for poisson and negative binomial tests}

\item{min.cells.group}{Minimum number of cells in one of the groups}

\item{pseudocount.use}{Pseudocount to add to averaged expression values when
calculating logFC. 1 by default.}

\item{ident.1}{Identity class to define markers for; pass an object of class
\code{phylo} or 'clustertree' to find markers for a node in a cluster tree;
passing 'clustertree' requires \code{\link{BuildClusterTree}} to have been run}

\item{ident.2}{A second identity class for comparison; if \code{NULL},
use all other cells for comparison; if an object of class \code{phylo} or
'clustertree' is passed to \code{ident.1}, must pass a node to find markers for}

\item{group.by}{Regroup cells into a different identity class prior to performing differential expression (see example)}

\item{subset.ident}{Subset a particular identity class prior to regrouping. Only relevant if group.by is set (see example)}

\item{assay}{Assay to use in differential expression testing}
}
\value{
data.frame with a ranked list of putative markers as rows, and associated
statistics as columns (p-values, ROC score, etc., depending on the test used (\code{test.use})). The following columns are always present:
\itemize{
  \item \code{avg_logFC}: log fold-change of the average expression between the two groups. Positive values indicate that the gene is more highly expressed in the first group
  \item \code{pct.1}: The percentage of cells where the gene is detected in the first group
  \item \code{pct.2}: The percentage of cells where the gene is detected in the second group
  \item \code{p_val_adj}: Adjusted p-value, based on Bonferroni correction using all genes in the dataset
}
}
\description{
Finds markers (differentially expressed genes) for identity classes
}
\details{
p-value adjustment is performed using Bonferroni correction based on
the total number of genes in the dataset. Other correction methods are not
recommended, as Seurat pre-filters genes using the arguments above, reducing
the number of tests performed. Lastly, as Aaron Lun has pointed out, p-values
should be interpreted cautiously, as the genes used for clustering are the
same genes tested for differential expression.
}
\examples{
# Find markers for cluster 2
markers <- FindMarkers(object = pbmc_small, ident.1 = 2)
head(x = markers)

# Take all cells in cluster 2, and find markers that separate cells in the 'g1' group
# (metadata variable 'groups') from the other cells in that cluster
markers <- FindMarkers(pbmc_small, ident.1 = "g1", group.by = 'groups', subset.ident = "2")
head(x = markers)

# Pass 'clustertree' or an object of class phylo to ident.1 and
# a node to ident.2 as a replacement for FindMarkersNode
pbmc_small <- BuildClusterTree(object = pbmc_small)
markers <- FindMarkers(object = pbmc_small, ident.1 = 'clustertree', ident.2 = 5)
head(x = markers)

}
\references{
McDavid A, Finak G, Chattopadyay PK, et al. Data exploration,
quality control and testing in single-cell qPCR-based gene expression experiments.
Bioinformatics. 2013;29(4):461-467. doi:10.1093/bioinformatics/bts714

Trapnell C, et al. The dynamics and regulators of cell fate
decisions are revealed by pseudotemporal ordering of single cells. Nature
Biotechnology volume 32, pages 381-386 (2014)

Andrew McDavid, Greg Finak and Masanao Yajima (2017). MAST: Model-based
Analysis of Single Cell Transcriptomics. R package version 1.2.1.
https://github.com/RGLab/MAST/

Love MI, Huber W and Anders S (2014). "Moderated estimation of
fold change and dispersion for RNA-seq data with DESeq2." Genome Biology.
https://bioconductor.org/packages/release/bioc/html/DESeq2.html
}
back to top