https://github.com/hadley/dplyr
Raw File
Tip revision: 8a18247f3518939390c1354629f4497d8a918992 authored by Kirill Müller on 13 March 2018, 20:12:34 UTC
Merge branch 'r-0.7.4.9001' into production
Tip revision: 8a18247
summarise.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/manip.r
\name{summarise}
\alias{summarise}
\alias{summarize}
\title{Reduces multiple values down to a single value}
\usage{
summarise(.data, ...)

summarize(.data, ...)
}
\arguments{
\item{.data}{A tbl. All main verbs are S3 generics and provide methods
for \code{\link[=tbl_df]{tbl_df()}}, \code{\link[dtplyr:tbl_dt]{dtplyr::tbl_dt()}} and \code{\link[dbplyr:tbl_dbi]{dbplyr::tbl_dbi()}}.}

\item{...}{Name-value pairs of summary functions. The name will be the
name of the variable in the result. The value should be an expression
that returns a single value like \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.

These arguments are automatically \link[rlang:quo]{quoted} and
\link[rlang:eval_tidy]{evaluated} in the context of the data
frame. They support \link[rlang:quasiquotation]{unquoting} and
splicing. See \code{vignette("programming")} for an introduction to
these concepts.}
}
\value{
An object of the same class as \code{.data}. One grouping level will
be dropped.
}
\description{
\code{summarise()} is typically used on grouped data created by \code{\link[=group_by]{group_by()}}.
The output will have one row for each group.
}
\section{Useful functions}{

\itemize{
\item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}}
\item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}}
\item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=quantile]{quantile()}}
\item Position: \code{\link[=first]{first()}}, \code{\link[=last]{last()}}, \code{\link[=nth]{nth()}}
\item Count: \code{\link[=n]{n()}}, \code{\link[=n_distinct]{n_distinct()}}
\item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}}
}
}

\section{Backend variations}{


Data frames are the only backend that supports creating a variable and
using it in the same summary. See examples for more details.
}

\section{Tidy data}{

When applied to a data frame, row names are silently dropped. To preserve them,
convert them to an explicit variable with \code{\link[tibble:rownames_to_column]{tibble::rownames_to_column()}}.
}

\examples{
# A summary applied to an ungrouped tbl returns a single row
mtcars \%>\%
  summarise(mean = mean(disp), n = n())

# Usually, you'll want to group first
mtcars \%>\%
  group_by(cyl) \%>\%
  summarise(mean = mean(disp), n = n())

# Each summary call removes one grouping level (since that group
# is now just a single row)
mtcars \%>\%
  group_by(cyl, vs) \%>\%
  summarise(cyl_n = n()) \%>\%
  group_vars()

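# Note that with data frames, newly created summaries immediately
# overwrite existing variables, so sd(disp) below is computed from the
# already-summarised disp rather than from the original column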
mtcars \%>\%
  group_by(cyl) \%>\%
  summarise(disp = mean(disp), sd = sd(disp))


# summarise() supports quasiquotation. You can unquote raw
# expressions or quosures:
var <- quo(mean(cyl))
summarise(mtcars, !!var)
}
\seealso{
Other single table verbs: \code{\link{arrange}},
  \code{\link{filter}}, \code{\link{mutate}},
  \code{\link{select}}, \code{\link{slice}}
}
\concept{single table verbs}
back to top