Content - adf9a7e3c38c90b7d302b0d8a5592e0677c65255 - 86296d0/mutate-joins.Rd

mutate-joins.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/join.r
\name{mutate-joins}
\alias{mutate-joins}
\alias{join}
\alias{join.data.frame}
\alias{inner_join}
\alias{inner_join.data.frame}
\alias{left_join}
\alias{left_join.data.frame}
\alias{right_join}
\alias{right_join.data.frame}
\alias{full_join}
\alias{full_join.data.frame}
\title{Mutating joins}
\usage{
inner_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)

\method{inner_join}{data.frame}(
  x,
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  na_matches = c("na", "never")
)

left_join(
  x,
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = FALSE
)

\method{left_join}{data.frame}(
  x,
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = FALSE,
  na_matches = c("na", "never")
)

right_join(
  x,
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = FALSE
)

\method{right_join}{data.frame}(
  x,
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = FALSE,
  na_matches = c("na", "never")
)

full_join(
  x,
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = FALSE
)

\method{full_join}{data.frame}(
  x,
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = FALSE,
  na_matches = c("na", "never")
)
}
\arguments{
\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{by}{A character vector of variables to join by.

If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so that you
can check they're correct; suppress the message by supplying \code{by} explicitly.

To join by different variables on \code{x} and \code{y}, use a named vector.
For example, \code{by = c("a" = "b")} will match \code{x$a} to \code{y$b}.

To join by multiple variables, use a vector with length > 1.
For example, \code{by = c("a", "b")} will match \code{x$a} to \code{y$a} and \code{x$b} to
\code{y$b}. Use a named vector to match different variables in \code{x} and \code{y}.
For example, \code{by = c("a" = "b", "c" = "d")} will match \code{x$a} to \code{y$b} and
\code{x$c} to \code{y$d}.

To perform a cross-join, generating all combinations of \code{x} and \code{y},
use \code{by = character()}.}

\item{copy}{If \code{x} and \code{y} are not from the same data source,
and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
same src as \code{x}.  This allows you to join tables across srcs, but
it is a potentially expensive operation so you must opt into it.}

\item{suffix}{If there are non-joined duplicate variables in \code{x} and
\code{y}, these suffixes will be added to the output to disambiguate them.
Should be a character vector of length 2.}

\item{...}{Other parameters passed onto methods.}

\item{na_matches}{Should \code{NA} and \code{NaN} values match one another?

The default, \code{"na"}, treats two \code{NA} or \code{NaN} values as equal, like
\code{\%in\%}, \code{\link[=match]{match()}}, and \code{\link[=merge]{merge()}}.

Use \code{"never"} to always treat two \code{NA} or \code{NaN} values as different, as
joins for database sources do, and similarly to \code{merge(incomparables = NA)}.}

\item{keep}{Should the join keys from both \code{x} and \code{y} be preserved in the
output? Only applies to \code{nest_join()}, \code{left_join()}, \code{right_join()}, and
\code{full_join()}.}
}
\value{
An object of the same type as \code{x}. The order of the rows and columns of \code{x}
is preserved as much as possible. The output has the following properties:
\itemize{
\item For \code{inner_join()}, a subset of \code{x} rows.
For \code{left_join()}, all \code{x} rows.
For \code{right_join()}, a subset of \code{x} rows, followed by unmatched \code{y} rows.
For \code{full_join()}, all \code{x} rows, followed by unmatched \code{y} rows.
\item For all joins, rows will be duplicated if one or more rows in \code{x} matches
multiple rows in \code{y}.
\item Output columns include all \code{x} columns and all \code{y} columns. If columns in
\code{x} and \code{y} have the same name (and aren't included in \code{by}), \code{suffix}es are
added to disambiguate.
\item Output columns included in \code{by} are coerced to a common type across
\code{x} and \code{y}.
\item Groups are taken from \code{x}.
}
}
\description{
The mutating joins add columns from \code{y} to \code{x}, matching rows based on the
keys:
\itemize{
\item \code{inner_join()}: includes only the rows in \code{x} that have a match in \code{y}.
\item \code{left_join()}: includes all rows in \code{x}.
\item \code{right_join()}: includes all rows in \code{y}.
\item \code{full_join()}: includes all rows in \code{x} or \code{y}.
}

If a row in \code{x} matches multiple rows in \code{y}, all the rows in \code{y} will be returned
once for each matching row in \code{x}.
}
\section{Methods}{

These functions are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
\item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
}
}

\examples{
band_members \%>\% inner_join(band_instruments)
band_members \%>\% left_join(band_instruments)
band_members \%>\% right_join(band_instruments)
band_members \%>\% full_join(band_instruments)

# To suppress the message about joining variables, supply `by`
band_members \%>\% inner_join(band_instruments, by = "name")
# This is good practice in production code

# Use a named `by` if the join variables have different names
band_members \%>\% full_join(band_instruments2, by = c("name" = "artist"))
# By default, the join keys from `x` and `y` are coalesced in the output; use
# `keep = TRUE` to keep the join keys from both `x` and `y`
band_members \%>\%
  full_join(band_instruments2, by = c("name" = "artist"), keep = TRUE)

# If a row in `x` matches multiple rows in `y`, all the rows in `y` will be
# returned once for each matching row in `x`
df1 <- tibble(x = 1:3)
df2 <- tibble(x = c(1, 1, 2), y = c("first", "second", "third"))
df1 \%>\% left_join(df2)

# By default, NAs match other NAs so that there are two
# rows in the output of this join:
df1 <- data.frame(x = c(1, NA), y = 2)
df2 <- data.frame(x = c(1, NA), z = 3)
left_join(df1, df2)

# You can optionally request that NAs don't match, giving a
# result that more closely resembles SQL joins
left_join(df1, df2, na_matches = "never")
}
\seealso{
Other joins: 
\code{\link{filter-joins}},
\code{\link{nest_join}()}
}
\concept{joins}