Revision 3aa06241783d83bddafcab6454692454646af66d authored by Marek Gagolewski on 07 April 2017, 12:08:03 UTC, committed by cran-robot on 07 April 2017, 12:08:03 UTC
1 parent 87b62a0
Raw File
stri_match.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/search_match_4.R
\name{stri_match_all}
\alias{stri_match_all}
\alias{stri_match_first}
\alias{stri_match_last}
\alias{stri_match}
\alias{stri_match_all_regex}
\alias{stri_match_first_regex}
\alias{stri_match_last_regex}
\title{Extract Regex Pattern Matches, Together with Capture Groups}
\usage{
stri_match_all(str, ..., regex)

stri_match_first(str, ..., regex)

stri_match_last(str, ..., regex)

stri_match(str, ..., regex, mode = c("first", "all", "last"))

stri_match_all_regex(str, pattern, omit_no_match = FALSE,
  cg_missing = NA_character_, ..., opts_regex = NULL)

stri_match_first_regex(str, pattern, cg_missing = NA_character_, ...,
  opts_regex = NULL)

stri_match_last_regex(str, pattern, cg_missing = NA_character_, ...,
  opts_regex = NULL)
}
\arguments{
\item{str}{character vector with strings to search in}

\item{...}{supplementary arguments passed to the underlying functions,
including additional settings for \code{opts_regex}}

\item{mode}{single string;
one of: \code{"first"} (the default), \code{"all"}, \code{"last"}}

\item{pattern, regex}{character vector defining regex patterns to search for;
for more details refer to \link{stringi-search-regex}}

\item{omit_no_match}{single logical value; if \code{FALSE},
then a row with missing values will indicate that there was no match;
\code{stri_match_all_*} only}

\item{cg_missing}{single string to be used if a capture group match
is unavailable}

\item{opts_regex}{a named list with \pkg{ICU} Regex settings
as generated with \code{\link{stri_opts_regex}}; \code{NULL}
for default settings}
}
\value{
For \code{stri_match_all*},
a list of character matrices is returned. Each list element
represents the results of a separate search scenario.

For \code{stri_match_first*} and \code{stri_match_last*},
on the other hand, a character matrix is returned.
Here the search results are provided as separate rows.

The first matrix column gives the whole match. The second one corresponds to
the first capture group, the third -- the second capture group, and so on.
}
\description{
These functions extract substrings of \code{str} that
match a given regex \code{pattern}. Additionally, they extract matches
to every \emph{capture group}, i.e. to all the subpatterns given
in round parentheses.
}
\details{
Vectorized over \code{str} and \code{pattern}.

If no pattern match is detected and \code{omit_no_match=FALSE},
then \code{NA}s are included in the resulting matrix (matrices), see Examples.

Note that the \pkg{ICU} regex engine currently does not support named capture groups.

\code{stri_match}, \code{stri_match_all}, \code{stri_match_first},
and \code{stri_match_last} are convenience functions.
They simply call \code{stri_match_*_regex}; they have been
provided for consistency with other string searching functions' wrappers,
see, e.g., \code{\link{stri_extract}}.
}
\examples{
stri_match_all_regex("breakfast=eggs, lunch=pizza, dessert=icecream",
   "(\\\\w+)=(\\\\w+)")
stri_match_all_regex(c("breakfast=eggs", "lunch=pizza", "no food here"),
   "(\\\\w+)=(\\\\w+)")
stri_match_all_regex(c("breakfast=eggs;lunch=pizza",
   "breakfast=bacon;lunch=spaghetti", "no food here"),
   "(\\\\w+)=(\\\\w+)")
stri_match_first_regex(c("breakfast=eggs;lunch=pizza",
   "breakfast=bacon;lunch=spaghetti", "no food here"),
   "(\\\\w+)=(\\\\w+)")
stri_match_last_regex(c("breakfast=eggs;lunch=pizza",
   "breakfast=bacon;lunch=spaghetti", "no food here"),
   "(\\\\w+)=(\\\\w+)")

stri_match_first_regex(c("abcd", ":abcd", ":abcd:"), "^(:)?([^:]*)(:)?$")
stri_match_first_regex(c("abcd", ":abcd", ":abcd:"), "^(:)?([^:]*)(:)?$", cg_missing="")

# Match all the patterns of the form XYX, including overlapping matches:
stri_match_all_regex("ACAGAGACTTTAGATAGAGAAGA", "(?=(([ACGT])[ACGT]\\\\2))")[[1]][,2]
# Compare the above to:
stri_extract_all_regex("ACAGAGACTTTAGATAGAGAAGA", "([ACGT])[ACGT]\\\\1")

}
\seealso{
Other search_extract: \code{\link{stri_extract_all_boundaries}},
  \code{\link{stri_extract_all}},
  \code{\link{stringi-search}}
}
back to top