https://github.com/cran/RecordLinkage
Raw File
Tip revision: b32452149857b15849e9c82fb81e812df6e921fc authored by Murat Sariyar on 08 November 2022, 13:10:15 UTC
version 0.4-12.4
Tip revision: b324521
epiClassify.Rd
\name{epiClassify}
\Rdversion{1.1}
\alias{epiClassify}
\alias{epiClassify-methods}
\alias{epiClassify,RLBigData-method}
\alias{epiClassify,RecLinkData-method}

\title{
  Classify record pairs with EpiLink weights
}
\description{
  Classifies record pairs as link, non-link or possible link based on
  weights computed by \code{\link{epiWeights}} and the thresholds
  passed as arguments.
}

\usage{
epiClassify(rpairs, threshold.upper, threshold.lower = threshold.upper,
  ...)

\S4method{epiClassify}{RecLinkData}(rpairs, threshold.upper, threshold.lower = threshold.upper)

\S4method{epiClassify}{RLBigData}(rpairs, threshold.upper, threshold.lower = threshold.upper,
  e = 0.01, f = getFrequencies(rpairs), withProgressBar = (sink.number()==0))
}

\arguments{
  \item{rpairs}{
    \code{\link{RecLinkData}} or \code{\linkS4class{RLBigData}} object.
    Record pairs to be classified.}
  \item{threshold.upper}{A numeric value between 0 and 1. }
  \item{threshold.lower}{A numeric value between 0 and 1, not greater than
    \code{threshold.upper}. Defaults to \code{threshold.upper}.}
  \item{e}{Numeric vector. Estimated error rate(s).}
  \item{f}{Numeric vector. Average frequency of attribute values.}
  \item{withProgressBar}{Logical. Whether to display a progress bar.}
  \item{...}{Placeholder for optional arguments}
}

\details{
  All record pairs with weights greater than or
  equal to \code{threshold.upper} are classified as links. Record pairs with
  weights smaller than \code{threshold.upper} and greater than or equal to
  \code{threshold.lower} are classified as possible links. All remaining
  record pairs are classified as non-links.
  
  For the \code{"RecLinkData"} method, weights must have been calculated
  for \code{rpairs} using \code{\link{epiWeights}}.

  The \code{"RLBigData"} method displays a progress bar only if
  weights are calculated on the fly. By default, the progress bar is
  suppressed when output is diverted by \code{\link{sink}} (e.g. in a
  Sweave script).
}

\value{
  For the \code{"\link{RecLinkData}"} method, an S3 object
  of class \code{"\link{RecLinkResult}"} that represents a copy
  of \code{rpairs} with an additional element \code{prediction}, which
  stores the classification result.

  For the \code{"\linkS4class{RLBigData}"} method, an S4 object of class
  \code{"\linkS4class{RLResult}"}.
}

\author{Andreas Borg, Murat Sariyar}

\seealso{
  \code{\link{epiWeights}}
}

\examples{
# generate record pairs
data(RLdata500)
p=compare.dedup(RLdata500,strcmp=TRUE ,strcmpfun=levenshteinSim,
  identity=identity.RLdata500, blockfld=list("by", "bm", "bd"))

# calculate weights
p=epiWeights(p)

# classify and show results
summary(epiClassify(p,0.6))
}
\keyword{classif}
back to top