\name{epiClassify}
\Rdversion{1.1}
\alias{epiClassify}
\alias{epiClassify-methods}
\alias{epiClassify,RLBigData-method}
\alias{epiClassify,RecLinkData-method}
\title{
Classify record pairs with EpiLink weights
}
\description{
Classifies record pairs as link, non-link or possible link based on
weights computed by \code{\link{epiWeights}} and the thresholds
passed as arguments.
}
\usage{
epiClassify(rpairs, threshold.upper, threshold.lower = threshold.upper,
...)
\S4method{epiClassify}{RecLinkData}(rpairs, threshold.upper, threshold.lower = threshold.upper)
\S4method{epiClassify}{RLBigData}(rpairs, threshold.upper, threshold.lower = threshold.upper,
e = 0.01, f = getFrequencies(rpairs))
}
\arguments{
\item{rpairs}{
\code{\link{RecLinkData}} or \code{\linkS4class{RLBigData}} object. Record pairs to be classified.}
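\item{threshold.upper}{A numeric value between 0 and 1. Record pairs with
weights greater than or equal to this value are classified as links.}
\item{threshold.lower}{A numeric value between 0 and 1, lower than or equal to
\code{threshold.upper}. Record pairs with weights below \code{threshold.upper}
but greater than or equal to this value are classified as possible links.}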
\item{e}{Numeric vector. Estimated error rate(s).}
\item{f}{Numeric vector. Average frequency of attribute values.}
\item{...}{Placeholder for optional arguments}
}
\details{
All record pairs with weights greater than or
equal to \code{threshold.upper} are classified as links. Record pairs with
weights smaller than \code{threshold.upper} and greater than or equal to
\code{threshold.lower} are classified as possible links. All remaining
record pairs are classified as non-links.
For the \code{"RecLinkData"} method, weights must have been calculated
for \code{rpairs} using \code{\link{epiWeights}}.
The \code{"RLBigData"}
method checks if weights are present in the underlying database.
If this is the case, classification
is based on the existing weights. If not, weights are calculated on the fly
during classification, but not stored. The latter behaviour might be preferable
when a very large dataset is to be classified or disk space is limited
(see also the notes to \code{\link{epiWeights}}).
}
\value{
For the \code{"\link{RecLinkData}"} method, an S3 object
of class \code{"\link{RecLinkResult}"} that represents a copy
of \code{rpairs} with an additional element \code{prediction}, which stores
the classification result.
For the \code{"\linkS4class{RLBigData}"} method, an S4 object of class
\code{"\linkS4class{RLResult}"}.
}
\author{Andreas Borg, Murat Sariyar}
\seealso{
\code{\link{epiWeights}}
}
\examples{
# generate record pairs
data(RLdata500)
p=compare.dedup(RLdata500,strcmp=TRUE ,strcmpfun=levenshteinSim,
identity=identity.RLdata500)
# calculate weights
p=epiWeights(p)
# classify and show results
summary(epiClassify(p,0.6))
}
\keyword{classif}