https://github.com/cran/RecordLinkage
Tip revision: d650bb5b048f47ae3237cec162d379a89734ff57 authored by Andreas Borg on 02 May 2016, 13:21:08 UTC
version 0.4-9
version 0.4-9
Tip revision: d650bb5
phonetics.Rd
\encoding{latin1}
\name{phonetics}
\alias{phonetics}
\alias{pho_h}
\alias{soundex}
\title{Phonetic Code}
\description{
Interface to phonetic coding functions.
}
\usage{
pho_h(str)
soundex(str)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{str}{A character vector or matrix. Factors are converted to character.}
}
\details{
Translates its argument to a phonetic code. \code{pho_h}
by \enc{Jörg}{Joerg} Michael (see references) is intended for the German language
and normalizes umlauts and accented characters.
\code{soundex} is a widespread algorithm for English names. This implementation
can only handle common characters. Both algorithms strip off
non-alphabetical characters, with the exception that numbers are left
unchanged by \code{pho_h}.
The C code for \code{soundex} was taken from PostgreSQL 8.3.6.
}
\value{
A character vector or matrix with the same size and dimensions as \code{str},
containing its phonetic encoding.
}
\references{\enc{Jörg}{Joerg} Michael, \enc{Doppelgänger}{Doppelgaenger} gesucht
-- Ein Programm \enc{für}{fuer} kontextsensitive phonetische Textumwandlung,
in: c't 1999, No. 25, pp. 252--261.
The source code is published (under GPL) at
\url{http://www.heise.de/ct/ftp/99/25/252/}.
}
\author{Andreas Borg (R interface only)}
\seealso{\code{\link{jarowinkler}} and \code{\link{levenshteinSim}}
for string comparison.}
\keyword{misc}