swh:1:snp:a4c99a50dc49f82b591f268001b320f8c3ca0041
Tip revision: dc000f2a5f006d137f66716b086025d618bf8306 authored by John M Chambers on 14 July 2008, 00:00:00 UTC
version 1.0-5
version 1.0-5
Tip revision: dc000f2
digest.Rd
% $Id: digest.Rd,v 1.1 2007/05/22 14:25:42 jmc Exp $
\name{digest}
\alias{digest}
\title{Create hash function digests for arbitrary R objects}
\description{
The \code{digest} function applies a cryptographic hash function to
arbitrary \R objects. By default, the objects are internally
serialized, and any one of the currently implemented hash algorithms
(MD5, SHA-1 or CRC32) can be used to compute a compact digest of
the serialized object.
This version of the function accomplishes essentially the same
result as the function of the same name in the \code{digest}
package, but via somewhat different computations, as discussed in
the book \dQuote{Software for Data Analysis}.
}
\usage{
digest(object, algo=c("md5", "sha1", "crc32"),
serialize=TRUE, file=FALSE, length=Inf, use.Call = FALSE)
}
\arguments{
\item{object}{An arbitrary R object which will then be passed to the
\code{\link{serialize}} function, unless the \code{serialize}
argument is set to \code{FALSE}}
\item{algo}{The algorithm to be used; currently available choices are
\code{md5}, which is also the default, \code{sha1} and \code{crc32}}
\item{serialize}{A logical variable indicating whether the object
should be serialized using \code{serialize}. Setting this to
\code{FALSE} allows the digest output of given character
strings to be compared with known control output.}
\item{file}{A logical variable indicating whether the object is a file name.}
\item{length}{Number of characters to process. By default, when
\code{length} is set to \code{Inf}, the whole string or file is
processed.}
\item{use.Call}{Should the C interface use the \code{.Call()}
interface or the \code{.C()} interface. An internal question
whose answer should not affect the result.}
}
\value{
The \code{digest} function returns a character string of a fixed
length containing the requested digest of the supplied R object. For
MD5, a string of length 32 is returned; for SHA-1, a string of length
40 is returned; for CRC32 a string of length 8.
}
\details{
See the documentation for the \code{digest} package version of the
function, and the references below for the underlying algorithms.
The version in the present package has been modified for tutorial
reasons, to illustrate some principles of the design of interfaces to
C code.
}
\references{
MD5: \url{http://www.ietf.org/rfc/rfc1321.txt}.
SHA-1: \url{http://www.itl.nist.gov/fipspubs/fip180-1.htm}.
CRC32: \url{ftp://ftp.rocksoft.com/clients/rocksoft/papers/crc_v3.txt}.
\url{http://www.cr0.net:8040/code/crypto} for the underlying
C functions used here for sha-1 and md5, and further references.
\url{http://zlib.net} for documentation on the zlib library which
supplied the code for crc32.
}
\author{Dirk Eddelbuettel \email{edd@debian.org} for the original \R interface;
Antoine Lucas for the integration of crc32; Jarek Tuszynski for the
file-based operations; Christophe Devine for the hash function
implementations for sha-1 and md5; Jean-loup Gailly and Mark Adler
for crc32.
John Chambers for the modified C and \R code in this package.}
\seealso{\code{\link{serialize}}, \code{\link{md5sum}}}
\examples{
## MD5 test suite from RFC 1321: every input string below is paired,
## position by position, with its published digest in md5Output.
md5Input <- c(
  "",
  "a",
  "abc",
  "message digest",
  "abcdefghijklmnopqrstuvwxyz",
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
  ## the last input is split in two only to keep the line short
  paste("12345678901234567890123456789012345678901234567890123456789012",
        "345678901234567890", sep = "")
)
md5Output <- c(
  "d41d8cd98f00b204e9800998ecf8427e",
  "0cc175b9c0f1b6a831c399e269772661",
  "900150983cd24fb0d6963f7d28e17f72",
  "f96b697d7cb7938d525a2f31aaf161d0",
  "c3fcd3d76192e4007dfb496cca67e13b",
  "d174ab98d277d9f5a5611c2c9f419d9f",
  "57edf4a22be3c955ac49da2e2107b67a"
)
## serialize = FALSE hashes the strings themselves rather than their
## serialized form, so the results are comparable to the reference values
for (i in seq_along(md5Input)) {
  md5 <- digest(md5Input[i], serialize = FALSE)
  stopifnot(identical(md5, md5Output[i]))
}
## SHA-1 reference inputs.  The trailing NULL is dropped by c(), so only
## two inputs exist and the loop below checks only the first two digests.
sha1Input <- c(
  "abc",
  "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
  NULL
)
## The third expected value has no matching input here and is never
## compared (presumably the digest of the long FIPS 180-1 sample message,
## one million repetitions of the letter a -- TODO confirm).
sha1Output <- c(
  "a9993e364706816aba3e25717850c26c9cd0d89d",
  "84983e441c3bd26ebaae4aa1f95129e5e54670f1",
  "34aa973cd4c4daa4f61eeb2bdbad27316534016f"
)
for (i in seq_along(sha1Input)) {
  sha1 <- digest(sha1Input[i], algo = "sha1", serialize = FALSE)
  stopifnot(identical(sha1, sha1Output[i]))
}
## CRC32 checks on the same two strings used for SHA-1.  As above, the
## NULL placeholder is dropped by c(), leaving two inputs.
crc32Input <- c(
  "abc",
  "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
  NULL
)
## Only the first two expected values are compared; the third has no
## matching input in this example.
crc32Output <- c(
  "352441c2",
  "171a3f5f",
  "2ef80172"
)
for (i in seq_along(crc32Input)) {
  crc32 <- digest(crc32Input[i], algo = "crc32", serialize = FALSE)
  stopifnot(identical(crc32, crc32Output[i]))
}
# "abc" is one of the FIPS 180-1 sample messages; check its SHA-1 digest
sha1 <- digest("abc", algo = "sha1", serialize = FALSE)
stopifnot(
  identical(sha1, "a9993e364706816aba3e25717850c26c9cd0d89d")
)
# digest of an ordinary R list structure, serialized by default
digest(
  list(
    LETTERS,
    data.frame(a = letters[1:5], b = matrix(1:10, ncol = 2))
  )
)
# Check the 'length' argument and file input: hashing a file by name must
# agree with hashing the same text held in memory.
fname <- file.path(R.home(), "COPYING")
x <- readChar(fname, file.info(fname)$size)  # whole file as one string
for (alg in c("sha1", "md5", "crc32")) {
  # Partial input: the first 18000 characters, obtained three ways
  # (string plus length, file plus length, explicitly truncated string).
  fromString <- digest(x, length = 18000, algo = alg, serialize = FALSE)
  fromFile <- digest(fname, length = 18000, algo = alg, serialize = FALSE,
                     file = TRUE)
  fromSubstr <- digest(substr(x, 1, 18000), algo = alg, serialize = FALSE)
  stopifnot(identical(fromString, fromFile),
            identical(fromString, fromSubstr))
  # Whole input: string versus file.
  fromString <- digest(x, algo = alg, serialize = FALSE)
  fromFile <- digest(fname, algo = alg, serialize = FALSE, file = TRUE)
  stopifnot(identical(fromString, fromFile))
}
# Cross-check the md5 digest of a file against md5sum() from the tools
# package.
library(tools)
fname <- file.path(R.home(), "COPYING")
# as.character() strips the names from the md5sum() result so that
# identical() compares the bare hash strings
h1 <- as.character(md5sum(fname))
h2 <- digest(fname, algo = "md5", file = TRUE)
stopifnot(identical(h1, h2))
}
\keyword{misc}