https://github.com/cran/sets
Tip revision: 3975849d86d215a32efb59860db5dfdeb54304e6 authored by David Meyer on 11 June 2008, 00:00:00 UTC
version 0.2
version 0.2
Tip revision: 3975849
closure.c
#include <R.h>
#include <Rdefines.h>
//#define __DEBUG
// Bit operations on compound variables.
static void _ior(int *x, int *y, int *r, int l)
{
while (l-- > 0)
r[l] = x[l] | y[l];
}
static void _xor(int *x, int *y, int *r, int l)
{
while (l-- > 0)
r[l] = x[l] ^ y[l];
}
static void _and(int *x, int *y, int *r, int l)
{
while (l-- > 0)
r[l] = x[l] & y[l];
}
typedef void (*OPFUN)(int *, int *, int *, int);
static OPFUN ops[] = { _ior, _and, _xor };
// Compare compound variables.
static int _ieq(int *x, int *y, int l) {
while (l-- > 0)
if (x[l] != y[l])
return 0;
return 1;
}
// Hash function for compound variables.
static int _hash(int *x, int l, int k) {
unsigned int j = l * 100;
k = 32 - k;
while (l-- > 0) {
j ^= 3141592653U * (unsigned int) x[l] >> k;
j *= 97;
}
return 3141592653U * j >> k;
}
// Add index to hash.
static int _hadd(SEXP x, int i, SEXP h, int k) {
int j;
SEXP s;
s = VECTOR_ELT(x, i);
k = _hash(INTEGER(s), LENGTH(s), k);
while ((j = INTEGER(h)[k]) > -1) {
if (_ieq(INTEGER(VECTOR_ELT(x, j)), INTEGER(s), LENGTH(s)))
return j;
k = (k + 1) % LENGTH(h);
}
INTEGER(h)[k] = i;
return -1;
}
// Compute the closure of a set of binary vectors
// under the basic binary operations (see above).
//
// NOTE xor should not be used for obvious reasons.
//
// Inputs are a logical matrix with the elements in
// the rows and the atoms in the columns, and second
// an integer vector specifying the operator to use.
//
// The worst time complexity is O(min(2^nc, nr^2)).
// Note that normally the runtime is order dependent,
// e.g. it takes less iterations if the atoms are
// in increasing order with respect to size (number
// of bits).
//
// Returns a logical matrix. The rows are in depth-
// first-search order.
//
// Version: 0.2
//
// (C) ceeboo 2008
SEXP R_closure(SEXP x, SEXP R_op) {
if (!x || !isMatrix(x) || TYPEOF(x) != LGLSXP)
error("'x' not a logical matrix");
if (!R_op || TYPEOF(R_op) != INTSXP)
error("'op' not an integer vector");
int i, j, k, l, n, nr, nc, nb, hk;
int *p;
OPFUN op;
SEXP r, q, q0, s, t, t0, ht;
nr = INTEGER(GET_DIM(x))[0];
nc = INTEGER(GET_DIM(x))[1];
if (!nc && nr)
error("'x' invalid dimensions");
if (nr < 2)
return x;
// FIXME does using all the bits also work
// on other platforms?
nb = (int) ceil((double) nc / (sizeof(int) * CHAR_BIT));
if (INTEGER(R_op)[0] < 1 ||
INTEGER(R_op)[0] > sizeof(ops) / sizeof(OPFUN))
error("'op' invalid value");
op = ops[INTEGER(R_op)[0]-1];
t = PROTECT(allocVector(VECSXP, nr));
// Encode data. Note that the bits are
// spread evenly across the variables.
for (i = 0; i < nr; i++) {
SET_VECTOR_ELT(t, i, (s = allocVector(INTSXP, nb)));
memset(INTEGER(s), 0, sizeof(int) * nb);
for (k = 0; k < nc; k++) {
j = k % nb;
INTEGER(s)[j] <<= 1;
INTEGER(s)[j] |= LOGICAL(x)[i+k*nr];
}
}
// Initialize hash table.
if (nr > 536870912)
error("size %d too large for hashing", nr);
k = 2 * nr;
n = 2;
hk = 1;
while (k > n) {
n *= 2;
hk += 1;
}
ht = PROTECT(allocVector(INTSXP, n));
for (k = 0; k < n; k++)
INTEGER(ht)[k] = -1;
// Remove duplicates.
n = 0;
for (k = 0; k < nr; k++) {
if (_hadd(t, k, ht, hk) > -1)
continue;
if (n < k)
SET_VECTOR_ELT(t, n, VECTOR_ELT(t, k));
n++;
}
nr = n;
// Reset hash table.
for (k = 0; k < LENGTH(ht); k++)
INTEGER(ht)[k] = -1;
// Result vector.
n = 0;
r = PROTECT(allocVector(VECSXP, nr));
if (op == _xor) {
SET_VECTOR_ELT(r, n, (s = allocVector(INTSXP, nb)));
memset(INTEGER(s), 0, sizeof(int) * nb);
_hadd(r, n++, ht, hk);
}
// Initialize stack.
q = PROTECT(allocVector(VECSXP, nr + 1));
for (k = 2; k < nr + 1; k++)
SET_VECTOR_ELT(q, k, allocVector(INTSXP, nb));
j = 1;
p = INTEGER(PROTECT(allocVector(INTSXP, nr + 1)));
p[1] = 0;
#ifdef __DEBUG
i = 0;
Rprintf("# %7s %7s %7s %7s\n", "iter", "size", "RLEN", "HLEN");
#endif
while (j > 0) {
t0 = VECTOR_ELT(t, p[j]);
if (j > 1)
op(INTEGER(t0), INTEGER(VECTOR_ELT(q, j-1)),
INTEGER((q0 = VECTOR_ELT(q, j))), nb);
else
SET_VECTOR_ELT(q, j, (q0 = t0));
// Resize hash table.
if ((k = 2 * n) == LENGTH(ht)) {
if (n > 536870912)
error("size %d too large for hashing");
UNPROTECT_PTR(ht);
ht = PROTECT(allocVector(INTSXP, k * 2));
for (k = 0; k < LENGTH(ht); k++)
INTEGER(ht)[k] = -1;
hk++;
for (k = 0; k < n; k++)
_hadd(r, k, ht, hk);
}
// Resize result vector.
if (n == LENGTH(r)) {
s = r;
r = PROTECT(allocVector(VECSXP, 2 * n));
for (k = 0; k < n; k++)
SET_VECTOR_ELT(r, k, VECTOR_ELT(s, k));
UNPROTECT_PTR(s);
}
SET_VECTOR_ELT(r, n, q0);
l = n;
if (_hadd(r, n, ht, hk) == -1)
SET_VECTOR_ELT(r, n++, duplicate(q0));
if (p[j] < nr - 1) {
if (n > l) { // expand sequence
j++;
p[j] = p[j-1] + 1;
} else // prune
p[j]++;
} else // backtrack
p[--j]++;
#ifdef __DEBUG
i++;
if (j < 2)
Rprintf("# %7i %7i %7i %7i\n", i, n, LENGTH(r), LENGTH(ht));
#endif
R_CheckUserInterrupt();
}
UNPROTECT(5);
// Decode result.
PROTECT(r);
s = allocMatrix(LGLSXP, n, nc);
for (i = 0; i < n; i++) {
q = VECTOR_ELT(r, i);
for (k = nc - 1; k > -1; k--) {
j = k % nb;
LOGICAL(s)[i+k*n] = INTEGER(q)[j] & 1;
INTEGER(q)[j] >>= 1;
}
}
UNPROTECT(1);
// Set colnames.
if (!isNull((q = getAttrib(x, R_DimNamesSymbol)))) {
PROTECT(s);
setAttrib(s, R_DimNamesSymbol, (r = allocVector(VECSXP, 2)));
SET_VECTOR_ELT(r, 0, R_NilValue);
SET_VECTOR_ELT(r, 1, VECTOR_ELT(q, 1));
UNPROTECT(1);
}
return s;
}
// From the R-2.7.1 source code.
#define NUMERIC int
void R_qsort_int_V(int *v, SEXP I, int i, int j)
#include "qsort-Vbody.h"
#undef NUMERIC
// Complement a compound variable.
static void _not(int *x, int *r, int l)
{
while (l-- > 0)
r[l] = ~x[l];
}
// Test if the first compound variable
// is a subset of the second.
static int _iss(int *x, int *y, int l)
{
while (l-- > 0)
if ((x[l] & y[l]) != x[l])
return 0;
return 1;
}
// Compute the reduction (base) of a logical matrix
// under union or intersection, i.e. a unique minimal
// subset of the rows that spans the same space.
//
// cf. Doignon and Falmagne (1999). Knowledge Spaces.
// Springer, pp. 29 -- 31.
//
// Inputs are as above.
//
// The worst time complexity is O(n^2). Note that
// r(x, &) == !r(!x, |), and vice versa.
//
// Returns a logical matrix.
//
// Version 0.1
//
// (C) ceeboo 2008
SEXP R_reduction(SEXP x, SEXP R_op) {
if (!x || !isMatrix(x) || TYPEOF(x) != LGLSXP)
error("'x' not a logical matrix");
if (!R_op || TYPEOF(R_op) != INTSXP)
error("'op' not an integer vector");
int i, j, k, l, n, nr, nc, nb;
SEXP r, s, q, q0, t;
nr = INTEGER(GET_DIM(x))[0];
nc = INTEGER(GET_DIM(x))[1];
if (!nc && nr)
error("'x' invalid dimensions");
if (nr < 2)
return x;
// FIXME does using all the bits also work
// on other platforms?
nb = (int) ceil((double) nc / (sizeof(int) * CHAR_BIT));
if (INTEGER(R_op)[0] != 1 &&
INTEGER(R_op)[0] != 2)
error("'op' invalid value");
t = PROTECT(allocVector(VECSXP, nr));
q = PROTECT(allocVector(INTSXP, nr));
// Encode data.
for (i = 0; i < nr; i++) {
SET_VECTOR_ELT(t, i, (s = allocVector(INTSXP, nb)));
memset(INTEGER(s), 0, sizeof(int) * nb);
n = 0;
for (k = 0; k < nc; k++) {
j = k % nb;
l = LOGICAL(x)[i+k*nr];
INTEGER(s)[j] <<= 1;
INTEGER(s)[j] |= l;
n += l;
}
// Transform to dual.
if (INTEGER(R_op)[0] == 2) {
_not(INTEGER(s), INTEGER(s), nb);
INTEGER(q)[i] = nc - n;
} else
INTEGER(q)[i] = n;
}
// Order breadth-first.
R_qsort_int_V(INTEGER(q), t, 1, nr);
UNPROTECT_PTR(q);
// Remove duplicates.
q = duplicated(t, FALSE);
n = 0;
for (i = 0; i < nr; i++) {
if (LOGICAL(q)[i] == TRUE)
continue;
if (n < i)
SET_VECTOR_ELT(t, n, VECTOR_ELT(t, i));
n++;
}
nr = n;
// Initialize.
q = PROTECT(allocVector(INTSXP, nb));
r = PROTECT(allocVector(VECSXP, nr));
SET_VECTOR_ELT(r, 0, VECTOR_ELT(t, 0));
n = 1;
#ifdef __DEBUG
k = 0;
Rprintf("# %7s %7s\n", "iter", "size");
#endif
for (i = 1; i < nr; i++) {
memset(INTEGER(q), 0, sizeof(int) * nb);
s = VECTOR_ELT(t, i);
for (j = i - 1; j > -1; j--) {
q0 = VECTOR_ELT(t, j);
if (!_iss(INTEGER(q0), INTEGER(s), nb))
continue;
_ior(INTEGER(q0), INTEGER(q), INTEGER(q), nb);
if (_ieq(INTEGER(s), INTEGER(q), nb))
goto next;
}
SET_VECTOR_ELT(r, n++, s);
next:
#ifdef __DEBUG
k += i - 1 - j;
Rprintf("# %7i %7i\n", k, n);
#endif
R_CheckUserInterrupt();
}
UNPROTECT_PTR(q);
UNPROTECT_PTR(t);
// Decode result.
s = allocMatrix(LGLSXP, n, nc);
for (i = 0; i < n; i++) {
q = VECTOR_ELT(r, i);
// Transform back.
if (INTEGER(R_op)[0] == 2)
_not(INTEGER(q), INTEGER(q), nb);
for (k = nc - 1; k > -1; k--) {
j = k % nb;
LOGICAL(s)[i+k*n] = INTEGER(q)[j] & 1;
INTEGER(q)[j] >>= 1;
}
}
UNPROTECT(1);
// Set colnames.
if (!isNull((q = getAttrib(x, R_DimNamesSymbol)))) {
PROTECT(s);
setAttrib(s, R_DimNamesSymbol, (r = allocVector(VECSXP, 2)));
// FIXME do we need rownames?
SET_VECTOR_ELT(r, 0, R_NilValue);
SET_VECTOR_ELT(r, 1, VECTOR_ELT(q, 1));
UNPROTECT(1);
}
return s;
}
//