/*-*- mode: C; kept-old-versions: 12; kept-new-versions: 20; -*- * * srqfn.f -- translated by f2c (version 20031025) and by * $Id: f2c-clean,v 1.10 2002/03/28 16:37:27 maechler Exp $ * plus extended manual code cleaning by Martin Maechler, ETH Zurich */ #include /* Table of constant values */ static int c__1 = 1; static double c_9995 = .9995; /* BLAS : */ #include /* double ddot_(int *, double *, int *, double *, int *); */ /* int daxpy_(int *, double*, double *, int *, double *, int *); */ #define DDOT(_n_, _X_, _Y_) F77_CALL(ddot)(_n_, _X_, &c__1, _Y_, &c__1) /* SparseM -- ./sparskit2.f : */ #include "sparseM.h" /* Cholesky related : */ #include "cholesky.h" /* advance declaration : */ static void slpfn(int *n, int *m, int *nnza, double *a, int *ja, int *ia, double *ao, int *jao, int *iao, int *nnzdmax, double *d, int *jd, int *id, double *dsub, int *jdsub, int *nsubmax, int *lindx, int *xlindx, int *nnzlmax, double *lnz, int *xlnz, int *invp, int *perm, int *iwmax, int *iwork, int *colcnt, int *snode, int *xsuper, int *split, int *tmpmax, double *tmpvec, double *newrhs, int *cachsz, int *level, double *x, double *s, double *u, double *c, double *y, double *b, double *r, double *z, double *w, double *q, int *nnzemax, double *e, int *je, int *ie, double *dy, double *dx, double *ds, double *dz, double *dw, double *dxdz, double *dsdw, double *xi, double *xinv, double *sinv, double *ww1, double *ww2, double *small, int *ierr, int *maxit, double *timewd); static void bound(double *x, double *dx, double *s, double *ds, double *z, double *dz, double *w, double *dw, int *n, double *beta, double *deltap, double *deltad); /* called from R's rq.fit.sfn() in ../R/sfn.R */ int srqfn_(int *n, int *m, int *nnza, double *a, int *ja, int *ia, double *ao, int *jao, int *iao, int *nnzdmax, double *d, int *jd, int *id, double *dsub, int *jdsub, int *nnzemax, double *e, int *je, int *ie, int *nsubmax, int *lindx, int *xlindx, int *nnzlmax, double *lnz, int *xlnz, int *iw, int *iwmax, int *iwork, int *xsuper, int *tmpmax, double *tmpvec, double *wwm, double *wwn, int *cachsz, int *level, double *x, double *s, double *u, double *c, double *y, double *b, double *small, int *ierr, int *maxit, double *timewd) { /* System generated locals */ int iw_dim1, wwm_dim1, wwn_dim1; /* Parameter adjustments */ wwn_dim1 = *n; wwn -= wwn_dim1; wwm_dim1 = *m; wwm -= wwm_dim1; iw_dim1 = *m; iw -= iw_dim1; /* Function Body */ slpfn(n, m, nnza, a, ja, ia, ao, jao, iao, nnzdmax, d, jd, id, dsub, jdsub, nsubmax, lindx, xlindx, nnzlmax, lnz, xlnz, &iw[iw_dim1], &iw[(iw_dim1 << 1)], iwmax, iwork, &iw[iw_dim1 * 3], &iw[(iw_dim1 << 2)], xsuper, &iw[iw_dim1 * 5], tmpmax, tmpvec, &wwm[(wwm_dim1 << 1)], cachsz, level, x, s, u, c, y, b, &wwn[wwn_dim1 ], &wwn[(wwn_dim1 << 1)], &wwn[wwn_dim1 * 3], &wwn[(wwn_dim1 << 2)], nnzemax, e, je, ie, &wwm[wwm_dim1 * 3], &wwn[wwn_dim1 * 5], &wwn[wwn_dim1 * 6], &wwn[wwn_dim1 * 7], &wwn[(wwn_dim1 << 3)], &wwn[wwn_dim1 * 9], &wwn[wwn_dim1 * 10], &wwn[wwn_dim1 * 11], &wwn[wwn_dim1 * 12], &wwn[wwn_dim1 * 13], &wwn[wwn_dim1 * 14], &wwm[wwm_dim1], small, ierr, maxit, timewd); return 0; } /* srqfn_ */ static void slpfn(int *n, int *m, int *nnza, double *a, int *ja, int *ia, double *ao, int *jao, int *iao, int *nnzdmax, double *d, int *jd, int *id, double *dsub, int *jdsub, int *nsubmax, int *lindx, int *xlindx, int *nnzlmax, double *lnz, int *xlnz, int *invp, int *perm, int *iwmax, int *iwork, int *colcnt, int *snode, int *xsuper, int *split, int *tmpmax, double *tmpvec, double *newrhs, int *cachsz, int *level, double *x, double *s, double *u, double *c, double *y, double *b, double *r, double *z, double *w, double *q, int *nnzemax, double *e, int *je, int *ie, double *dy, double *dx, double *ds, double *dz, double *dw, double *dxdz, double *dsdw, double *xi, double *xinv, double *sinv, double *ww1, double *ww2, double *small, int *ierr, int *maxit, double *timewd) { /* Sparse implentation of LMS's interior point method via Ng-Peyton's sparse Cholesky factorization for sparse symmetric positive definite INPUT: n -- the number of rows in the coefficient matrix A' m -- the number of columns in the coefficient matrix A' nnza -- the number of non-zero elements in A' a -- an nnza-vector of non-zero values of the design matrix (A') stored in csr format ja -- an nnza-vector of indices of the non-zero elements of the coefficient matrix ia -- an (n+1)-vector of pointers to the begining of each row in a and ja ao -- an nnza-vector of work space for the transpose of the design matrix stored in csr format or the design matrix stored in csc format jao -- an nnza-vector of work space for the indices of the transpose of the design matrix iao -- an (n+1)-vector of pointers to the begining of each column in ao and jao nnzdmax -- upper bound of the non-zero elements in AA' d -- an nnzdmax-vector of non-zero values of AQ^(-1) jd -- an nnzdmax-vector of indices in d id -- an (m+1)-vector of pointers to the begining of each row in d and jd dsub -- the values of e excluding the diagonal elements jdsub -- the indices to dsub nsubmax -- upper bound of the dimension of lindx lindx -- an nsub-vector of integer which contains, in column major oder, the row subscripts of the nonzero entries in L in a compressed storage format xlindx -- an (m+1)-vector of int of pointers for lindx nnzlmax -- the upper bound of the non-zero entries in L stored in lnz, including the diagonal entries lnz -- First contains the non-zero entries of d; later contains the entries of the Cholesky factor xlnz -- column pointer for L stored in lnz invp -- an n-vector of int of inverse permutation vector perm -- an n-vector of int of permutation vector iw -- int work array of length m iwmax -- upper bound of the general purpose int working storage iwork; set at 7*m+3 iwork -- an iwsiz-vector of int as work space colcnt -- array of length m, containing the number of non-zeros in each column of the factor, including the diagonal entries snode -- array of length m for recording supernode membership xsuper -- array of length m+1 containing the supernode partitioning split -- an m-vector with splitting of supernodes so that they fit into cache tmpmax -- upper bound of the dimension of tmpvec tmpvec -- a tmpmax-vector of temporary vector newrhs -- extra work vector for right-hand side and solution cachsz -- size of the cache (in kilobytes) on the target machine level -- level of loop unrolling while performing numerical factorization x -- an n-vector, the initial feasible solution in the primal that corresponds to the design matrix A' s -- an n-vector u -- an n-vector of upper bound for x c -- an n-vector, usually the "negative" of the pseudo response y -- an m-vector, the initial dual solution b -- an n-vector, usualy the rhs of the equality constraint X'a = (1-tau)X'e in the rq setting r -- an n-vector of residuals z -- an n-vector of the dual slack variable w -- an n-vector q -- an n-vector of work array containing the diagonal elements of the Q^(-1) matrix nnzemax -- upper bound of the non-zero elements in AA' e -- an nnzdmax-vector containing the non-zero entries of AQ^(-1)A' stored in csr format je -- an nnzemax-vector of indices for e ie -- an (m+1)-vector of pointers to the begining of each row in e and je dy -- work array dx -- work array ds -- work array dz -- work array dw -- work array dxdz -- work array dsdw -- work arry xi -- work array xinv -- work array sinv -- work array ww1 -- work array ww2 -- work array small -- convergence tolerance for interior algorithm ierr -- error flag : 1 -- insufficient storage (work space) when calling extract; 2 -- nnzd > nnzdmax 3 -- insufficient storage in iwork when calling ordmmd; 4 -- insufficient storage in iwork when calling sfinit; 5 -- nnzl > nnzlmax when calling sfinit 6 -- nsub > nsubmax when calling sfinit 7 -- insufficient work space in iwork when calling symfct 8 -- inconsistancy in input when calling symfct 9 -- tmpsiz > tmpmax when calling bfinit; increase tmpmax 10 -- nonpositive diagonal encountered when calling blkfct(), the matrix is not positive definite 11 -- insufficient work storage in tmpvec when calling blkfct() 12 -- insufficient work storage in iwork when calling blkfct() 17 -- tiny diagonals replaced with Inf when calling blkfct() {new error code, was confounded with '10'} maxit -- maximal number of iterations; on return: the number of iterations timewd -- vector of length 7: [7]: amount of time for this subroutine [1:6] time info for the phases of cholfct() only. OUTPUT: y -- an m-vector of primal solution */ /* Local variables */ int i, it, nnzd, nnzdsub, nsuper; double deltad, deltap, mu, gap; double timbeg; /* Parameter adjustments */ --sinv; --xinv; --xi; --dsdw; --dxdz; --dw; --dz; --ds; --dx; --q; --w; --z; --r; --c; --u; --s; --x; --ww1; --ww2; --dy; --b; --y; --newrhs; --perm; --invp; --dsub; --timewd; /* Function Body */ for (i = 1; i <= 7; ++i) timewd[i] = 0.; nnzd = ie[*m] - 1; nnzdsub = nnzd - *m; /* Compute the initial gap */ gap = DDOT(n, &z[1], &x[1]) + DDOT(n, &w[1], &s[1]); /* Start iteration */ it = 0; while(gap >= *small && it <= *maxit) { ++it; /* Create the diagonal matrix Q^(-1) stored in q as an n-vector and update the residuals in r */ for (i = 1; i <= *n; ++i) { q[i] = 1. / (z[i] / x[i] + w[i] / s[i]); r[i] = z[i] - w[i]; } /* Obtain AQ^(-1) and store in d,jd,id in csr format */ F77_CALL(amudia)(m, &c__1, ao, jao, iao, &q[1], d, jd, id); /* Obtain AQ^(-1)A' and store in e,je,ie in csr format */ F77_CALL(amub)(m, m, &c__1, d, jd, id, a, ja, ia, e, je, ie, nnzemax, iwork, ierr); if (*ierr != 0) { *ierr = 2; goto L100; } /* Extract the non-diagonal structure of e,je,ie and store in dsub,jdsub */ i = *nnzemax + 1; F77_CALL(extract)(e, je, ie, &dsub[1], jdsub, m, nnzemax, &i, ierr); if (*ierr != 0) { *ierr = 1; goto L100; } /* Compute b - Ax + AQ^(-1)r and store it in c in two steps First: store Ax in ww2 */ F77_CALL(amux)(m, &x[1], &ww2[1], ao, jao, iao); /* Second: save AQ^(-1)r in c temporarily */ F77_CALL(amux)(m, &r[1], &c[1], d, jd, id); for (i = 1; i <= *m; ++i) { c[i] = b[i] - ww2[i] + c[i]; } /* Compute dy = (AQ^(-1)A')^(-1)(b-Ax+AQ^(-1)r); result returned via dy */ /* chlfct: perform Cholesky's decomposition of e,je,ie */ F77_CALL(chlfct)(m, xlindx, lindx, &invp[1], &perm[1], iwork, &nnzdsub, jdsub, colcnt, &nsuper, snode, xsuper, nnzlmax, nsubmax, xlnz, lnz, ie, je, e, cachsz, tmpmax, level, tmpvec, split, ierr, &it, &timewd[1]); if (*ierr != 0) { /* return the chlfct() error code in {3:12}: */ goto L100; } for (i = 1; i <= *m; ++i) newrhs[i] = c[perm[i]]; /* blkslv: Numerical solution for the new rhs stored in b */ timbeg = F77_CALL(gtimer)(); F77_CALL(blkslv)(&nsuper, xsuper, xlindx, lindx, xlnz, lnz, &newrhs[1]); timewd[7] += F77_CALL(gtimer)() - timbeg; for (i = 1; i <= *m; ++i) dy[i] = newrhs[invp[i]]; /* Compute dx = Q^(-1)(A'dy - r), ds = -dx, dz and dw */ F77_CALL(amux)(n, &dy[1], &dx[1], a, ja, ia); for (i = 1; i <= *n; ++i) { dx[i] = q[i] * (dx[i] - r[i]); ds[i] = -dx[i]; dz[i] = -z[i] * (dx[i] / x[i] + 1.); dw[i] = -w[i] * (ds[i] / s[i] + 1.); } /* Compute the maximum allowable step lengths */ bound(&x[1], &dx[1], &s[1], &ds[1], &z[1], &dz[1], &w[1], &dw[1], n, &c_9995, &deltap, &deltad); if (deltap * deltad < 1.) { /* Update mu */ double g = DDOT(n, &z[1], &x[1]) + deltap * DDOT(n, &z[1], &dx[1]) + deltad * DDOT(n, &dz[1], &x[1]) + deltad * deltap * DDOT(n, &dz[1], &dx[1]) + DDOT(n, &w[1], &s[1]) + deltap * DDOT(n, &w[1], &ds[1]) + deltad * DDOT(n, &dw[1], &s[1]) + deltad * deltap * DDOT(n, &dw[1], &ds[1]); mu = DDOT(n, &z[1], &x[1]) + DDOT(n, &w[1], &s[1]); g /= mu; mu *= g * g * g / (2. * (*n)); /* Compute dxdz and dsdw */ for (i = 1; i <= *n; ++i) { dxdz[i] = dx[i] * dz[i]; dsdw[i] = ds[i] * dw[i]; xinv[i] = 1. / x[i]; sinv[i] = 1. / s[i]; xi[i] = xinv[i] * dxdz[i] - sinv[i] * dsdw[i] - mu * (xinv[i] - sinv[i]); ww1[i] = q[i] * xi[i]; } /* Compute AQ^(-1)(dxdz - dsdw - mu(X^(-1) - S^(-1))) and store it in ww2 temporarily */ F77_CALL(amux)(m, &ww1[1], &ww2[1], ao, jao, iao); for (i = 1; i <= *m; ++i) { c[i] += ww2[i]; } /* Compute dy and return the result in dy */ for (i = 1; i <= *m; ++i) { newrhs[i] = c[perm[i]]; } /* Call blkslv: Numerical solution for the new rhs stored in b */ timbeg = F77_CALL(gtimer)(); F77_CALL(blkslv)(&nsuper, xsuper, xlindx, lindx, xlnz, lnz, &newrhs[1]); timewd[7] += F77_CALL(gtimer)() - timbeg; for (i = 1; i <= *m; ++i) { dy[i] = newrhs[invp[i]]; } /* Compute dx = Q^(-1)(A'dy - r + mu(X^(-1) - S^(-1)) -dxdz + dsdw), ds = -dx, dz and dw */ F77_CALL(amux)(n, &dy[1], &dx[1], a, ja, ia); for (i = 1; i <= *n; ++i) { dx[i] = q[i] * (dx[i] - xi[i] - r[i]); ds[i] = -dx[i]; dz[i] = -z[i] + xinv[i] * (mu - z[i] * dx[i] - dxdz[i]); dw[i] = -w[i] + sinv[i] * (mu - w[i] * ds[i] - dsdw[i]); } /* Compute the maximum allowable step lengths */ bound(&x[1], &dx[1], &s[1], &ds[1], &z[1], &dz[1], &w[1], &dw[1], n, &c_9995, &deltap, &deltad); } /* Take the step */ F77_CALL(daxpy)(n, &deltap, &dx[1], &c__1, &x[1], &c__1); F77_CALL(daxpy)(n, &deltap, &ds[1], &c__1, &s[1], &c__1); F77_CALL(daxpy)(n, &deltad, &dw[1], &c__1, &w[1], &c__1); F77_CALL(daxpy)(n, &deltad, &dz[1], &c__1, &z[1], &c__1); F77_CALL(daxpy)(m, &deltad, &dy[1], &c__1, &y[1], &c__1); gap = DDOT(n, &z[1], &x[1]) + DDOT(n, &w[1], &s[1]); } /* end {while} iterations */ L100: *maxit = it; return; } /* slpfn */ static void bound(double *x, double *dx, double *s, double *ds, double *z, double *dz, double *w, double *dw, int *n, double *beta, double *deltap, double *deltad) { int i; *deltap = R_PosInf; *deltad = R_PosInf; for (i = 0; i < *n; ++i) { if (dx[i] < 0.) *deltap = fmin2(*deltap, -x[i] / dx[i]); if (ds[i] < 0.) *deltap = fmin2(*deltap, -s[i] / ds[i]); if (dz[i] < 0.) *deltad = fmin2(*deltad, -z[i] / dz[i]); if (dw[i] < 0.) *deltad = fmin2(*deltad, -w[i] / dw[i]); } *deltap = fmin2(1., *beta * *deltap); *deltad = fmin2(1., *beta * *deltad); return; } /* bound */