/*-*- mode: C; kept-old-versions: 12;  kept-new-versions: 20; -*-
 *
 * srqfn.f -- translated by f2c (version 20031025) and by
 * $Id: f2c-clean,v 1.10 2002/03/28 16:37:27 maechler Exp $
 * plus extended manual code cleaning by Martin Maechler, ETH Zurich
 */

#include <Rmath.h>

/* Table of constant values */

static int c__1 = 1;
static double c_9995 = .9995;


/* BLAS : */
#include <R_ext/BLAS.h>
/* double ddot_(int *, double *, int *, double *, int *); */
/* int   daxpy_(int *, double*, double *, int *, double *, int *); */

#define DDOT(_n_, _X_, _Y_)  F77_CALL(ddot)(_n_, _X_, &c__1, _Y_, &c__1)

/* SparseM -- ./sparskit2.f : */
#include "sparseM.h"

/* Cholesky related : */
#include "cholesky.h"

/* advance declaration : */
static void
slpfn(int *n, int *m, int *nnza, double *a, int *ja, int *ia,
      double *ao, int *jao, int *iao, int *nnzdmax, double *d, int *jd, int *id,
      double *dsub, int *jdsub, int *nsubmax, int *lindx, int *xlindx,
      int *nnzlmax, double *lnz, int *xlnz,
      int *invp, int *perm, int *iwmax, int *iwork,
      int *colcnt, int *snode, int *xsuper, int *split,
      int *tmpmax, double *tmpvec, double *newrhs, int *cachsz,
      int *level, double *x, double *s, double *u,
      double *c, double *y, double *b, double *r,
      double *z, double *w, double *q, int *nnzemax,
      double *e, int *je, int *ie,
      double *dy, double *dx, double *ds, double *dz, double *dw,
      double *dxdz, double *dsdw, double *xi, double *xinv, double *sinv,
      double *ww1, double *ww2, double *small, int *ierr,
      int *maxit, double *timewd);

static void
bound(double *x, double *dx, double *s,
      double *ds, double *z, double *dz, double *w,
      double *dw, int *n, double *beta, double *deltap,
      double *deltad);


/*  called from R's rq.fit.sfn()  in ../R/sfn.R */
int
srqfn_(int *n, int *m, int *nnza,
       double *a, int *ja, int *ia,
       double *ao, int *jao, int *iao,
       int *nnzdmax, double *d, int *jd, int *id,
	double *dsub, int *jdsub,
       int *nnzemax, double *e, int *je, int *ie,
       int *nsubmax, int *lindx, int *xlindx,
       int *nnzlmax, double *lnz, int *xlnz, int *iw,
       int *iwmax, int *iwork, int *xsuper, int *tmpmax,
       double *tmpvec, double *wwm, double *wwn, int *cachsz,
       int *level, double *x, double *s, double *u,
       double *c, double *y, double *b, double *small,
       int *ierr, int *maxit, double *timewd)
{
    /* System generated locals */
    int iw_dim1, wwm_dim1, wwn_dim1;

    /* Parameter adjustments */
    wwn_dim1 = *n;    wwn -= wwn_dim1;
    wwm_dim1 = *m;    wwm -= wwm_dim1;
    iw_dim1 = *m;     iw -= iw_dim1;


    /* Function Body */
    slpfn(n, m, nnza, a, ja, ia, ao, jao, iao,
	  nnzdmax, d, jd, id, dsub, jdsub, nsubmax,
	  lindx, xlindx, nnzlmax, lnz, xlnz,
	  &iw[iw_dim1], &iw[(iw_dim1 << 1)],
	  iwmax, iwork,
	  &iw[iw_dim1 * 3], &iw[(iw_dim1 << 2)], xsuper, &iw[iw_dim1 * 5],
	  tmpmax, tmpvec, &wwm[(wwm_dim1 << 1)], cachsz, level,
	  x, s, u, c, y, b,
	  &wwn[wwn_dim1    ], &wwn[(wwn_dim1 << 1)],
	  &wwn[wwn_dim1 * 3], &wwn[(wwn_dim1 << 2)],
	  nnzemax, e, je, ie,
	  &wwm[wwm_dim1 * 3], &wwn[wwn_dim1 * 5],
	  &wwn[wwn_dim1 * 6], &wwn[wwn_dim1 * 7],
	  &wwn[(wwn_dim1 << 3)], &wwn[wwn_dim1 * 9],
	  &wwn[wwn_dim1 * 10], &wwn[wwn_dim1 * 11],
	  &wwn[wwn_dim1 * 12], &wwn[wwn_dim1 * 13],
	  &wwn[wwn_dim1 * 14],
	  &wwm[wwm_dim1], small, ierr, maxit, timewd);
    return 0;
} /* srqfn_ */

static void
slpfn(int *n, int *m, int *nnza, double *a, int *ja, int *ia,
      double *ao, int *jao, int *iao, int *nnzdmax, double *d, int *jd, int *id,
      double *dsub, int *jdsub, int *nsubmax, int *lindx, int *xlindx,
      int *nnzlmax, double *lnz, int *xlnz,
      int *invp, int *perm, int *iwmax, int *iwork,
      int *colcnt, int *snode, int *xsuper, int *split,
      int *tmpmax, double *tmpvec, double *newrhs, int *cachsz,
      int *level, double *x, double *s, double *u,
      double *c, double *y, double *b, double *r,
      double *z, double *w, double *q, int *nnzemax,
      double *e, int *je, int *ie,
      double *dy, double *dx, double *ds, double *dz, double *dw,
      double *dxdz, double *dsdw, double *xi, double *xinv, double *sinv,
      double *ww1, double *ww2, double *small, int *ierr,
      int *maxit, double *timewd)
{
/* Sparse implentation of LMS's interior point method via
    Ng-Peyton's sparse Cholesky factorization for sparse
    symmetric positive definite

 INPUT:
     n -- the number of rows in the coefficient matrix A'
     m -- the number of columns in the coefficient matrix A'
     nnza -- the number of non-zero elements in A'
     a -- an nnza-vector of non-zero values of the design
          matrix (A') stored in csr format
     ja -- an nnza-vector of indices of the non-zero elements of
           the coefficient matrix
     ia -- an (n+1)-vector of pointers to the begining of each
           row in a and ja
     ao -- an nnza-vector of work space for the transpose of
           the design matrix stored in csr format or the
           design matrix stored in csc format
     jao -- an nnza-vector of work space for the indices of the
            transpose of the design matrix
     iao -- an (n+1)-vector of pointers to the begining of each
            column in ao and jao
     nnzdmax -- upper bound of the non-zero elements in AA'
     d -- an nnzdmax-vector of non-zero values of AQ^(-1)
     jd -- an nnzdmax-vector of indices in d
     id -- an (m+1)-vector of pointers to the begining of each
           row in d and jd
     dsub -- the values of e excluding the diagonal elements
     jdsub -- the indices to dsub
     nsubmax -- upper bound of the dimension of lindx
     lindx -- an nsub-vector of integer which contains, in
           column major oder, the row subscripts of the nonzero
           entries in L in a compressed storage format
     xlindx -- an (m+1)-vector of int of pointers for lindx
     nnzlmax -- the upper bound of the non-zero entries in
                L stored in lnz, including the diagonal entries
     lnz -- First contains the non-zero entries of d; later
            contains the entries of the Cholesky factor
     xlnz -- column pointer for L stored in lnz
     invp -- an n-vector of int of inverse permutation vector
     perm -- an n-vector of int of permutation vector
     iw -- int work array of length m
     iwmax -- upper bound of the general purpose int
              working storage iwork; set at 7*m+3
     iwork -- an iwsiz-vector of int as work space
     colcnt -- array of length m, containing the number of
               non-zeros in each column of the factor, including
               the diagonal entries
     snode -- array of length m for recording supernode membership
     xsuper -- array of length m+1 containing the supernode
               partitioning
     split -- an m-vector with splitting of supernodes so that
              they fit into cache
     tmpmax -- upper bound of the dimension of tmpvec
     tmpvec -- a tmpmax-vector of temporary vector
     newrhs -- extra work vector for right-hand side and solution
     cachsz -- size of the cache (in kilobytes) on the target machine
     level -- level of loop unrolling while performing numerical
              factorization
     x -- an n-vector, the initial feasible solution in the primal
              that corresponds to the design matrix A'
     s -- an n-vector
     u -- an n-vector of upper bound for x
     c -- an n-vector, usually the "negative" of
          the pseudo response
     y -- an m-vector, the initial dual solution
     b -- an n-vector, usualy the rhs of the equality constraint
          X'a = (1-tau)X'e in the rq setting
     r -- an n-vector of residuals
     z -- an n-vector of the dual slack variable
     w -- an n-vector
     q -- an n-vector of work array containing the diagonal
          elements of the Q^(-1) matrix
     nnzemax -- upper bound of the non-zero elements in AA'
     e -- an nnzdmax-vector containing the non-zero entries of
          AQ^(-1)A' stored in csr format
     je -- an nnzemax-vector of indices for e
     ie -- an (m+1)-vector of pointers to the begining of each row in e and je
     dy -- work array
     dx -- work array
     ds -- work array
     dz -- work array
     dw -- work array
     dxdz -- work array
     dsdw -- work arry
     xi -- work array
     xinv -- work array
     sinv -- work array
     ww1 -- work array
     ww2 -- work array
     small -- convergence tolerance for interior algorithm
     ierr -- error flag :
       1 -- insufficient storage (work space) when calling extract;
       2 -- nnzd > nnzdmax
       3 -- insufficient storage in iwork when calling ordmmd;
       4 -- insufficient storage in iwork when calling sfinit;
       5 -- nnzl > nnzlmax when calling sfinit
       6 -- nsub > nsubmax when calling sfinit
       7 -- insufficient work space in iwork when calling symfct
       8 -- inconsistancy in input when calling symfct
       9 -- tmpsiz > tmpmax when calling bfinit; increase tmpmax
       10 -- nonpositive diagonal encountered when calling blkfct(),
             the matrix is not positive definite
       11 -- insufficient work storage in tmpvec when calling blkfct()
       12 -- insufficient work storage in iwork  when calling blkfct()
       17 -- tiny diagonals replaced with Inf when calling blkfct()
       		{new error code, was confounded with '10'}

     maxit -- maximal number of iterations; on return: the number of iterations

     timewd -- vector of length 7: [7]: amount of time for this subroutine
               [1:6] time info for the phases of cholfct() only.

 OUTPUT:
     y -- an m-vector of primal solution
*/

    /* Local variables */
    int i, it, nnzd, nnzdsub, nsuper;
    double deltad, deltap, mu, gap;
    double timbeg;


/* Parameter adjustments */
    --sinv;
    --xinv;
    --xi;
    --dsdw;
    --dxdz;
    --dw;
    --dz;
    --ds;
    --dx;
    --q;
    --w;
    --z;
    --r;
    --c;
    --u;
    --s;
    --x;

    --ww1;
    --ww2;
    --dy;
    --b;
    --y;
    --newrhs;

    --perm;
    --invp;

    --dsub;
    --timewd;

    /* Function Body */
    for (i = 1; i <= 7; ++i)
	timewd[i] = 0.;

    nnzd = ie[*m] - 1;
    nnzdsub = nnzd - *m;

/*  Compute the initial gap */

    gap = DDOT(n, &z[1], &x[1]) + DDOT(n, &w[1], &s[1]);

/*  Start iteration */

    it = 0;
    while(gap >= *small && it <= *maxit) {

	++it;

/*  Create the diagonal matrix Q^(-1) stored in q as an n-vector
    and update the residuals in r */

	for (i = 1; i <= *n; ++i) {
	    q[i] = 1. / (z[i] / x[i] + w[i] / s[i]);
	    r[i] = z[i] - w[i];
	}

/*  Obtain AQ^(-1) and store in d,jd,id in csr format */

	F77_CALL(amudia)(m, &c__1, ao, jao, iao, &q[1], d, jd, id);

/*  Obtain AQ^(-1)A' and store in e,je,ie in csr format */

	F77_CALL(amub)(m, m, &c__1, d, jd, id,
		       a, ja, ia, e, je, ie, nnzemax, iwork, ierr);

	if (*ierr != 0) { *ierr = 2; goto L100; }

/*  Extract the non-diagonal structure of e,je,ie and store in dsub,jdsub */

	i = *nnzemax + 1;
	F77_CALL(extract)(e, je, ie, &dsub[1], jdsub, m, nnzemax, &i, ierr);

	if (*ierr != 0) { *ierr = 1; goto L100; }

/* Compute b - Ax + AQ^(-1)r and store it in c in two steps
   First: store Ax in ww2 */
	F77_CALL(amux)(m, &x[1], &ww2[1], ao, jao, iao);

/*  Second: save AQ^(-1)r in c temporarily */

	F77_CALL(amux)(m, &r[1], &c[1], d, jd, id);
	for (i = 1; i <= *m; ++i) {
	    c[i] = b[i] - ww2[i] + c[i];
	}

/*  Compute dy = (AQ^(-1)A')^(-1)(b-Ax+AQ^(-1)r); result returned via dy */

	/* chlfct: perform Cholesky's decomposition of e,je,ie */

	F77_CALL(chlfct)(m, xlindx, lindx, &invp[1], &perm[1], iwork, &nnzdsub,
			 jdsub, colcnt, &nsuper, snode, xsuper, nnzlmax,
			 nsubmax, xlnz, lnz, ie, je, e, cachsz, tmpmax,
			 level, tmpvec, split, ierr, &it, &timewd[1]);

	if (*ierr != 0) { /* return the chlfct() error code in {3:12}: */ goto L100; }

	for (i = 1; i <= *m; ++i)
	    newrhs[i] = c[perm[i]];

	/* blkslv: Numerical solution for the new rhs stored in b */
	timbeg = F77_CALL(gtimer)();
	F77_CALL(blkslv)(&nsuper, xsuper, xlindx, lindx, xlnz, lnz, &newrhs[1]);
	timewd[7] += F77_CALL(gtimer)() - timbeg;
	for (i = 1; i <= *m; ++i)
	    dy[i] = newrhs[invp[i]];

/*  Compute dx = Q^(-1)(A'dy - r), ds = -dx, dz  and dw */

	F77_CALL(amux)(n, &dy[1], &dx[1], a, ja, ia);
	for (i = 1; i <= *n; ++i) {
	    dx[i] = q[i] * (dx[i] - r[i]);
	    ds[i] = -dx[i];
	    dz[i] = -z[i] * (dx[i] / x[i] + 1.);
	    dw[i] = -w[i] * (ds[i] / s[i] + 1.);
	}

/* Compute the maximum allowable step lengths */

	bound(&x[1], &dx[1], &s[1], &ds[1], &z[1], &dz[1], &w[1], &dw[1],
	      n, &c_9995, &deltap, &deltad);

	if (deltap * deltad < 1.) {

	    /* Update mu */
	    double
		g =		      DDOT(n, &z[1], &x[1]) +
		    deltap *	      DDOT(n, &z[1], &dx[1]) +
		    deltad *	      DDOT(n, &dz[1], &x[1]) +
		    deltad * deltap * DDOT(n, &dz[1], &dx[1]) +
				      DDOT(n, &w[1], &s[1]) +
		    deltap *	      DDOT(n, &w[1], &ds[1]) +
		    deltad *	      DDOT(n, &dw[1], &s[1]) +
		    deltad * deltap * DDOT(n, &dw[1], &ds[1]);

	    mu = DDOT(n, &z[1], &x[1]) + DDOT(n, &w[1], &s[1]);
	    g /= mu;
	    mu *= g * g * g / (2. * (*n));

	    /* Compute dxdz and dsdw */

	    for (i = 1; i <= *n; ++i) {
		dxdz[i] = dx[i] * dz[i];
		dsdw[i] = ds[i] * dw[i];
		xinv[i] = 1. / x[i];
		sinv[i] = 1. / s[i];
		xi[i] = xinv[i] * dxdz[i] - sinv[i] * dsdw[i] -
		    mu * (xinv[i] - sinv[i]);
		ww1[i] = q[i] * xi[i];
	    }

	    /* Compute AQ^(-1)(dxdz - dsdw - mu(X^(-1) - S^(-1))) and
	       store it in ww2 temporarily */

	    F77_CALL(amux)(m, &ww1[1], &ww2[1], ao, jao, iao);
	    for (i = 1; i <= *m; ++i) {
		c[i] += ww2[i];
	    }

	    /* Compute dy and return the result in dy */

	    for (i = 1; i <= *m; ++i) {
		newrhs[i] = c[perm[i]];
	    }

	    /* Call blkslv: Numerical solution for the new rhs stored in b */

	    timbeg = F77_CALL(gtimer)();
	    F77_CALL(blkslv)(&nsuper, xsuper, xlindx, lindx, xlnz, lnz, &newrhs[1]);
	    timewd[7] += F77_CALL(gtimer)() - timbeg;
	    for (i = 1; i <= *m; ++i) {
		dy[i] = newrhs[invp[i]];
	    }

	    /* Compute dx = Q^(-1)(A'dy - r + mu(X^(-1) - S^(-1)) -dxdz + dsdw),
	       ds = -dx, dz  and dw */

	    F77_CALL(amux)(n, &dy[1], &dx[1], a, ja, ia);
	    for (i = 1; i <= *n; ++i) {
		dx[i] = q[i] * (dx[i] - xi[i] - r[i]);
		ds[i] = -dx[i];
		dz[i] = -z[i] + xinv[i] * (mu - z[i] * dx[i] - dxdz[i]);
		dw[i] = -w[i] + sinv[i] * (mu - w[i] * ds[i] - dsdw[i]);
	    }

	    /* Compute the maximum allowable step lengths */

	    bound(&x[1], &dx[1], &s[1], &ds[1], &z[1], &dz[1], &w[1], &dw[1],
		  n, &c_9995, &deltap, &deltad);
	}

	/* Take the step */

	F77_CALL(daxpy)(n, &deltap, &dx[1], &c__1, &x[1], &c__1);
	F77_CALL(daxpy)(n, &deltap, &ds[1], &c__1, &s[1], &c__1);
	F77_CALL(daxpy)(n, &deltad, &dw[1], &c__1, &w[1], &c__1);
	F77_CALL(daxpy)(n, &deltad, &dz[1], &c__1, &z[1], &c__1);
	F77_CALL(daxpy)(m, &deltad, &dy[1], &c__1, &y[1], &c__1);

	gap = DDOT(n, &z[1], &x[1]) + DDOT(n, &w[1], &s[1]);

    } /* end {while} iterations */


L100:
    *maxit = it;
    return;
} /* slpfn */

static void
bound(double *x, double *dx, double *s,
      double *ds, double *z, double *dz, double *w,
      double *dw, int *n, double *beta, double *deltap,
      double *deltad)
{
    int i;

    *deltap = R_PosInf;
    *deltad = R_PosInf;
    for (i = 0; i < *n; ++i) {
	if (dx[i] < 0.) *deltap = fmin2(*deltap, -x[i] / dx[i]);
	if (ds[i] < 0.) *deltap = fmin2(*deltap, -s[i] / ds[i]);
	if (dz[i] < 0.) *deltad = fmin2(*deltad, -z[i] / dz[i]);
	if (dw[i] < 0.) *deltad = fmin2(*deltad, -w[i] / dw[i]);
    }

    *deltap = fmin2(1., *beta * *deltap);
    *deltad = fmin2(1., *beta * *deltad);
    return;
} /* bound */