c 1 2 3 4 5 6 7 c23456789012345678901234567890123456789012345678901234567890123456789012 subroutine srqfn(n,m,nnza,a,ja,ia,ao,jao,iao,nnzdmax,d,jd,id, & dsub,jdsub,nnzemax,e,je,ie,nsubmax,lindx,xlindx, & nnzlmax,lnz,xlnz,iw,iwmax,iwork,xsuper,tmpmax, & tmpvec,wwm,wwn,cachsz,level,x,s,u,c,y,b,small, & ierr,maxit,timewd) integer nnza,m,n,nnzdmax,nnzemax,iwmax, & nnzlmax,nsubmax,cachsz,level,tmpmax,ierr,maxit, & ja(nnza),jao(nnza),jdsub(nnzemax+1),jd(nnzdmax), & ia(n+1),iao(m+1),id(m+1),lindx(nsubmax),xlindx(m+1), & iw(m,5),xlnz(m+1),iwork(iwmax),xsuper(m+1),je(nnzemax), & ie(m+1) double precision small, & a(nnza),ao(nnza),dsub(nnzemax+1),d(nnzdmax), & lnz(nnzlmax),c(n),y(m),wwm(m,3),tmpvec(tmpmax), & wwn(n,14),x(n),s(n),u(n),e(nnzemax),b(m) double precision timewd(7) call slpfn(n,m,nnza,a,ja,ia,ao,jao,iao,nnzdmax,d,jd,id, & dsub,jdsub,nsubmax,lindx,xlindx,nnzlmax,lnz, & xlnz,iw(1,1),iw(1,2),iwmax,iwork,iw(1,3),iw(1,4), & xsuper,iw(1,5),tmpmax,tmpvec,wwm(1,2),cachsz, & level,x,s,u,c,y,b,wwn(1,1),wwn(1,2),wwn(1,3), & wwn(1,4),nnzemax,e,je,ie,wwm(1,3),wwn(1,5),wwn(1,6), & wwn(1,7),wwn(1,8),wwn(1,9),wwn(1,10),wwn(1,11), & wwn(1,12),wwn(1,13),wwn(1,14),wwm(1,1),small,ierr, & maxit,timewd) return end subroutine slpfn(n,m,nnza,a,ja,ia,ao,jao,iao,nnzdmax,d,jd,id, & dsub,jdsub,nsubmax,lindx,xlindx,nnzlmax,lnz, & xlnz,invp,perm,iwmax,iwork,colcnt,snode,xsuper, & split,tmpmax,tmpvec,newrhs,cachsz,level,x,s,u, & c,y,b,r,z,w,q,nnzemax,e,je,ie,dy,dx,ds,dz,dw,dxdz, & dsdw,xi,xinv,sinv,ww1,ww2,small,ierr,maxit,timewd) c 1 2 3 4 5 6 7 c23456789012345678901234567890123456789012345678901234567890123456789012 c Sparse implentation of LMS's interior point method via c Ng-Peyton's sparse Cholesky factorization for sparse c symmetric positive definite c INPUT: c n -- the number of row in the coefficient matrix A' c m -- the number of column in the coefficient matrix A' c nnza -- the number of non-zero elements in A' c a -- an nnza-vector of non-zero values of the design c matrix (A') stored in csr format c ja -- an nnza-vector of indices of the non-zero elements of c the coefficient matrix c ia -- an (n+1)-vector of pointers to the begining of each c row in a and ja c ao -- an nnza-vector of work space for the transpose of c the design matrix stored in csr format or the c design matrix stored in csc format c jao -- an nnza-vector of work space for the indices of the c transpose of the design matrix c iao -- an (n+1)-vector of pointers to the begining of each c column in ao and jao c nnzdmax -- upper bound of the non-zero elements in AA' c d -- an nnzdmax-vector of non-zero values of AQ^(-1) c jd -- an nnzdmax-vector of indices in d c id -- an (m+1)-vector of pointers to the begining of each c row in d and jd c dsub -- the values of e excluding the diagonal elements c jdsub -- the indices to dsub c nsubmax -- upper bound of the dimension of lindx c lindx -- an nsub-vector of interger which contains, in c column major order, the row subscripts of the nonzero c entries in L in a compressed storage format c xlindx -- an (m+1)-vector of integer of pointers for lindx c nnzlmax -- the upper bound of the non-zero entries in c L stored in lnz, including the diagonal entries c lnz -- First contains the non-zero entries of d; later c contains the entries of the Cholesky factor c xlnz -- column pointer for L stored in lnz c invp -- an n-vector of integer of inverse permutation c vector c perm -- an n-vector of integer of permutation vector c iw -- integer work array of length m c iwmax -- upper bound of the general purpose integer c working storage iwork; set at 7*m+3 c iwork -- an iwsiz-vector of integer as work space c colcnt -- array of length m, containing the number of c non-zeros in each column of the factor, including c the diagonal entries c snode -- array of length m for recording supernode c membership c xsuper -- array of length m+1 containing the supernode c partitioning c split -- an m-vector with splitting of supernodes so that c they fit into cache c tmpmax -- upper bound of the dimension of tmpvec c tmpvec -- a tmpmax-vector of temporary vector c newrhs -- extra work vector for right-hand side and c solution c cachsz -- size of the cache (in kilobytes) on the target c machine c level -- level of loop unrolling while performing numerical c factorization c x -- an n-vector, the initial feasible solution in the primal c that corresponds to the design matrix A' c s -- an n-vector c u -- an n-vector of upper bound for x c c -- an n-vector, usually the "negative" of c the pseudo response c y -- an m-vector, the initial dual solution c b -- an n-vector, the rhs of the equality constraint c usually X'a = (1-tau)X'e in the rq setting c r -- an n-vector of residuals c z -- an n-vector of the dual slack variable c w -- an n-vector c q -- an n-vector of work array containing the diagonal c elements of the Q^(-1) matrix c nnzemax -- upper bound of the non-zero elements in AA' c e -- an nnzdmax-vector containing the non-zero entries of c AQ^(-1)A' stored in csr format c je -- an nnzemax-vector of indices for e c ie -- an (m+1)-vector of pointers to the begining of each c row in e and je c dy -- work array c dx -- work array c ds -- work array c dz -- work array c dw -- work array c dxdz -- work array c dsdw -- work arry c xi -- work array c xinv -- work array c sinv -- work array c ww1 -- work array c ww2 -- work array c small -- convergence tolerance for inetrior algorithm c ierr -- error flag c 1 -- insufficient work space in call to extract c 2 -- nnzd > nnzdmax c 3 -- insufficient storage in iwork when calling ordmmd; c 4 -- insufficient storage in iwork when calling sfinit; c 5 -- nnzl > nnzlmax when calling sfinit c 6 -- nsub > nsubmax when calling sfinit c 7 -- insufficient work space in iwork when calling symfct c 8 -- inconsistancy in input when calling symfct c 9 -- tmpsiz > tmpmax when calling bfinit; increase tmpmax c 10 -- nonpositive diagonal encountered when calling c blkfct, the matrix is not positive definite c 11 -- insufficient work storage in tmpvec when calling c blkfct c 12 -- insufficient work storage in iwork when calling c blkfct c maxit -- upper limit of the iteration; on return holds the c number of iterations c timew -- amount of time to execute this subroutine c OUTPUT: c y -- an m-vector of primal solution c 1 2 3 4 5 6 7 c23456789012345678901234567890123456789012345678901234567890123456789012 integer nnza,m,n,nsuper,nnzdmax,nnzemax,iwmax,nnzd, & nnzlmax,nsubmax,cachsz,level,tmpmax,ierr,maxit,it, & ja(nnza),jao(nnza),jdsub(nnzemax+1),jd(nnzdmax), & ia(n+1),iao(m+1),id(m+1),lindx(nsubmax),xlindx(m+1), & invp(m),perm(m),xlnz(m+1),iwork(iwmax), & colcnt(m),snode(m),xsuper(m+1),split(m),je(nnzemax), & ie(m+1) double precision ddot,gap,zero,one,beta,small,deltap,deltad,mu,g, & a(nnza),ao(nnza),dsub(nnzemax+1),d(nnzdmax), & lnz(nnzlmax),c(n),b(m),newrhs(m),y(m), & tmpvec(tmpmax),r(n),z(n),w(n),x(n),s(n), & u(n),q(n), & e(nnzemax),dy(m),dx(n),ds(n),dz(n),dw(n), & dxdz(n),dsdw(n),xinv(n),sinv(n),xi(n), & ww1(n),ww2(m) double precision timewd(7) real gtimer,timbeg,timend external smxpy1,smxpy2,smxpy4,smxpy8 external mmpy1,mmpy2,mmpy4,mmpy8 parameter (beta=9.995d-1, one=1.0d0, zero=0.0d0) do i = 1,7 timewd(i) = 0.0 enddo it = 0 nnzd = ie(m+1) - 1 nnzdsub = nnzd - m c c Compute the initial gap c gap = ddot(n,z,1,x,1) + ddot(n,w,1,s,1) c c Start iteration c 20 continue if(gap .lt. small .or. it .gt. maxit) goto 30 it = it + 1 c c Create the diagonal matrix Q^(-1) stored in q as an n-vector c and update the residuals in r c do i=1,n q(i) = one/(z(i)/x(i)+w(i)/s(i)) r(i) = z(i) - w(i) enddo c c Obtain AQ^(-1) and store in d,jd,id in csr format c call amudia(m,1,ao,jao,iao,q,d,jd,id) c c Obtain AQ^(-1)A' and store in e,je,ie in csr format c call amub(m,m,1,d,jd,id,a,ja,ia,e,je,ie,nnzemax,iwork,ierr) if (ierr .ne. 0) then ierr = 2 go to 100 endif c c Extract the non-diagonal structure of e,je,ie and store in dsub,jdsub c call extract(e,je,ie,dsub,jdsub,m,nnzemax,nnzemax+1,ierr) if (ierr .ne. 0) then ierr = 1 go to 100 endif c c Compute b - Ax + AQ^(-1)r and store it in c in two steps c First: store Ax in ww2 call amux(m,x,ww2,ao,jao,iao) c c Second: save AQ^(-1)r in c temporarily c call amux(m,r,c,d,jd,id) do i = 1,m c(i) = b(i) - ww2(i) + c(i) enddo c c Compute dy = (AQ^(-1)A')^(-1)(b-Ax+AQ^(-1)r); result returned via dy c c Call chlfct to perform Cholesky's decomposition of e,je,ie c call chlfct(m,xlindx,lindx,invp,perm,iwork,nnzdsub,jdsub, & colcnt,nsuper,snode,xsuper,nnzlmax,nsubmax,xlnz,lnz, & ie,je,e,cachsz,tmpmax,level,tmpvec,split,ierr,it, & timewd) if (ierr .ne. 0) go to 100 c c Call blkslv: Numerical solution for the new rhs stored in b c do i = 1,m newrhs(i) = c(perm(i)) enddo timbeg = gtimer() call blkslv(nsuper,xsuper,xlindx,lindx,xlnz,lnz,newrhs) timend = gtimer() timewd(7) = timewd(7) + timend - timbeg do i = 1,m dy(i) = newrhs(invp(i)) enddo c c Compute dx = Q^(-1)(A'dy - r), ds = -dx, dz and dw c call amux(n,dy,dx,a,ja,ia) do i=1,n dx(i) = q(i) * (dx(i) - r(i)) ds(i) = -dx(i) dz(i) = -z(i) * (one + dx(i) / x(i)) dw(i) = -w(i) * (one + ds(i) / s(i)) enddo c c Compute the maximum allowable step lengths c call bound(x,dx,s,ds,z,dz,w,dw,n,beta,deltap,deltad) if (deltap * deltad .lt. one) then c c Update mu c mu = ddot(n,z,1,x,1) + ddot(n,w,1,s,1) g = ddot(n,z,1,x,1) + deltap*ddot(n,z,1,dx,1) & + deltad*ddot(n,dz,1,x,1) + deltad*deltap*ddot(n,dz,1,dx,1) & + ddot(n,w,1,s,1) + deltap*ddot(n,w,1,ds,1) & + deltad*ddot(n,dw,1,s,1) + deltad*deltap*ddot(n,dw,1,ds,1) mu = mu*((g/mu)**3)/(2.d0*dfloat(n)) c c Compute dxdz and dsdw c do i = 1,n dxdz(i) = dx(i)*dz(i) dsdw(i) = ds(i)*dw(i) xinv(i) = one/x(i) sinv(i) = one/s(i) xi(i) = xinv(i) * dxdz(i) - sinv(i) * dsdw(i) & - mu * (xinv(i) - sinv(i)) ww1(i) = q(i) * xi(i) enddo c c Compute AQ^(-1)(dxdz - dsdw - mu(X^(-1) - S^(-1))) and c store it in ww2 temporarily c call amux(m,ww1,ww2,ao,jao,iao) do i = 1,m c(i) = c(i) + ww2(i) enddo c c c Compute dy and return the result in dy c c Call blkslv: Numerical solution for the new rhs stored in b c do i = 1,m newrhs(i) = c(perm(i)) enddo timbeg = gtimer() call blkslv(nsuper,xsuper,xlindx,lindx,xlnz,lnz,newrhs) timend = gtimer() timewd(7) = timewd(7) + timend - timbeg do i = 1,m dy(i) = newrhs(invp(i)) enddo c c Compute dx = Q^(-1)(A'dy - r + mu(X^(-1) - S^(-1)) -dxdz + dsdw), c ds = -dx, dz and dw c call amux(n,dy,dx,a,ja,ia) do i=1,n dx(i) = q(i) * (dx(i) - xi(i) - r(i)) ds(i) = -dx(i) dz(i) = -z(i) + xinv(i)*(mu - z(i)*dx(i) - dxdz(i)) dw(i) = -w(i) + sinv(i)*(mu - w(i)*ds(i) - dsdw(i)) enddo c c Compute the maximum allowable step lengths c call bound(x,dx,s,ds,z,dz,w,dw,n,beta,deltap,deltad) endif c c Take the step c call daxpy(n,deltap,dx,1,x,1) call daxpy(n,deltap,ds,1,s,1) call daxpy(n,deltad,dw,1,w,1) call daxpy(n,deltad,dz,1,z,1) call daxpy(m,deltad,dy,1,y,1) gap = ddot(n,z,1,x,1) + ddot(n,w,1,s,1) goto 20 30 continue 100 continue maxit = it return end