c 1 2 3 4 5 6 7 c23456789012345678901234567890123456789012345678901234567890123456789012 subroutine srqfnc(n1,m,nnza1,a1,ja1,ia1,ao1,jao1,iao1,n2,nnza2, & a2,ja2,ia2,ao2,jao2,iao2,nnzdmax,d,jd,id, & dsub,jdsub,nnzemax,e,je,ie,nnzgmax,g,jg,ig, & nnzhmax,h,jh,ih,nsubmax,lindx,xlindx,nnzlmax, & lnz,xlnz,iw,iwmax,iwork,xsuper,tmpmax,tmpvec, & maxn1n2,ww1,wwm,wwn1,wwn2,cachsz,level,x1,x2, & s,u,c1,c2,y,small,ierr,maxit,timewd) integer nnza1,nnza2,m,n1,n2,nnzdmax,nnzemax,nnzgmax,nnzhmax,iwmax, & nnzlmax,nsubmax,cachsz,level,tmpmax,ierr,maxit,maxn1n2, & ja1(nnza1),jao1(nnza1),ja2(nnza2),jao2(nnza2), & jdsub(nnzhmax+1),jd(nnzdmax),ia1(n1+1),iao1(m+1), & ia2(n2+1),iao2(m+1),id(m+1),lindx(nsubmax),xlindx(m+1), & iw(m,5),xlnz(m+1),iwork(iwmax),xsuper(m+1),je(nnzemax), & ie(m+1),jg(nnzgmax),ig(m+1),jh(nnzhmax),ih(m+1) double precision small, & a1(nnza1),ao1(nnza1),a2(nnza2),ao2(nnza2), & dsub(nnzhmax+1),d(nnzdmax),g(nnzgmax), & h(nnzhmax),lnz(nnzlmax),c1(n1),c2(n2),y(m), & ww1(maxn1n2),wwm(m,6),tmpvec(tmpmax), & wwn1(n1,10),wwn2(n2,7),x1(n1),x2(n2),s(n1), & u(n1),e(nnzemax) double precision timewd(7) parameter (beta=9.995d-1, one=1.0d0, zero=0.0d0) call slpfnc(n1,m,nnza1,a1,ja1,ia1,ao1,jao1,iao1,n2,nnza2, & a2,ja2,ia2,ao2,jao2,iao2,nnzdmax,d,jd,id,dsub, & jdsub,nsubmax,lindx,xlindx,nnzlmax,lnz,xlnz,iw(1,1), & iw(1,2),iwmax,iwork,iw(1,3),iw(1,4),xsuper,iw(1,5), & tmpmax,tmpvec,wwm(1,2),wwm(1,3),cachsz,level,x1,x2,s,u, & c1,c2,y,wwm(1,1),wwn2(1,1),wwn1(1,1), & wwn2(1,2),wwn1(1,2),wwn1(1,3),wwn2(1,3),nnzemax,e,je, & ie,nnzgmax,g,jg,ig,nnzhmax,h,jh,ih,wwm(1,4),wwn1(1,4), & wwn2(1,4),wwn1(1,5),wwn1(1,6),wwn2(1,5),wwn1(1,7), & wwn1(1,8),wwn2(1,6),wwn1(1,9),wwn1(1,10),wwn2(1,7), & maxn1n2,ww1,wwm(1,5),wwm(1,6),small,ierr,maxit,timewd) return end c23456789012345678901234567890123456789012345678901234567890123456789012 subroutine slpfnc(n1,m,nnza1,a1,ja1,ia1,ao1,jao1,iao1,n2,nnza2, & a2,ja2,ia2,ao2,jao2,iao2,nnzdmax,d,jd,id,dsub, & jdsub,nsubmax,lindx,xlindx,nnzlmax,lnz,xlnz,invp, & perm,iwmax,iwork,colcnt,snode,xsuper,split, & tmpmax,tmpvec,rhs,newrhs,cachsz,level,x1,x2,s,u, & c1,c2,y,b,r2,z1, & z2,w,q1,q2,nnzemax,e,je, & ie,nnzgmax,g,jg,ig,nnzhmax,h,jh,ih,dy,dx1, & dx2,ds,dz1,dz2,dw, & dxdz1,dxdz2,dsdw,xi1,xi2, & maxn1n2,ww1,ww2,ww3,small,ierr,maxit,timewd) c 1 2 3 4 5 6 7 c23456789012345678901234567890123456789012345678901234567890123456789012 c Sparse implentation of LMS's interior point method via c Ng-Peyton's sparse Cholesky factorization for sparse c symmetric positive definite c INPUT: c n1 -- the number of row in the coefficient matrix A1' c m -- the number of column in the coefficient matrix A1' c nnza1 -- the number of non-zero elements in A' c a1 -- an nnza1-vector of non-zero values of the design c matrix (A1') stored in csr format c ja1 -- an nnza1-vector of indices of the non-zero elements of c the coefficient matrix c ia1 -- an (n1+1)-vector of pointers to the begining of each c row in a1 and ja1 c ao1 -- an nnza1-vector of work space for the transpose of c the design matrix stored in csr format or the c design matrix stored in csc format c jao1 -- an nnza1-vector of work space for the indices of the c transpose of the design matrix c iao1 -- an (n1+1)-vector of pointers to the begining of each c column in ao1 and jao1 c n2 -- the number of row in the constraint matrix A2' c nnza2 -- the number of non-zero elements in A2' c a2 -- an nnza2-vector of non-zero values of the contraint c matrix (A2') stored in csr format c ja2 -- an nnza2-vector of indices of the non-zero elements of c the constraint matrix c ia2 -- an (n2+1)-vector of pointers to the begining of each c row in a2 and ja2 c ao2 -- an nnza2-vector of work space for the transpose of c the constraint matrix stored in csr format or the c constraint matrix stored in csc format c jao2 -- an nnza2-vector of work space for the indices of the c transpose of the constraint matrix c iao2 -- an (n2+1)-vector of pointers to the begining of each c column in ao2 and jao2 c nnzdmax -- upper bound of the non-zero elements in A1A1' c d -- an nnzdmax-vector of non-zero values used to store c the transpose of the design matrix multiplied by the design c matrix (A1A1') stored in csr format; c also used to store A1Q1^(-1) and A2Q2^(-1) later c jd -- an nnzdmax-vector of indices in d c id -- an (m+1)-vector of pointers to the begining of each c row in d and jd c dsub -- the values of d excluding the diagonal elements c jdsub -- the indices to dsub c nsubmax -- upper bound of the dimension of lindx c lindx -- an nsub-vector of interger which contains, in c column major order, the row subscripts of the nonzero c entries in L in a compressed storage format c xlindx -- an (m+1)-vector of integer of pointers for lindx c nnzlmax -- the upper bound of the non-zero entries in c L stored in lnz, including the diagonal entries c lnz -- First contains the non-zero entries of d; later c contains the entries of the Cholesky factor c xlnz -- column pointer for L stored in lnz c invp -- an n1-vector of integer of inverse permutation c vector c perm -- an n1-vector of integer of permutation vector c iw -- integer work array of length m c iwmax -- upper bound of the general purpose integer c working storage iwork; set at 7*m+3 c iwork -- an iwsiz-vector of integer as work space c colcnt -- array of length m, containing the number of c non-zeros in each column of the factor, including c the diagonal entries c snode -- array of length m for recording supernode c membership c xsuper -- array of length m+1 containing the supernode c partitioning c split -- an m-vector with splitting of supernodes so that c they fit into cache c tmpmax -- upper bound of the dimension of tmpvec c tmpvec -- a tmpmax-vector of temporary vector c rhs -- m-vector to store the rhs c newrhs -- extra work vector for right-hand side and c solution c cachsz -- size of the cache (in kilobytes) on the target c machine c level -- level of loop unrolling while performing numerical c factorization c x1 -- an n1-vector, the initial feasible solution for the primal c solution that corresponds to the design matrix A1' c x2 -- an n2-vector, the initial feasible solution for the primal c solution that corresponds to the constraint matrix A2' c s -- an n1-vector c u -- an n1-vector of the upper bound for x1 c c1 -- an n1-vector in the primal; negative response in the c regression quantile setting c c2 -- an n2-vector, the negative rhs of the inequality constraint c y -- an m-vector, the initial dual solution c b -- an n1-vector, usualy the rhs of the equality constraint c X'a = (1-tau)X'e in the rq setting c r2 -- an n2-vector of residuals c z1 -- an n1-vector of the dual slack variable c z2 -- an n2-vector c w -- an n-vector c q1 -- an n1-vector of work array containing the diagonal c elements of the Q1^(-1) matrix c q2 -- an n2-vector of work array containing the diagonal c elements of the Q2^(-1) matrix c e -- an nnzdmax-vector containing the non-zero entries of c A1Q1^(-1)A1' stored in csr format c je -- an nnzdmax-vector of indices for e c ie -- an (m+1)-vector of pointers to the begining of each c row in e and je c nnzgmax -- upper bound of the non-zero elements in g,jg c g -- an nnzgmax-vector containing the non-zero entries of c A2Q2^(-1)A2' stored in csr format c jg -- an nnzgmax-vector of indices for g c ig -- an (m+1)-vector of pointers to the begining of each c row in g and jg c nnzhmax -- upper bound of the non-zero elements in h,jh c h -- an nnzhmax-vector containing the non-zero entries of c AQ^(-1)A' stored in csr format c jh -- an nnzhmax-vector of indices for h c ih -- an (m+1)-vector of pointers to the begining of each c row in h and jh c dy -- an m-vector of work array c dx1 -- an n1-vector of work array c dx2 -- an n2-vector of work array c ds -- an n1-vector of work array c dz1 -- an n1-vector of work array c dz2 -- an n2-vector of work array c dw -- an n1-vector of work array c dxdz1 -- an n1-vector of work array c dxdz2 -- an n2-vector of work array c dsdw -- an n1-vector of work arry c xi1 -- an n1-vector of work array c xi2 -- an n2-vector of work array c xinv1 -- an n1-vector of work array c xinv2 -- an n2-vector of work array c sinv -- work array c maxn1n2 -- max(n1,n2) c ww1 -- an maxn1n2-vector of work array c ww2 -- an m-vector of work array c ww3 -- an m-vector of work array c small -- convergence tolerance for inetrior algorithm c ierr -- error flag c 1 -- insufficient storage when calling extract; c 3 -- insufficient storage in iwork when calling ordmmd; c 4 -- insufficient storage in iwork when calling sfinit; c 5 -- nnzl > nnzlmax when calling sfinit c 6 -- nsub > nsubmax when calling sfinit c 7 -- insufficient work space in iwork when calling symfct c 8 -- inconsistancy in input when calling symfct c 9 -- tmpsiz > tmpmax when calling symfct; increase tmpmax c 10 -- nonpositive diagonal encountered when calling c blkfct c 11 -- insufficient work storage in tmpvec when calling c blkfct c 12 -- insufficient work storage in iwork when calling c blkfct c 13 -- nnzd > nnzdmax in e,je when calling amub c 14 -- nnzd > nnzdmax in g,jg when calling amub c 15 -- nnzd > nnzdmax in h,jh when calling aplb c maxit -- upper limit of the iteration; on return holds the c number of iterations c timewd -- amount of time to execute this subroutine c OUTPUT: c y -- an m-vector of primal solution c 1 2 3 4 5 6 7 c23456789012345678901234567890123456789012345678901234567890123456789012 integer nnza1,nnza2,m,n1,n2,maxn1n2,nsuper,nnzdmax,nnzemax, & nnzgmax, & nnzhmax,iwmax,nnzlmax,nsubmax,cachsz,level,tmpmax,ierr, & maxit,it,nnzdsub,nnzd, & ja1(nnza1),jao1(nnza1),ja2(nnza2),jao2(nnza2), & jdsub(nnzhmax+1),jd(nnzdmax),ia1(n1+1),iao1(m+1), & ia2(n2+1),iao2(m+1),id(m+1),lindx(nsubmax),xlindx(m+1), & invp(m),perm(m),xlnz(m+1),iwork(iwmax),colcnt(m), & snode(m),xsuper(m+1),split(m),je(nnzemax),ie(m+1), & jg(nnzgmax),ig(m+1),jh(nnzhmax),ih(m+1) double precision ddot,gap,zero,one,beta,small,deltap,deltad,mu,g1, & a1(nnza1),ao1(nnza1),a2(nnza2),ao2(nnza2), & dsub(nnzhmax+1),d(nnzdmax),lnz(nnzlmax),c1(n1), & c2(n2),b(m),rhs(m),newrhs(m),y(m),tmpvec(tmpmax), & r2(n2),z1(n1),z2(n2),w(n1),x1(n1), & x2(n2),s(n1),u(n1),q1(n1),q2(n2),e(nnzemax), & g(nnzgmax),h(nnzhmax),dy(m),dx1(n1),dx2(n2), & ds(n1),dz1(n1),dz2(n2),dw(n1),dxdz1(n1), & dxdz2(n2),dsdw(n1), & xi1(n1),xi2(n2),ww1(maxn1n2),ww2(m),ww3(m) double precision timewd(7) real gtimer,timbeg,timend external smxpy1,smxpy2,smxpy4,smxpy8 external mmpy1,mmpy2,mmpy4,mmpy8 parameter (beta=9.995d-1, one=1.0d0, zero=0.0d0) do i = 1,7 timewd(i) = 0.0 enddo it = 0 c c Compute the initial gap c gap = ddot(n1,z1,1,x1,1) + ddot(n2,z2,1,x2,1) + ddot(n1,w,1,s,1) c c Start iteration c 20 continue if(gap .lt. small .or. it .gt. maxit) goto 30 it = it + 1 c c Create the diagonal matrix Q1^(-1) stored in q1 as an n1-vector, c the diagonal matrix Q2^(-1) stored in q2 as an n2-vector, c and store the residuals in r1 in ds, and r3 in dy temporarily, c and r2 in r2 permanently c c Call amux to obtain A1x1 and store the value in ww2 c call amux(m,x1,ww2,ao1,jao1,iao1) c c Call amux to obtain A2x2 and store the value in ww3 c call amux(m,x2,ww3,ao2,jao2,iao2) c c Store A2'y temporarily in r2 c call amux(n2,y,r2,a2,ja2,ia2) do i=1,n1 q1(i) = one/(z1(i)/x1(i)+w(i)/s(i)) ds(i) = z1(i) - w(i) enddo do i=1,n2 q2(i) = x2(i)/z2(i) r2(i) = c2(i)-r2(i) enddo do i=1,m dy(i) = b(i) - ww2(i) - ww3(i) enddo c c Obtain AQA = A1Q1^(-1)A1' + A2Q2^(-1)A2' in 5 steps c c Step1: Obtain A1Q1^(-1) and store the values in d,jd,id in csr format c Also compute A1Q1^(-1)r1 and store the values in ww2 to be used c to generate r3; c Step2: Compute A1Q1^(-1)A1' and store the values in e,je,ie c Step3: Obtain A2Q2^(-1) and store the values in d,jd,id in csr format c Also compute A2Q2^(-1)r2 and store the values in in ww3 to c be used to generate r3; c Step4: Compute A2Q2^(-1)A2' and store the value in g,jg,ig c Step5: Compute AQA and store the values in h,jh,ih c c Step 1 c call amudia(m,1,ao1,jao1,iao1,q1,d,jd,id) call amux(m,ds,ww2,d,jd,id) c c Step 2 c call amub(m,m,1,d,jd,id,a1,ja1,ia1,e,je,ie,nnzemax,iwork,ierr) if (ierr .ne. 0) then ierr = 13 go to 100 endif c c Step 3 c call amudia(m,1,ao2,jao2,iao2,q2,d,jd,id) call amux(m,r2,ww3,d,jd,id) c c Step 4 c call amub(m,m,1,d,jd,id,a2,ja2,ia2,g,jg,ig,nnzgmax,iwork,ierr) if (ierr .ne. 0) then ierr = 14 go to 100 endif c c Step 5 c call aplb(m,m,1,e,je,ie,g,jg,ig,h,jh,ih,nnzhmax,iwork,ierr) if (ierr .ne. 0) then ierr = 15 go to 100 endif c c Generate rhs = r3 + A1Q1^(-1)r1 + A2Q2^(-1)r2 and store in rhs c do i = 1,m rhs(i) = dy(i) + ww2(i) + ww3(i) enddo c c Extract the non-diagonal structure of h,jh,ih and store in dsub,jdsub c nnzd = ih(m+1) - 1 nnzdsub = nnzd - m call extract(h,jh,ih,dsub,jdsub,m,nnzhmax,nnzhmax+1,ierr) if (ierr .ne. 0) then ierr = 1 go to 100 endif c c Compute dy = (AQ^(-1)A')^(-1)rhs; result returned via dy c c Call chlfct to perform Cholesky's decomposition of h,jh,ih c call chlfct(m,xlindx,lindx,invp,perm,iwork,nnzdsub,jdsub, & colcnt,nsuper,snode,xsuper,nnzlmax,nsubmax,xlnz,lnz, & ih,jh,h,cachsz,tmpmax,level,tmpvec,split,ierr,it, & timewd) if (ierr .ne. 0) go to 100 c c Call blkslv: Numerical solution for the new rhs stored in rhs c do i = 1,m newrhs(i) = rhs(perm(i)) enddo timbeg = gtimer() call blkslv(nsuper,xsuper,xlindx,lindx,xlnz,lnz,newrhs) timend = gtimer() timewd(7) = timewd(7) + timend - timbeg do i = 1,m dy(i) = newrhs(invp(i)) enddo c c Compute dx1 = Q1^(-1)(A1'dy - r1), ds = -dx1, dz1, dz2 and dw c call amux(n1,dy,dx1,a1,ja1,ia1) call amux(n2,dy,dx2,a2,ja2,ia2) do i = 1,n1 dx1(i) = q1(i) * (dx1(i) - ds(i)) ds(i) = -dx1(i) dz1(i) = -z1(i) * (one + dx1(i) / x1(i)) dw(i) = -w(i) * (one + ds(i) / s(i)) enddo do i = 1,n2 dx2(i) = q2(i) * (dx2(i) - r2(i)) dz2(i) = -z2(i) * (one + dx2(i) / x2(i)) enddo c c Compute the maximum allowable step lengths c call boundc(x1,dx1,x2,dx2,s,ds,z1,dz1,z2,dz2,w,dw,n1,n2, & beta,deltap,deltad) if (deltap * deltad .lt. one) then c c Update mu c mu = ddot(n1,z1,1,x1,1) + ddot(n2,z2,1,x2,1) & + ddot(n1,w,1,s,1) g1 = mu + deltap*ddot(n1,z1,1,dx1,1) & + deltad*ddot(n1,dz1,1,x1,1) & + deltad*deltap*ddot(n1,dz1,1,dx1,1) & + deltap*ddot(n2,z2,1,dx2,1) & + deltad*ddot(n2,dz2,1,x2,1) & + deltad*deltap*ddot(n2,dz2,1,dx2,1) & + deltap*ddot(n1,w,1,ds,1) & + deltad*ddot(n1,dw,1,s,1) & + deltad*deltap*ddot(n1,dw,1,ds,1) mu = mu*((g1/mu)**3)/(2.d0*dfloat(n1)+dfloat(n2)) c c Compute dx1dz1, dx2dz2 and dsdw c do i = 1,n1 dxdz1(i) = dx1(i)*dz1(i) dsdw(i) = ds(i)*dw(i) xi1(i) = dxdz1(i)/x1(i) - dsdw(i)/s(i) & - mu * (one/x1(i) - one/s(i)) ww1(i) = q1(i) * xi1(i) enddo c c Compute A1Q1^(-1)(X1^(-1)*dx1dz1 - S^(-1)*dsdw - mu(X1^(-1) - S^(-1))) and c store it in ww2 temporarily c call amux(m,ww1,ww2,ao1,jao1,iao1) do i = 1,n2 dxdz2(i) = dx2(i)*dz2(i) xi2(i) = (dxdz2(i) - mu)/x2(i) ww1(i) = q2(i) * xi2(i) enddo c c Compute A2Q2^(-1)(X2^(-1)*dx2dz2 - mu X2^(-1)) and store it in ww3 c temporarily c call amux(m,ww1,ww3,ao2,jao2,iao2) do i = 1,m rhs(i) = rhs(i) + ww2(i) + ww3(i) enddo c c c Compute (AQ^(-1)A')^(-1)rhs and return the result in dy c c Call blkslv: Numerical solution for the new rhs stored in rhs c do i = 1,m newrhs(i) = rhs(perm(i)) enddo timbeg = gtimer() call blkslv(nsuper,xsuper,xlindx,lindx,xlnz,lnz,newrhs) timend = gtimer() timewd(7) = timewd(7) + timend - timbeg do i = 1,m dy(i) = newrhs(invp(i)) enddo c c Compute dx1=Q1^(-1)(A1'dy-X1^(-1)*dx1dz1-S^(-1)*dsdw c -mu*(X1^(-1)-S^(-1))-r1), ds = -dx1, dz1, dz2 and dw c call amux(n1,dy,dx1,a1,ja1,ia1) call amux(n2,dy,dx2,a2,ja2,ia2) do i = 1,n1 dx1(i) = q1(i) * (dx1(i) - xi1(i) - z1(i) + w(i)) ds(i) = -dx1(i) dz1(i) = -z1(i) + (mu - z1(i)*dx1(i) & - dxdz1(i))/x1(i) dw(i) = -w(i) + (mu - w(i)*ds(i) - dsdw(i))/s(i) enddo do i = 1,n2 dx2(i) = q2(i) * (dx2(i) - xi2(i) - r2(i)) dz2(i) = -z2(i) + (mu - z2(i)*dx2(i) - & dxdz2(i))/x2(i) enddo c c Compute the maximum allowable step lengths c call boundc(x1,dx1,x2,dx2,s,ds,z1,dz1,z2,dz2,w,dw,n1,n2, & beta,deltap,deltad) endif c c Take the step c call daxpy(n1,deltap,dx1,1,x1,1) call daxpy(n2,deltap,dx2,1,x2,1) call daxpy(n1,deltap,ds,1,s,1) call daxpy(n1,deltad,dw,1,w,1) call daxpy(n1,deltad,dz1,1,z1,1) call daxpy(n2,deltad,dz2,1,z2,1) call daxpy(m,deltad,dy,1,y,1) gap = ddot(n1,z1,1,x1,1) + ddot(n2,z2,1,x2,1) + & ddot(n1,w,1,s,1) goto 20 30 continue 100 continue maxit = it return end