https://github.com/kul-forbes/ForBES
Revision dc65c2366e10a76b7166228adb65acb9a20d68a0 authored by Pantelis Sopasakis on 05 March 2018, 20:56:03 UTC, committed by GitHub on 05 March 2018, 20:56:03 UTC
crash of Forbes on Matlab 2017a
2 parent s 2d876d9 + 8253a09
Raw File
Tip revision: dc65c2366e10a76b7166228adb65acb9a20d68a0 authored by Pantelis Sopasakis on 05 March 2018, 20:56:03 UTC
Merge pull request #3 from Zilleplus/master
Tip revision: dc65c23
LineSearch_HagerZhang.m
function [alpha, cachet, cachet1, ops, lsopt, info] = LineSearch_HagerZhang(cache, dir, slope, t0, lsopt, varargin)
% Hager Zhang line search based on CG-DESCENT Version 6.7  (April 7, 2014)
%    Approximate Wolfe line search routine
%    info:
%        0 (Wolfe or approximate Wolfe conditions satisfied)
%        3 (slope always negative in line search)
%        4 (number of line search iterations exceed nline)
%        6 (excessive updating of eps)
%        7 (Wolfe conditions never satisfied)
%    ========================================================================= */

    % precompute stuff for the line search
    [cache, ops] = Cache_LineSearch(cache, dir);

    cachet1 = [];

    AWolfe = lsopt.AWolfe;
    alpha = t0;
    f0 = cache.FBE;

    if lsopt.PertRule
        epsilon = lsopt.eps*abs(f0);
    else
        epsilon = lsopt.eps;
    end

    lsopt.wolfe_hi  = lsopt.delta*slope;
    lsopt.wolfe_lo  = lsopt.sigma*slope;
    lsopt.awolfe_hi = (2*lsopt.delta - 1)*slope;
    lsopt.fpert     = f0 + epsilon;

    % evaluate function or gradient at alpha (starting guess)
    if ( lsopt.QuadOK )
        [cachet, ops1] = LineFBE(cache, alpha, 3);
        ops = Ops_Sum(ops, ops1);
        f = cachet.FBE; df = cachet.dFBE;
        fb = f;
        if ( ~AWolfe ), fb = fb - alpha*lsopt.wolfe_hi ;end
        qb = true ; % function value at b known
    else
        [cachet, ops1] = LineFBE(cache, alpha, 2);
        ops = Ops_Sum(ops, ops1);
        df = cachet.dFBE;
        qb = false ;
    end
    b = alpha ;

    if ( AWolfe )
        db = df ;
        d0 = slope;
        da = slope;
    else
        db = df - lsopt.wolfe_hi ;
        d0 = slope - lsopt.wolfe_hi ;
        da = d0;
    end
    a  = 0 ;
    a1 = 0 ;
    d1 = d0 ;
    fa = f0 ;

    %     if a quadratic interpolation step performed, check Wolfe conditions */
    if ( (lsopt.QuadOK) && (f <= f0) )
        if ( HagerZhangTestWolfe (alpha, f, df, f0,lsopt) ), info = 0; return ;end
    end

    %       Find initial interval [a,b] such that
    %       da <= 0, db >= 0, fa <= fpert = [(f0 + eps*abs(f0)) or (f0 + eps)] */
    rho = lsopt.rho ;
    ngrow = 1 ;
    while ( db < 0 )
        if ( ~qb )
            [cachet, ops1] = LineFBE(cache, alpha, 1, cachet);
            ops = Ops_Sum(ops, ops1);
            f = cachet.FBE;
            if ( AWolfe )
                fb = f ;
            else
                fb = f - b*lsopt.wolfe_hi ;
            end
            qb = true ;
        end
        if ( fb > lsopt.fpert ) % contract interval [a, b]
            [a,fa,da,b,fb,db,alpha,status,lsopt,cachet,cnt1] = HagerZhangUpdate (a, fa, da, b, fb, db,prob,gam,cache,lsopt,f0) ;
            cnt = cnt+cnt1;
            if ( status == 0 ), info = 0; return ;end  % /* Wolfe conditions hold */
            if ( status == -2 ), break,end ; %/* db >= 0 */
            if ( lsopt.neps > 0 ), info = 6;return,end
        end

        % expansion phase
        ngrow = ngrow +1 ;
        if ( ngrow > lsopt.nexpand ), info = 3; return,end
        % update interval (a replaced by b) */
        a = b ;
        fa = fb ;
        da = db ;
        % store old values of a and corresponding derivative
        d2 = d1 ;
        d1 = da ;
        a2 = a1 ;
        a1 = a ;

        bmin = rho*b ;
        if ( (ngrow == 2) || (ngrow == 3) || (ngrow == 6) )
            if ( d1 > d2 )
                if ( ngrow == 2 )
                    b = a1 - (a1-a2)*(d1/(d1-d2)) ;
                else
                    if ( (d1-d2)/(a1-a2) >= (d2-d0)/a2 )
                        % convex derivative, secant overestimates minimizer
                        b = a1 - (a1-a2)*(d1/(d1-d2)) ;
                    else
                        % concave derivative, secant underestimates minimizer
                        b = a1 - lsopt.SecantAmp*(a1-a2)*(d1/(d1-d2)) ;
                    end
                end
                % safeguard growth
                b = min (b, lsopt.ExpandSafe*a1) ;
            else
                rho = rho*lsopt.RhoGrow ;
            end
        else
            rho = rho*lsopt.RhoGrow ;
        end
        b = max (bmin, b) ;
        alpha = b ;
        [cachet, ops1] = LineFBE(cache, alpha, 2);
        ops = Ops_Sum(ops, ops1);
        df = cachet.dFBE;
        b = alpha ;
        qb = false ;
        if ( AWolfe )
            db = df ;
        else
            db = df - lsopt.wolfe_hi ;
        end

    end

    %     /* we now have fa <= fpert, da >= 0, db <= 0 */
    toggle = 0 ;
    width = b - a ;
    qb0 = false ;
    for iter = 0:lsopt.nsecant-1
        %         /* determine the next iterate */
        if ( (toggle == 0) || ((toggle == 2) && ((b-a) <= width)) )
            lsopt.QuadOK = true ;
            if ( lsopt.UseCubic && qb )
                alpha = HagerZhangCubInterp (a, fa, da, b, fb, db) ;
                if ( alpha < 0 ) %/* use secant method */
                    if ( -da < db )
                        alpha = a - (a-b)*(da/(da-db)) ;
                    elseif ( da ~= db )
                        alpha = b - (a-b)*(db/(da-db)) ;
                    else
                        alpha = -1. ;
                    end
                end
            else
                if  ( -da < db )
                    alpha = a - (a-b)*(da/(da-db)) ;
                elseif ( da ~= db )
                    alpha = b - (a-b)*(db/(da-db)) ;
                else
                    alpha = -1. ;
                end
            end
            width = lsopt.gamma*(b - a) ;

        elseif ( toggle == 1 ) %/* iteration based on smallest value*/
            lsopt.QuadOK = true ;
            if ( lsopt.UseCubic )
                if ( alpha == a ) %/* a is most recent iterate */
                    alpha = HagerZhangCubInterp (a0, fa0, da0, a, fa, da) ;
                elseif ( qb0 )        %/* b is most recent iterate */
                    alpha = HagerZhangCubInterp (b, fb, db, b0, fb0, db0) ;
                else
                    alpha = -1. ;
                end
                %                 /* if alpha no good, use cubic between a and b */
                if ( (alpha <= a) || (alpha >= b) )
                    if ( qb )
                        alpha = HagerZhangCubInterp (a, fa, da, b, fb, db) ;
                    else
                        alpha = -1. ;
                    end
                end

                %                 /* if alpha still no good, use secant method */
                if ( alpha < 0 )
                    if ( -da < db )
                        alpha = a - (a-b)*(da/(da-db)) ;
                    elseif ( da ~= db )
                        alpha = b - (a-b)*(db/(da-db)) ;
                    else
                        alpha = -1. ;
                    end
                end
            else %/* ( use secant ) */
                if ( (alpha == a) && (da > da0) ) %/* use a0 if possible */
                    alpha = a - (a-a0)*(da/(da-da0)) ;
                elseif ( db < db0 )                   %/* use b0 if possible */
                    alpha = b - (b-b0)*(db/(db-db0)) ;
                else %/* secant based on a and b */
                    if      ( -da < db )
                        alpha = a - (a-b)*(da/(da-db)) ;
                    elseif ( da ~= db )
                        alpha = b - (a-b)*(db/(da-db)) ;
                    else
                        alpha = -1. ;
                    end
                end
                if ( (alpha <= a) || (alpha >= b) )
                    if      ( -da < db )
                        alpha = a - (a-b)*(da/(da-db)) ;
                    elseif ( da ~= db )
                        alpha = b - (a-b)*(db/(da-db)) ;
                    else
                        alpha = -1. ;
                    end
                end
            end
        else
            alpha = .5*(a+b) ; %/* use bisection if b-a decays slowly */
            lsopt.QuadOK = false ;
        end
        if ( (alpha <= a) || (alpha >= b) )
            alpha = .5*(a+b) ;
            if ( (alpha == a) || (alpha == b) ),
                info = 7;
                return ;
            end
            lsopt.QuadOK = false ; %/* bisection was used */
        end

        if ( toggle == 0 ) %/* save values for next iteration */
            a0 = a ;
            b0 = b ;
            da0 = da ;
            db0 = db ;
            fa0 = fa ;
            if ( qb )
                fb0 = fb ;
                qb0 = true ;
            end
        end

        toggle = toggle + 1 ;
        if ( toggle > 2 ), toggle = 0 ;end
        [cachet, ops1] = LineFBE(cache, alpha, 3);
        ops = Ops_Sum(ops, ops1);
        f = cachet.FBE;df = cachet.dFBE;
        if ( lsopt.QuadOK )
            if ( HagerZhangTestWolfe (alpha, f, df, f0,lsopt) )
                info = 0;
                return
            end
        end

        if ( ~AWolfe )
            f = f - alpha*lsopt.wolfe_hi ;
            df = df - lsopt.wolfe_hi ;
        end

        if ( df >= 0 )
            b = alpha ;
            fb = f ;
            db = df ;
            qb = true ;
        elseif ( f <= lsopt.fpert )
            a = alpha ;
            da = df ;
            fa = f ;
        else
            B = b ;
            if ( qb ), fB = fb ;end
            dB = db ;
            b = alpha ;
            fb = f ;
            db = df ;
            %             /* contract interval [a, alpha] */
            [a,fa,da,b,fb,db,alpha,status,lsopt,cachet,ops1] = HagerZhangUpdate (a,fa,da,b,fb,db,cache,lsopt,f0) ;
            ops = Ops_Sum(ops, ops1);
            if ( status == 0 ), info = 0; return; end
            if ( status == -1 ) %/* eps reduced, use [a, b] = [alpha, b] */
                if ( lsopt.neps > 5 ), info = 6; return; end
                a = b ;
                fa = fb ;
                da = db ;
                b = B ;
                if ( qb ), fb = fB ;end
                db = dB ;
            else
                qb = true ;
            end
        end
    end
    info = 4;
end

% /* =========================================================================
%    ==== update ========================================================
%    =========================================================================
%    The input for this routine is an interval [a, b] with the property that
%    fa <= fpert, da >= 0, db >= 0, and fb >= fpert. The returned status is
%
%   11  function or derivative not defined
%    0  if the Wolfe conditions are satisfied
%   -1  if a new value for eps is generated with the property that for the
%       corresponding fpert, we have fb <= fpert
%   -2  if a subinterval, also denoted [a, b], is generated with the property
%       that fa <= fpert, da >= 0, and db <= 0
%
%    NOTE: The input arguments are unchanged when status = -1
%    ========================================================================= */
function [a,fa,da,b,fb,db,alpha,info,lsopt,cachet,ops] = HagerZhangUpdate(a,fa,da,b,fb,db,cache,lsopt,f0)
    ops = Ops_Init();
    AWolfe = lsopt.AWolfe ;
    f1 = fb ;
    toggle = 0 ;
    width = 0 ;
    for iter=0:lsopt.ncontract
        if ( (toggle == 0) || ((toggle == 2) && ((b-a) <= width)) )
            %             /* cubic based on bracketing interval */
            alpha = HagerZhangCubInterp (a, fa, da, b, fb, db) ;
            toggle = 0 ;
            width = lsopt.gamma*(b-a) ;
            if ( iter ),
                lsopt.QuadOK = true ;
            end %/* at least 2 cubic iterations */
        elseif ( toggle == 1 )
            lsopt.QuadOK = true ;
            %             /* cubic based on most recent iterate and smallest value */
            if ( old < a ) %/* a is most recent iterate */
                alpha = HagerZhangCubInterp (a, fa, da, old, fold, dold) ;
            else         %  /* b is most recent iterate */
                alpha = HagerZhangCubInterp (a, fa, da, b, fb, db) ;
            end
        else
            alpha = .5*(a+b) ; %/* use bisection if b-a decays slowly */
            lsopt.QuadOK = false ;
        end

        if ( (alpha <= a) || (alpha >= b) )
            alpha = .5*(a+b) ;
            lsopt.QuadOK = false ; %/* bisection was used */
        end
        toggle = toggle + 1 ;
        if ( toggle > 2 )
            toggle = 0 ;
        end
        [cachet, ops1] = LineFBE(cache, alpha, 3);
        ops = Ops_Sum(ops, ops1);
        f = cachet.FBE; df = cachet.dFBE;

        if ( lsopt.QuadOK )
            if ( HagerZhangTestWolfe (alpha, f, df, f0, lsopt) ),
                info = 0;
                return
            end
        end

        if ( ~AWolfe )
            f = f - alpha*lsopt.wolfe_hi ;
            df = df - lsopt.wolfe_hi ;
        end
        if ( df >= 0 )
            a = alpha ;
            fb = f ;
            db = df ;
            info = -2;
            return
        end
        if ( f <= lsopt.fpert ) %/* update a using alpha */
            old = a ;
            fold = fa ;
            dold = da ;
            a = alpha ;
            fa = f ;
            da = df ;
        else                     %/* update b using alpha */
            old = b ;
            fold = fb;
            dold = db;
            b = alpha ;
            fb = f ;
            db = df ;
        end

    end

    %% This might need debugging
    %   see if the cost is small enough to change the PertRule
    if ( abs (fb) <= lsopt.SmallCost ),
        lsopt.PertRule = false ;
    end

    %  increase eps if slope is negative after Parm->nshrink iterations
    if ( lsopt.PertRule )
        if ( f0 ~= 0)
            lsopt.eps = lsopt.egrow*(f1-f0)/abs (f0) ;
            lsopt.fpert = f0 + abs (f0)*lsopt.eps ;
        else
            lsopt.fpert = 2*f1 ;
        end
    else
        lsopt.eps = lsopt.egrow*(f1-f0) ;
        lsopt.fpert = f0 + lsopt.eps ;
    end
    lsopt.neps = lsopt.neps+1 ;
    info = -1 ;
end

function done = HagerZhangTestWolfe (alpha, f, df,f0,lsopt)
    done = false;
    if ( df >= lsopt.wolfe_lo )
        % c test original Wolfe conditions
        if ( f-f0 <= alpha*lsopt.wolfe_hi )
            done = true;
            % c test approximate Wolfe conditions
        elseif ( lsopt.AWolfe )
            done = ( (f <= lsopt.fpert) & (df <= lsopt.awolfe_hi));
        end
    end
end

function t = HagerZhangCubInterp(t1,f1,df1,t2,f2,df2)
    % t1, t2 might not be sorted
    delta = t2 - t1 ;
    if  delta == 0
        t = t1;
    else
        d1 = df1 + df2 - 3*(f2-f1)/delta;
        d2 = d1^2 - df1*df2 ;
        if  d2 < 0 % /* complex roots, use secant method */
            if ( abs(df1) < abs (df2) )
                t = t1 - (t1 - t2)*(df1/(df1-df2)) ;
            elseif ( df1 ~= df1 )
                t = t2 - (t1 - t2)*(df2/(df1-df2)) ;
            else
                t = - 1 ;
            end
        else
            % first way: from Hager-Zhang code
            d2 = sqrt(d2)*sign(delta);
            v1 = df1 + d1 - d2 ;
            v2 = df2 + d1 + d2 ;
            if ( (v1 == 0) && (v2 == 0) )
                t = -1;
            elseif ( abs (v1) >= abs (v2) )
                t = t1 + delta*df1/v1 ;
            else
                t = t2 - delta*df2/v2 ;
            end
            %
            % second way: from Bonnans, Lemarechal
            %         d2 = sqrt(d2)*sign(delta);
            %         v1 = df2 + d2 - d1;
            %         v2 = df2 - df1 + 2*d2;
            %         if ( (v1 == 0) && (v2 == 0) )
            %             t = -1;
            %         else
            %             t = t2 - delta*(v1/v2);
            %         end
        end
    end
end
back to top