https://github.com/JuliaLang/julia
Raw File
Tip revision: d43ca71d171a02e4816036b34764098d9b984ee4 authored by Jameson Nash on 12 May 2023, 16:29:37 UTC
macroexpand: stop pre-running the hygiene pass
Tip revision: d43ca71
log.jl
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Implementation of
#  "Table-driven Implementation of the Logarithm Function in IEEE Floating-point Arithmetic"
#  Tang, Ping-Tak Peter
#  ACM Trans. Math. Softw. (1990), 16(4):378--400
#  https://doi.org/10.1145/98267.98294

# Does not currently handle floating point flags (inexact, div-by-zero, etc).

import .Base.unsafe_trunc
import .Base.Math.@horner

# Float64 lookup table.
# to generate values:
  # N=39 # (can be up to N=42).
  # sN = 2.0^N
  # isN = 1.0/sN
  # s7 = 2.0^7
  # is7 = 1.0/s7
  # for j=0:128
  #   l_big = Base.log(big(1.0+j*is7))
  #   l_hi = isN*Float64(round(sN*l_big))
  #   l_lo = Float64(l_big-l_hi)
  #   j % 2 == 0 && print("\n    ")
  #   print("(",l_hi,",",l_lo,"),")
  # end

const t_log_Float64 = ((0.0,0.0),(0.007782140442941454,-8.865052917267247e-13),
    (0.015504186536418274,-4.530198941364935e-13),(0.0231670592820592,-5.248209479295644e-13),
    (0.03077165866670839,4.529814257790929e-14),(0.0383188643027097,-5.730994833076631e-13),
    (0.04580953603181115,-5.16945692881222e-13),(0.053244514518155484,6.567993368985218e-13),
    (0.06062462181580486,6.299848199383311e-13),(0.06795066190898069,-4.729424109166329e-13),
    (0.07522342123775161,-1.6408301585598662e-13),(0.08244366921098845,8.614512936087814e-14),
    (0.08961215869021544,-5.283050530808144e-13),(0.09672962645890948,-3.5836667430094137e-13),
    (0.10379679368088546,7.581073923016376e-13),(0.11081436634049169,-2.0157368416016215e-13),
    (0.11778303565552051,8.629474042969438e-13),(0.1247034785010328,-7.556920687451337e-14),
    (0.1315763577895268,-8.075373495358435e-13),(0.13840232285838283,7.363043577087051e-13),
    (0.14518200984457508,-7.718001336828099e-14),(0.15191604202664166,-7.996871607743758e-13),
    (0.15860503017574956,8.890223439724663e-13),(0.16524957289584563,-5.384682618788232e-13),
    (0.17185025692742784,-7.686134224018169e-13),(0.17840765747314435,-3.2605717931058157e-13),
    (0.18492233849428885,-2.7685884310448306e-13),(0.1913948530000198,-3.903387893794952e-13),
    (0.1978257433293038,6.160755775588723e-13),(0.20421554142922105,-5.30156516006026e-13),
    (0.21056476910780475,-4.55112422774782e-13),(0.21687393830143264,-8.182853292737783e-13),
    (0.22314355131493357,-7.238189921749681e-13),(0.22937410106533207,-4.86240001538379e-13),
    (0.23556607131286,-9.30945949519689e-14),(0.24171993688651128,6.338907368997553e-13),
    (0.24783616390413954,4.4171755371315547e-13),(0.25391520998164196,-6.785208495970588e-13),
    (0.25995752443668607,2.3999540484211735e-13),(0.2659635484978935,-7.555569400283742e-13),
    (0.27193371548310097,5.407904186145515e-13),(0.2778684510030871,3.692037508208009e-13),
    (0.28376817313073843,-9.3834172236637e-14),(0.28963329258294834,9.43339818951269e-14),
    (0.29546421289342106,4.148131870425857e-13),(0.3012613305781997,-3.7923164802093147e-14),
    (0.3070250352957373,-8.25463138725004e-13),(0.31275571000333,5.668653582900739e-13),
    (0.318453731119007,-4.723727821986367e-13),(0.32411946865431673,-1.0475750058776541e-13),
    (0.32975328637257917,-1.1118671389559323e-13),(0.33535554192167183,-5.339989292003297e-13),
    (0.3409265869704541,1.3912841212197566e-13),(0.3464667673470103,-8.017372713972018e-13),
    (0.35197642315688427,2.9391859187648e-13),(0.3574558889213222,4.815896111723205e-13),
    (0.3629054936900502,-6.817539406325327e-13),(0.36832556115950865,-8.009990055432491e-13),
    (0.3737164097929053,6.787566823158706e-13),(0.37907835293481185,1.5761203773969435e-13),
    (0.3844116989112081,-8.760375990774874e-13),(0.38971675114044046,-4.152515806343612e-13),
    (0.3949938082405424,3.2655698896907146e-13),(0.40024316412745975,-4.4704265010452445e-13),
    (0.4054651081078191,3.452764795203977e-13),(0.4106599249844294,8.390050778518307e-13),
    (0.4158278951435932,1.1776978751369214e-13),(0.4209692946442374,-1.0774341461609579e-13),
    (0.42608439531068143,2.186334329321591e-13),(0.43117346481813,2.413263949133313e-13),
    (0.4362367667745275,3.90574622098307e-13),(0.44127456080423144,6.437879097373207e-13),
    (0.44628710262804816,3.713514191959202e-13),(0.45127464413963025,-1.7166921336082432e-13),
    (0.4562374334818742,-2.8658285157914353e-13),(0.4611757151214988,6.713692791384601e-13),
    (0.46608972992544295,-8.437281040871276e-13),(0.4709797152190731,-2.821014384618127e-13),
    (0.4758459048698569,1.0701931762114255e-13),(0.4806885293455707,1.8119346366441111e-13),
    (0.4855078157816024,9.840465278232627e-14),(0.49030398804461583,5.780031989454028e-13),
    (0.49507726679803454,-1.8302857356041668e-13),(0.4998278695566114,-1.620740015674495e-13),
    (0.5045560107519123,4.83033149495532e-13),(0.5092619017905236,-7.156055317238212e-13),
    (0.5139457511013461,8.882123951857185e-13),(0.5186077642083546,-3.0900580513238243e-13),
    (0.5232481437651586,-6.10765519728515e-13),(0.5278670896204858,3.565996966334783e-13),
    (0.532464798869114,3.5782396591276384e-13),(0.5370414658973459,-4.622608700154458e-13),
    (0.5415972824321216,6.227976291722515e-13),(0.5461324375974073,7.283894727206574e-13),
    (0.5506471179523942,2.680964661521167e-13),(0.5551415075406112,-1.0960825046059278e-13),
    (0.5596157879353996,2.3119493838005378e-14),(0.5640701382853877,-5.846905800529924e-13),
    (0.5685047353526897,-2.1037482511444942e-14),(0.5729197535620187,-2.332318294558741e-13),
    (0.5773153650352469,-4.2333694288141915e-13),(0.5816917396350618,-4.3933937969737843e-13),
    (0.5860490450031648,4.1341647073835564e-13),(0.590387446602108,6.841763641591467e-14),
    (0.5947071077462169,4.758553400443064e-13),(0.5990081896452466,8.367967867475769e-13),
    (0.6032908514389419,-8.576373464665864e-13),(0.6075552502243227,2.1913281229340092e-13),
    (0.6118015411066153,-6.224284253643115e-13),(0.6160298772156239,-1.098359432543843e-13),
    (0.6202404097512044,6.531043137763365e-13),(0.6244332880123693,-4.758019902171077e-13),
    (0.6286086594227527,-3.785425126545704e-13),(0.6327666695706284,4.0939233218678666e-13),
    (0.636907462236195,8.742438391485829e-13),(0.6410311794206791,2.521818845684288e-13),
    (0.6451379613736208,-3.6081313604225574e-14),(0.649227946625615,-5.05185559242809e-13),
    (0.6533012720119586,7.869940332335532e-13),(0.6573580727090302,-6.702087696194906e-13),
    (0.6613984822452039,1.6108575753932459e-13),(0.6654226325445052,5.852718843625151e-13),
    (0.6694306539429817,-3.5246757297904794e-13),(0.6734226752123504,-1.8372084495629058e-13),
    (0.6773988235909201,8.860668981349492e-13),(0.6813592248072382,6.64862680714687e-13),
    (0.6853040030982811,6.383161517064652e-13),(0.6892332812385575,2.5144230728376075e-13),
    (0.6931471805601177,-1.7239444525614835e-13))

# Float32 lookup table
# to generate values:
  # N=16
  # sN = 2f0^N
  # isN = 1f0/sN
  # s7 = 2.0^7
  # is7 = 1.0/s7
  # for j=0:128
  #   j % 4 == 0 && print("\n    ")
  #   print(float64(Base.log(big(1.0+j*is7))),",")
  # end

const t_log_Float32 = (0.0,0.007782140442054949,0.015504186535965254,0.02316705928153438,
    0.030771658666753687,0.0383188643021366,0.0458095360312942,0.053244514518812285,
    0.06062462181643484,0.06795066190850775,0.07522342123758753,0.08244366921107459,
    0.08961215868968714,0.09672962645855111,0.10379679368164356,0.11081436634029011,
    0.11778303565638346,0.12470347850095724,0.13157635778871926,0.13840232285911913,
    0.1451820098444979,0.15191604202584197,0.15860503017663857,0.16524957289530717,
    0.17185025692665923,0.1784076574728183,0.184922338494012,0.19139485299962947,
    0.19782574332991987,0.2042155414286909,0.21056476910734964,0.21687393830061436,
    0.22314355131420976,0.22937410106484582,0.2355660713127669,0.24171993688714516,
    0.24783616390458127,0.25391520998096345,0.25995752443692605,0.26596354849713794,
    0.27193371548364176,0.2778684510034563,0.2837681731306446,0.28963329258304266,
    0.2954642128938359,0.3012613305781618,0.3070250352949119,0.3127557100038969,
    0.3184537311185346,0.324119468654212,0.329753286372468,0.3353555419211378,
    0.3409265869705932,0.34646676734620857,0.3519764231571782,0.3574558889218038,
    0.3629054936893685,0.3683255611587076,0.37371640979358406,0.37907835293496944,
    0.38441169891033206,0.3897167511400252,0.394993808240869,0.4002431641270127,
    0.4054651081081644,0.4106599249852684,0.415827895143711,0.42096929464412963,
    0.4260843953109001,0.4311734648183713,0.43623676677491807,0.4412745608048752,
    0.44628710262841953,0.45127464413945856,0.4562374334815876,0.46117571512217015,
    0.46608972992459924,0.470979715218791,0.4758459048699639,0.4806885293457519,
    0.4855078157817008,0.4903039880451938,0.4950772667978515,0.4998278695564493,
    0.5045560107523953,0.5092619017898079,0.5139457511022343,0.5186077642080457,
    0.5232481437645479,0.5278670896208424,0.5324647988694718,0.5370414658968836,
    0.5415972824327444,0.5461324375981357,0.5506471179526623,0.5551415075405016,
    0.5596157879354227,0.564070138284803,0.5685047353526688,0.5729197535617855,
    0.5773153650348236,0.5816917396346225,0.5860490450035782,0.5903874466021763,
    0.5947071077466928,0.5990081896460834,0.6032908514380843,0.6075552502245418,
    0.6118015411059929,0.616029877215514,0.6202404097518576,0.6244332880118935,
    0.6286086594223741,0.6327666695710378,0.6369074622370692,0.6410311794209312,
    0.6451379613735847,0.6492279466251099,0.6533012720127457,0.65735807270836,
    0.661398482245365,0.6654226325450905,0.6694306539426292,0.6734226752121667,
    0.6773988235918061,0.6813592248079031,0.6853040030989194,0.689233281238809,
    0.6931471805599453)

# truncate lower order bits (up to 26)
# ideally, this should be able to use ANDPD instructions, see #9868.
@inline function truncbits(x::Float64)
    reinterpret(Float64, reinterpret(UInt64,x) & 0xffff_ffff_f800_0000)
end

logb(::Type{Float32},::Val{2})  = 1.4426950408889634
logb(::Type{Float32},::Val{:ℯ}) = 1.0
logb(::Type{Float32},::Val{10}) = 0.4342944819032518
logbU(::Type{Float64},::Val{2})  = 1.4426950408889634
logbL(::Type{Float64},::Val{2})  = 2.0355273740931033e-17
logbU(::Type{Float64},::Val{:ℯ}) = 1.0
logbL(::Type{Float64},::Val{:ℯ}) = 0.0
logbU(::Type{Float64},::Val{10}) = 0.4342944819032518
logbL(::Type{Float64},::Val{10}) = 1.098319650216765e-17

# Procedure 1
@inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,base=Val(:ℯ))
    jp = unsafe_trunc(Int,128.0*F)-127

    ## Steps 1 and 2
    Base.@assume_effects :nothrow :noub @inbounds hi,lo = t_log_Float64[jp]
    l_hi = mf* 0.6931471805601177 + hi
    l_lo = mf*-1.7239444525614835e-13 + lo

    ## Step 3
    # @inbounds u = f*c_invF[jp]
    # u = f/F
    # q = u*u*@horner(u,
    #                 -0x1.0_0000_0000_0001p-1,
    #                 +0x1.5_5555_5550_9ba5p-2,
    #                 -0x1.f_ffff_ffeb_6526p-3,
    #                 +0x1.9_99b4_dfed_6fe4p-3,
    #                 -0x1.5_5576_6647_2e04p-3)

    ## Step 3' (alternative)
    u = (2.0f)/(y+F)
    v = u*u
    q = u*v*@horner(v,
                    0.08333333333303913,
                    0.012500053168098584)

    ## Step 4
    m_hi = logbU(Float64, base)
    m_lo = logbL(Float64, base)
    return fma(m_hi, l_hi, fma(m_hi, (u + (q + l_lo)), m_lo*l_hi))
end

# Procedure 2
@inline function log_proc2(f::Float64,base=Val(:ℯ))
    ## Step 1
    g = 1.0/(2.0+f)
    u = 2.0*f*g
    v = u*u

    ## Step 2
    q = u*v*@horner(v,
                    0.08333333333333179,
                    0.012500000003771751,
                    0.0022321399879194482,
                    0.0004348877777076146)

    ## Step 3
    # based on:
    #   2(f-u) = 2(f(2+f)-2f)/(2+f) = 2f^2/(2+f) = fu
    #   2(f-u1-u2) - f*(u1+u2) = 0
    #   2(f-u1) - f*u1 = (2+f)u2
    #   u2 = (2(f-u1) - f*u1)/(2+f)

    m_hi = logbU(Float64, base)
    m_lo = logbL(Float64, base)
    return fma(m_hi, u, fma(m_lo, u, m_hi*fma(fma(-u,f,2(f-u)), g, q)))
end

# Procedure 1
@inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,base=Val(:ℯ))
    jp = unsafe_trunc(Int,128.0f0*F)-127

    ## Steps 1 and 2
    Base.@assume_effects :nothrow :noub @inbounds hi = t_log_Float32[jp]
    l = mf*0.6931471805599453 + hi

    ## Step 3
    # @inbounds u = f*c_invF[jp]
    # q = u*u*@horner(u,
    #                 Float32(-0x1.00006p-1),
    #                 Float32(0x1.55546cp-2))

    ## Step 3' (alternative)
    u = (2f0f)/(y+F)
    v = u*u
    q = u*v*0.08333351f0

    ## Step 4
    Float32(logb(Float32, base)*(l + (u + q)))
end

# Procedure 2
@inline function log_proc2(f::Float32,base=Val(:ℯ))
    ## Step 1
    # compute in higher precision
    u64 = Float64(2f0*f)/(2.0+f)
    u = Float32(u64)
    v = u*u

    ## Step 2
    q = u*v*@horner(v,
                    0.08333332f0,
                    0.012512346f0)

    ## Step 3: not required

    ## Step 4
    Float32(logb(Float32, base)*(u64 + q))
end

@noinline log2(x::Float32)  = _log(x, Val(2),  :log2)
@noinline log(x::Float32)   = _log(x, Val(:ℯ), :log)
@noinline log10(x::Float32) = _log(x, Val(10), :log10)
@noinline log2(x::Float64)  = _log(x, Val(2),  :log2)
@noinline log(x::Float64)   = _log(x, Val(:ℯ), :log)
@noinline log10(x::Float64) = _log(x, Val(10), :log10)

@inline function _log(x::Float64, base, func::Symbol)
    if x > 0.0
        x == Inf && return x

        # Step 2
        if 0.9394130628134757 < x < 1.0644944589178595
            f = x-1.0
            return log_proc2(f, base)
        end

        # Step 3
        xu = reinterpret(UInt64,x)
        m = Int(xu >> 52) & 0x07ff
        if m == 0 # x is subnormal
            x *= 1.8014398509481984e16 # 0x1p54, normalise significand
            xu = reinterpret(UInt64,x)
            m = Int(xu >> 52) & 0x07ff - 54
        end
        m -= 1023
        y = reinterpret(Float64,(xu & 0x000f_ffff_ffff_ffff) | 0x3ff0_0000_0000_0000)

        mf = Float64(m)
        F = (y + 3.5184372088832e13) - 3.5184372088832e13 # 0x1p-7*round(0x1p7*y)
        f = y-F

        return log_proc1(y,mf,F,f,base)
    elseif x == 0.0
        return -Inf
    elseif isnan(x)
        return NaN
    else
        throw_complex_domainerror(func, x)
    end
end

@inline function _log(x::Float32, base, func::Symbol)
    if x > 0f0
        x == Inf32 && return x

        # Step 2
        if 0.939413f0 < x < 1.0644945f0
            f = x-1f0
            return log_proc2(f, base)
        end

        # Step 3
        xu = reinterpret(UInt32,x)
        m = Int(xu >> 23) & 0x00ff
        if m == 0 # x is subnormal
            x *= 3.3554432f7 # 0x1p25, normalise significand
            xu = reinterpret(UInt32,x)
            m = Int(xu >> 23) & 0x00ff - 25
        end
        m -= 127
        y = reinterpret(Float32,(xu & 0x007f_ffff) | 0x3f80_0000)

        mf = Float32(m)
        F = (y + 65536.0f0) - 65536.0f0 # 0x1p-7*round(0x1p7*y)
        f = y-F

        return log_proc1(y,mf,F,f,base)
    elseif x == 0f0
        return -Inf32
    elseif isnan(x)
        return NaN32
    else
        throw_complex_domainerror(func, x)
    end
end


function log1p(x::Float64)
    if x > -1.0
        x == Inf && return x
        if -1.1102230246251565e-16 < x < 1.1102230246251565e-16
            return x # Inexact

        # Step 2
        elseif -0.06058693718652422 < x < 0.06449445891785943
            return log_proc2(x)
        end

        # Step 3
        z = 1.0 + x
        zu = reinterpret(UInt64,z)
        s = reinterpret(Float64,0x7fe0_0000_0000_0000 - (zu & 0xfff0_0000_0000_0000)) # 2^-m
        m = Int(zu >> 52) & 0x07ff - 1023 # z cannot be subnormal
        c = m > 0 ? 1.0-(z-x) : x-(z-1.0) # 1+x = z+c exactly
        y = reinterpret(Float64,(zu & 0x000f_ffff_ffff_ffff) | 0x3ff0_0000_0000_0000)

        mf = Float64(m)
        F = (y + 3.5184372088832e13) - 3.5184372088832e13 # 0x1p-7*round(0x1p7*y)
        f = (y - F) + c*s #2^m(F+f) = 1+x = z+c

        log_proc1(y,mf,F,f)
    elseif x == -1.0
        -Inf
    elseif isnan(x)
        NaN
    else
        throw_complex_domainerror_neg1(:log1p, x)
    end
end

function log1p(x::Float32)
    if x > -1f0
        x == Inf32 && return x
        if -5.9604645f-8 < x < 5.9604645f-8
            return x # Inexact
        # Step 2
        elseif -0.06058694f0 < x < 0.06449446f0
            return log_proc2(x)
        end

        # Step 3
        z = 1f0 + x
        zu = reinterpret(UInt32,z)
        s = reinterpret(Float32,0x7f000000 - (zu & 0xff80_0000)) # 2^-m
        m = Int(zu >> 23) & 0x00ff - 127 # z cannot be subnormal
        c = m > 0 ? 1f0-(z-x) : x-(z-1f0) # 1+x = z+c
        y = reinterpret(Float32,(zu & 0x007f_ffff) | 0x3f80_0000)

        mf = Float32(m)
        F = (y + 65536.0f0) - 65536.0f0 # 0x1p-7*round(0x1p7*y)
        f = (y - F) + s*c #2^m(F+f) = 1+x = z+c

        log_proc1(y,mf,F,f)
    elseif x == -1f0
        -Inf32
    elseif isnan(x)
        NaN32
    else
        throw_complex_domainerror_neg1(:log1p, x)
    end
end

#function make_compact_table(N)
#    table = Tuple{UInt64,Float64}[]
#    lo, hi = 0x1.69555p-1, 0x1.69555p0
#    for i in 0:N-1
#        # I am not fully sure why this is the right formula to use, but it apparently is
#        center = i/(2*N) + lo < 1 ? (i+.5)/(2*N) + lo : (i+.5)/N + hi -1
#        invc = Float64(center < 1 ? round(N/center)/N : round(2*N/center)/(N*2))
#        c = inv(big(invc))
#        logc = Float64(round(0x1p43*log(c))/0x1p43)
#        logctail = reinterpret(Float64, Float64(log(c) - logc))
#        p1 = (reinterpret(UInt64,invc) >> 45) % UInt8
#        push!(table, (p1|reinterpret(UInt64,logc),logctail))
#    end
#    return Tuple(table)
#end
#const t_log_table_compact = make_compact_table(128)
const t_log_table_compact = (
    (0xbfd62c82f2b9c8b5, 5.929407345889625e-15),
    (0xbfd5d1bdbf5808b4, -2.544157440035963e-14),
    (0xbfd57677174558b3, -3.443525940775045e-14),
    (0xbfd51aad872df8b2, -2.500123826022799e-15),
    (0xbfd4be5f957778b1, -8.929337133850617e-15),
    (0xbfd4618bc21c60b0, 1.7625431312172662e-14),
    (0xbfd404308686a8af, 1.5688303180062087e-15),
    (0xbfd3a64c556948ae, 2.9655274673691784e-14),
    (0xbfd347dd9a9880ad, 3.7923164802093147e-14),
    (0xbfd2e8e2bae120ac, 3.993416384387844e-14),
    (0xbfd2895a13de88ab, 1.9352855826489123e-14),
    (0xbfd2895a13de88ab, 1.9352855826489123e-14),
    (0xbfd22941fbcf78aa, -1.9852665484979036e-14),
    (0xbfd1c898c16998a9, -2.814323765595281e-14),
    (0xbfd1675cababa8a8, 2.7643769993528702e-14),
    (0xbfd1058bf9ae48a7, -4.025092402293806e-14),
    (0xbfd0a324e27390a6, -1.2621729398885316e-14),
    (0xbfd0402594b4d0a5, -3.600176732637335e-15),
    (0xbfd0402594b4d0a5, -3.600176732637335e-15),
    (0xbfcfb9186d5e40a4, 1.3029797173308663e-14),
    (0xbfcef0adcbdc60a3, 4.8230289429940886e-14),
    (0xbfce27076e2af0a2, -2.0592242769647135e-14),
    (0xbfcd5c216b4fc0a1, 3.149265065191484e-14),
    (0xbfcc8ff7c79aa0a0, 4.169796584527195e-14),
    (0xbfcc8ff7c79aa0a0, 4.169796584527195e-14),
    (0xbfcbc286742d909f, 2.2477465222466186e-14),
    (0xbfcaf3c94e80c09e, 3.6507188831790577e-16),
    (0xbfca23bc1fe2b09d, -3.827767260205414e-14),
    (0xbfca23bc1fe2b09d, -3.827767260205414e-14),
    (0xbfc9525a9cf4509c, -4.7641388950792196e-14),
    (0xbfc87fa06520d09b, 4.9278276214647115e-14),
    (0xbfc7ab890210e09a, 4.9485167661250996e-14),
    (0xbfc7ab890210e09a, 4.9485167661250996e-14),
    (0xbfc6d60fe719d099, -1.5003333854266542e-14),
    (0xbfc5ff3070a79098, -2.7194441649495324e-14),
    (0xbfc5ff3070a79098, -2.7194441649495324e-14),
    (0xbfc526e5e3a1b097, -2.99659267292569e-14),
    (0xbfc44d2b6ccb8096, 2.0472357800461955e-14),
    (0xbfc44d2b6ccb8096, 2.0472357800461955e-14),
    (0xbfc371fc201e9095, 3.879296723063646e-15),
    (0xbfc29552f81ff094, -3.6506824353335045e-14),
    (0xbfc1b72ad52f6093, -5.4183331379008994e-14),
    (0xbfc1b72ad52f6093, -5.4183331379008994e-14),
    (0xbfc0d77e7cd09092, 1.1729485484531301e-14),
    (0xbfc0d77e7cd09092, 1.1729485484531301e-14),
    (0xbfbfec9131dbe091, -3.811763084710266e-14),
    (0xbfbe27076e2b0090, 4.654729747598445e-14),
    (0xbfbe27076e2b0090, 4.654729747598445e-14),
    (0xbfbc5e548f5bc08f, -2.5799991283069902e-14),
    (0xbfba926d3a4ae08e, 3.7700471749674615e-14),
    (0xbfba926d3a4ae08e, 3.7700471749674615e-14),
    (0xbfb8c345d631a08d, 1.7306161136093256e-14),
    (0xbfb8c345d631a08d, 1.7306161136093256e-14),
    (0xbfb6f0d28ae5608c, -4.012913552726574e-14),
    (0xbfb51b073f06208b, 2.7541708360737882e-14),
    (0xbfb51b073f06208b, 2.7541708360737882e-14),
    (0xbfb341d7961be08a, 5.0396178134370583e-14),
    (0xbfb341d7961be08a, 5.0396178134370583e-14),
    (0xbfb16536eea38089, 1.8195060030168815e-14),
    (0xbfaf0a30c0118088, 5.213620639136504e-14),
    (0xbfaf0a30c0118088, 5.213620639136504e-14),
    (0xbfab42dd71198087, 2.532168943117445e-14),
    (0xbfab42dd71198087, 2.532168943117445e-14),
    (0xbfa77458f632c086, -5.148849572685811e-14),
    (0xbfa77458f632c086, -5.148849572685811e-14),
    (0xbfa39e87b9fec085, 4.6652946995830086e-15),
    (0xbfa39e87b9fec085, 4.6652946995830086e-15),
    (0xbf9f829b0e780084, -4.529814257790929e-14),
    (0xbf9f829b0e780084, -4.529814257790929e-14),
    (0xbf97b91b07d58083, -4.361324067851568e-14),
    (0xbf8fc0a8b0fc0082, -1.7274567499706107e-15),
    (0xbf8fc0a8b0fc0082, -1.7274567499706107e-15),
    (0xbf7fe02a6b100081, -2.298941004620351e-14),
    (0xbf7fe02a6b100081, -2.298941004620351e-14),
    (0x0000000000000080, 0.0),
    (0x0000000000000080, 0.0),
    (0x3f8010157589007e, -1.4902732911301337e-14),
    (0x3f9020565893807c, -3.527980389655325e-14),
    (0x3f98492528c9007a, -4.730054772033249e-14),
    (0x3fa0415d89e74078, 7.580310369375161e-15),
    (0x3fa466aed42e0076, -4.9893776716773285e-14),
    (0x3fa894aa149fc074, -2.262629393030674e-14),
    (0x3faccb73cdddc072, -2.345674491018699e-14),
    (0x3faeea31c006c071, -1.3352588834854848e-14),
    (0x3fb1973bd146606f, -3.765296820388875e-14),
    (0x3fb3bdf5a7d1e06d, 5.1128335719851986e-14),
    (0x3fb5e95a4d97a06b, -5.046674438470119e-14),
    (0x3fb700d30aeac06a, 3.1218748807418837e-15),
    (0x3fb9335e5d594068, 3.3871241029241416e-14),
    (0x3fbb6ac88dad6066, -1.7376727386423858e-14),
    (0x3fbc885801bc4065, 3.957125899799804e-14),
    (0x3fbec739830a2063, -5.2849453521890294e-14),
    (0x3fbfe89139dbe062, -3.767012502308738e-14),
    (0x3fc1178e8227e060, 3.1859736349078334e-14),
    (0x3fc1aa2b7e23f05f, 5.0900642926060466e-14),
    (0x3fc2d1610c86805d, 8.710783796122478e-15),
    (0x3fc365fcb015905c, 6.157896229122976e-16),
    (0x3fc4913d8333b05a, 3.821577743916796e-14),
    (0x3fc527e5e4a1b059, 3.9440046718453496e-14),
    (0x3fc6574ebe8c1057, 2.2924522154618074e-14),
    (0x3fc6f0128b757056, -3.742530094732263e-14),
    (0x3fc7898d85445055, -2.5223102140407338e-14),
    (0x3fc8beafeb390053, -1.0320443688698849e-14),
    (0x3fc95a5adcf70052, 1.0634128304268335e-14),
    (0x3fca93ed3c8ae050, -4.3425422595242564e-14),
    (0x3fcb31d8575bd04f, -1.2527395755711364e-14),
    (0x3fcbd087383be04e, -5.204008743405884e-14),
    (0x3fcc6ffbc6f0104d, -3.979844515951702e-15),
    (0x3fcdb13db0d4904b, -4.7955860343296286e-14),
    (0x3fce530effe7104a, 5.015686013791602e-16),
    (0x3fcef5ade4dd0049, -7.252318953240293e-16),
    (0x3fcf991c6cb3b048, 2.4688324156011588e-14),
    (0x3fd07138604d5846, 5.465121253624792e-15),
    (0x3fd0c42d67616045, 4.102651071698446e-14),
    (0x3fd1178e8227e844, -4.996736502345936e-14),
    (0x3fd16b5ccbacf843, 4.903580708156347e-14),
    (0x3fd1bf99635a6842, 5.089628039500759e-14),
    (0x3fd214456d0eb841, 1.1782016386565151e-14),
    (0x3fd2bef07cdc903f, 4.727452940514406e-14),
    (0x3fd314f1e1d3603e, -4.4204083338755686e-14),
    (0x3fd36b6776be103d, 1.548345993498083e-14),
    (0x3fd3c2527733303c, 2.1522127491642888e-14),
    (0x3fd419b423d5e83b, 1.1054030169005386e-14),
    (0x3fd4718dc271c83a, -5.534326352070679e-14),
    (0x3fd4c9e09e173039, -5.351646604259541e-14),
    (0x3fd522ae0738a038, 5.4612144489920215e-14),
    (0x3fd57bf753c8d037, 2.8136969901227338e-14),
    (0x3fd5d5bddf596036, -1.156568624616423e-14))

 @inline function log_tab_unpack(t::UInt64)
    invc = UInt64(t&UInt64(0xff)|0x1ff00)<<45
    logc = t&(~UInt64(0xff))
    return (reinterpret(Float64, invc), reinterpret(Float64, logc))
end

# Log implementation that returns 2 numbers which sum to give true value with about 68 bits of precision
# Since `log` only makes sense for positive exponents, we speed up the implementation by stealing the sign bit
# of the input for an extra bit of the exponent which is used to normalize subnormal inputs.
# Does not normalize results.
# Adapted and modified from https://github.com/ARM-software/optimized-routines/blob/master/math/pow.c
# Copyright (c) 2018-2020, Arm Limited. (which is also MIT licensed)
# note that this isn't an exact translation as this version compacts the table to reduce cache pressure.
function _log_ext(xu::UInt64)
    # x = 2^k z; where z is in range [0x1.69555p-1,0x1.69555p-0) and exact.
    # The range is split into N subintervals.
    # The ith subinterval contains z and c is near the center of the interval.
    tmp = reinterpret(Int64, xu - 0x3fe6955500000000) #0x1.69555p-1
    z = reinterpret(Float64, xu - (tmp & 0xfff0000000000000))
    k = Float64(tmp >> 52)
    # log(x) = k*Ln2 + log(c) + log1p(z/c-1).
    # N.B. :nothrow and :noub since `idx` is known to be `1 ≤ idx ≤ length(t_log_table_compact)`
    idx = (tmp >> 45) & (length(t_log_table_compact)-1) + 1
    t, logctail = Base.@assume_effects :nothrow :noub @inbounds t_log_table_compact[idx]
    invc, logc = log_tab_unpack(t)
    # Note: invc is j/N or j/N/2 where j is an integer in [N,2N) and
    # |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable.
    r = fma(z, invc, -1.0)
    # k*Ln2 + log(c) + r.
    t1 = muladd(k, 0.6931471805598903, logc) #ln(2) hi part
    t2 = t1 + r
    lo1 = muladd(k, 5.497923018708371e-14, logctail) #ln(2) lo part
    lo2 = t1 - t2 + r
    ar = -0.5 * r
    ar2, lo3 = two_mul(r, ar)
    # k*Ln2 + log(c) + r + .5*r*r.
    hi = t2 + ar2
    lo4 = t2 - hi + ar2
    p = evalpoly(r, (-0x1.555555555556p-1, 0x1.0000000000006p-1, -0x1.999999959554ep-2, 0x1.555555529a47ap-2, -0x1.2495b9b4845e9p-2, 0x1.0002b8b263fc3p-2))
    lo = lo1 + lo2 + lo3 + muladd(r*ar2, p, lo4)
    return hi, lo
end
back to top