https://github.com/JuliaLang/julia
Raw File
Tip revision: e781c1dac674721ec59bd5e7af0c06ba891bc5b3 authored by Valentin Churavy on 22 November 2020, 01:26:13 UTC
fixup! Use Threads.Condition instead of Event
Tip revision: e781c1d
float.jl
# This file is a part of Julia, but is derived from
# https://github.com/google/double-conversion which has the following license
#
# Copyright 2006-2014, the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import Base: -, *

struct Float
    s::UInt64
    e::Int32
    de::Int32
end

Float() = Float(0,0,0)
Float(x,y) = Float(x,y,Int32(0))
Float(d::AbstractFloat) = Float(_significand(d), _exponent(d))

# Consts
const Float10MSBits = 0xFFC0000000000000 # used normalize(Float)
const FloatSignMask = 0x8000000000000000 # used in normalize(Float)
const FloatSignificandSize = Int32(64)

function normalize(v::Float)
    f = v.s
    e::Int32 = v.e
    while (f & Float10MSBits) == 0
        f <<= 10
        e -= 10
    end
    while (f & FloatSignMask) == 0
        f <<= 1
        e -= 1
    end
    return Float(f,e)
end
function normalize(v::Float64)
    s = _significand(v); e = _exponent(v)
    while (s & HiddenBit(Float64)) == 0
        s <<= UInt64(1)
        e -= Int32(1)
    end
    s <<= UInt64(FloatSignificandSize - SignificandSize(Float64))
    e -=  Int32( FloatSignificandSize - SignificandSize(Float64))
    return Float(s, e)
end

# Float128
#DenormalExponent(::Type{Float128}) = Int32(-ExponentBias(Float128) + 1)
#ExponentMask(::Type{Float128}) = 0x7fff0000000000000000000000000000
#PhysicalSignificandSize(::Type{Float128}) = Int32(112)
#SignificandSize(::Type{Float128}) = Int32(113)
#ExponentBias(::Type{Float128}) = Int32(0x00003fff + PhysicalSignificandSize(Float128))
#SignificandMask(::Type{Float128}) = 0x0000ffffffffffffffffffffffffffff
#HiddenBit(::Type{Float128}) = 0x00010000000000000000000000000000
#uint_t(d::Float128) = reinterpret(UInt128,d)
# Float64
DenormalExponent(::Type{Float64}) = Int32(-ExponentBias(Float64) + 1)
ExponentMask(::Type{Float64}) = 0x7FF0000000000000
PhysicalSignificandSize(::Type{Float64}) = Int32(52)
SignificandSize(::Type{Float64}) = Int32(53)
ExponentBias(::Type{Float64}) = Int32(0x3FF + PhysicalSignificandSize(Float64))
SignificandMask(::Type{Float64}) = 0x000FFFFFFFFFFFFF
HiddenBit(::Type{Float64}) = 0x0010000000000000
uint_t(d::Float64) = reinterpret(UInt64,d)
# Float32
DenormalExponent(::Type{Float32}) = Int32(-ExponentBias(Float32) + 1)
ExponentMask(::Type{Float32}) = 0x7F800000
PhysicalSignificandSize(::Type{Float32}) = Int32(23)
SignificandSize(::Type{Float32}) = Int32(24)
ExponentBias(::Type{Float32}) = Int32(0x7F + PhysicalSignificandSize(Float32))
SignificandMask(::Type{Float32}) = 0x007FFFFF
HiddenBit(::Type{Float32}) = 0x00800000
uint_t(d::Float32) = reinterpret(UInt32,d)
# Float16
DenormalExponent(::Type{Float16}) = Int32(-ExponentBias(Float16) + 1)
ExponentMask(::Type{Float16}) = 0x7c00
PhysicalSignificandSize(::Type{Float16}) = Int32(10)
SignificandSize(::Type{Float16}) = Int32(11)
ExponentBias(::Type{Float16}) = Int32(0x000f + PhysicalSignificandSize(Float16))
SignificandMask(::Type{Float16}) = 0x03ff
HiddenBit(::Type{Float16}) = 0x0400
uint_t(d::Float16) = reinterpret(UInt16,d)

function _exponent(d::T) where T<:AbstractFloat
  isdenormal(d) && return DenormalExponent(T)
  biased_e::Int32 = Int32((uint_t(d) & ExponentMask(T)) >> PhysicalSignificandSize(T))
  return Int32(biased_e - ExponentBias(T))
end
function _significand(d::T) where T<:AbstractFloat
  s = uint_t(d) & SignificandMask(T)
  return !isdenormal(d) ? s + HiddenBit(T) : s
end
isdenormal(d::T) where {T<:AbstractFloat} = (uint_t(d) & ExponentMask(T)) == 0

function normalizedbound(f::AbstractFloat)
    v = Float(_significand(f),_exponent(f))
    m_plus = normalize(Float((v.s << 1) + 1, v.e - 1))
    if lowerboundaryiscloser(f)
        m_minus = Float((v.s << 2) - 1, v.e - 2)
    else
        m_minus = Float((v.s << 1) - 1, v.e - 1)
    end
    return Float(m_minus.s << (m_minus.e - m_plus.e), m_plus.e), m_plus
end
function lowerboundaryiscloser(f::T) where T<:AbstractFloat
    physical_significand_is_zero = (uint_t(f) & SignificandMask(T)) == 0
    return physical_significand_is_zero && (_exponent(f) != DenormalExponent(T))
end

(-)(a::Float,b::Float) = Float(a.s - b.s,a.e,a.de)

const FloatM32 = 0xFFFFFFFF

function (*)(this::Float,other::Float)
    a::UInt64 = this.s >> 32
    b::UInt64 = this.s & FloatM32
    c::UInt64 = other.s >> 32
    d::UInt64 = other.s & FloatM32
    ac::UInt64 = a * c
    bc::UInt64 = b * c
    ad::UInt64 = a * d
    bd::UInt64 = b * d
    tmp::UInt64 = (bd >> 32) + (ad & FloatM32) + (bc & FloatM32)
    # By adding 1U << 31 to tmp we round the final result.
    # Halfway cases will be round up.
    tmp += UInt64(1) << 31
    result_f::UInt64 = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32)
    return Float(result_f,this.e + other.e + 64,this.de)
end

const CachedPowers = Float[
  Float(0xfa8fd5a0081c0288, -1220, -348),
  Float(0xbaaee17fa23ebf76, -1193, -340),
  Float(0x8b16fb203055ac76, -1166, -332),
  Float(0xcf42894a5dce35ea, -1140, -324),
  Float(0x9a6bb0aa55653b2d, -1113, -316),
  Float(0xe61acf033d1a45df, -1087, -308),
  Float(0xab70fe17c79ac6ca, -1060, -300),
  Float(0xff77b1fcbebcdc4f, -1034, -292),
  Float(0xbe5691ef416bd60c, -1007, -284),
  Float(0x8dd01fad907ffc3c, -980, -276),
  Float(0xd3515c2831559a83, -954, -268),
  Float(0x9d71ac8fada6c9b5, -927, -260),
  Float(0xea9c227723ee8bcb, -901, -252),
  Float(0xaecc49914078536d, -874, -244),
  Float(0x823c12795db6ce57, -847, -236),
  Float(0xc21094364dfb5637, -821, -228),
  Float(0x9096ea6f3848984f, -794, -220),
  Float(0xd77485cb25823ac7, -768, -212),
  Float(0xa086cfcd97bf97f4, -741, -204),
  Float(0xef340a98172aace5, -715, -196),
  Float(0xb23867fb2a35b28e, -688, -188),
  Float(0x84c8d4dfd2c63f3b, -661, -180),
  Float(0xc5dd44271ad3cdba, -635, -172),
  Float(0x936b9fcebb25c996, -608, -164),
  Float(0xdbac6c247d62a584, -582, -156),
  Float(0xa3ab66580d5fdaf6, -555, -148),
  Float(0xf3e2f893dec3f126, -529, -140),
  Float(0xb5b5ada8aaff80b8, -502, -132),
  Float(0x87625f056c7c4a8b, -475, -124),
  Float(0xc9bcff6034c13053, -449, -116),
  Float(0x964e858c91ba2655, -422, -108),
  Float(0xdff9772470297ebd, -396, -100),
  Float(0xa6dfbd9fb8e5b88f, -369, -92),
  Float(0xf8a95fcf88747d94, -343, -84),
  Float(0xb94470938fa89bcf, -316, -76),
  Float(0x8a08f0f8bf0f156b, -289, -68),
  Float(0xcdb02555653131b6, -263, -60),
  Float(0x993fe2c6d07b7fac, -236, -52),
  Float(0xe45c10c42a2b3b06, -210, -44),
  Float(0xaa242499697392d3, -183, -36),
  Float(0xfd87b5f28300ca0e, -157, -28),
  Float(0xbce5086492111aeb, -130, -20),
  Float(0x8cbccc096f5088cc, -103, -12),
  Float(0xd1b71758e219652c, -77, -4),
  Float(0x9c40000000000000, -50, 4),
  Float(0xe8d4a51000000000, -24, 12),
  Float(0xad78ebc5ac620000, 3, 20),
  Float(0x813f3978f8940984, 30, 28),
  Float(0xc097ce7bc90715b3, 56, 36),
  Float(0x8f7e32ce7bea5c70, 83, 44),
  Float(0xd5d238a4abe98068, 109, 52),
  Float(0x9f4f2726179a2245, 136, 60),
  Float(0xed63a231d4c4fb27, 162, 68),
  Float(0xb0de65388cc8ada8, 189, 76),
  Float(0x83c7088e1aab65db, 216, 84),
  Float(0xc45d1df942711d9a, 242, 92),
  Float(0x924d692ca61be758, 269, 100),
  Float(0xda01ee641a708dea, 295, 108),
  Float(0xa26da3999aef774a, 322, 116),
  Float(0xf209787bb47d6b85, 348, 124),
  Float(0xb454e4a179dd1877, 375, 132),
  Float(0x865b86925b9bc5c2, 402, 140),
  Float(0xc83553c5c8965d3d, 428, 148),
  Float(0x952ab45cfa97a0b3, 455, 156),
  Float(0xde469fbd99a05fe3, 481, 164),
  Float(0xa59bc234db398c25, 508, 172),
  Float(0xf6c69a72a3989f5c, 534, 180),
  Float(0xb7dcbf5354e9bece, 561, 188),
  Float(0x88fcf317f22241e2, 588, 196),
  Float(0xcc20ce9bd35c78a5, 614, 204),
  Float(0x98165af37b2153df, 641, 212),
  Float(0xe2a0b5dc971f303a, 667, 220),
  Float(0xa8d9d1535ce3b396, 694, 228),
  Float(0xfb9b7cd9a4a7443c, 720, 236),
  Float(0xbb764c4ca7a44410, 747, 244),
  Float(0x8bab8eefb6409c1a, 774, 252),
  Float(0xd01fef10a657842c, 800, 260),
  Float(0x9b10a4e5e9913129, 827, 268),
  Float(0xe7109bfba19c0c9d, 853, 276),
  Float(0xac2820d9623bf429, 880, 284),
  Float(0x80444b5e7aa7cf85, 907, 292),
  Float(0xbf21e44003acdd2d, 933, 300),
  Float(0x8e679c2f5e44ff8f, 960, 308),
  Float(0xd433179d9c8cb841, 986, 316),
  Float(0x9e19db92b4e31ba9, 1013, 324),
  Float(0xeb96bf6ebadf77d9, 1039, 332),
  Float(0xaf87023b9bf0ee6b, 1066, 340)]

const CachedPowersLength = length(CachedPowers)
const CachedPowersOffset = 348  # -1 * the first decimal_exponent.
const D_1_LOG2_10 = 0.30102999566398114  #  1 / lg(10)
# Difference between the decimal exponents in the table above.
const DecimalExponentDistance = 8
const MinDecimalExponent = -348
const MaxDecimalExponent = 340

function binexp_cache(min_exponent,max_exponent)
    k = ceil(Integer,(min_exponent+63)*D_1_LOG2_10)
    index = div(CachedPowersOffset+k-1,DecimalExponentDistance) + 1
    cp = CachedPowers[index+1]
    return cp
end
back to top