https://github.com/JuliaLang/julia
Tip revision: a2c0544603e62bd5ae095457d0ce08888cf5a958 authored by Shuhei Kadowaki on 06 September 2023, 11:52:57 UTC
allow codegen to invoke const-prop'ed method instance
allow codegen to invoke const-prop'ed method instance
Tip revision: a2c0544
hamt.jl
module HashArrayMappedTries
export HAMT
##
# Implements "Ideal Hash Trees" Phil Bagwell 2000
#
# Notable divergence is that we forgo a resizable root table.
# Root tables improve lookup performance for large sizes, but
# limit space efficiency if the HAMT is used for a persistent
# dictionary, since each persistent operation would duplicate
# the root table.
#
# We do not handle perfect hash-collision. We would need to
# add an additional node type for Collisions. Perfect hash
# collisions should not occur in practice since we perform
# rehashing after using 55 bits (MAX_SHIFT) of the original hash.
#
# Use https://github.com/vchuravy/HashArrayMappedTries.jl if
# you want to use this implementation in a package.
#
# A HAMT is formed by tree of levels, where at each level
# we use a portion of the bits of the hash for indexing
#
# We use a branching width (ENTRY_COUNT) of 32, giving us
# 5bits of indexing per level
# 0000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000
# L11 L10 L9 L8 L7 L6 L5 L4 L3 L2 L1 L0
#
# At each level we use a 32bit bitmap to store which elements are occupied.
# Since our storage is "sparse" we need to map from index in [0,31] to
# the actual storage index. We mask the bitmap with (1 << i) - 1 and count
# the ones in the result. The number of set ones (+1) gives us the index
# into the storage array.
#
# HAMT can be both persistent and non-persistent.
# The `path` function searches for a matching entries, and for persistency
# optionally copies the path so that it can be safely mutated.
# TODO:
# When `trie.data` becomes empty we could remove it from it's parent,
# but we only know so fairly late. Maybe have a compact function?
const ENTRY_COUNT = UInt(32)
const BITMAP = UInt32
const NBITS = sizeof(UInt) * 8
# @assert ispow2(ENTRY_COUNT)
const BITS_PER_LEVEL = trailing_zeros(ENTRY_COUNT)
const LEVEL_MASK = (UInt(1) << BITS_PER_LEVEL) - UInt(1)
const MAX_SHIFT = (NBITS ÷ BITS_PER_LEVEL - 1) * BITS_PER_LEVEL
mutable struct Leaf{K, V}
const key::K
const val::V
end
"""
HAMT{K,V}
A HashArrayMappedTrie that optionally supports persistence.
"""
mutable struct HAMT{K, V}
const data::Vector{Union{Leaf{K, V}, HAMT{K, V}}}
bitmap::BITMAP
end
HAMT{K, V}() where {K, V} = HAMT(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 0), zero(BITMAP))
function HAMT{K,V}(k::K, v) where {K, V}
v = convert(V, v)
# For a single element we can't have a hash-collision
trie = HAMT(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 1), zero(BITMAP))
trie.data[1] = Leaf{K,V}(k,v)
bi = BitmapIndex(HashState(k))
set!(trie, bi)
return trie
end
HAMT(k::K, v::V) where {K, V} = HAMT{K,V}(K, V)
struct HashState{K}
key::K
hash::UInt
depth::Int
shift::Int
end
HashState(key)= HashState(key, hash(key), 0, 0)
# Reconstruct
HashState(key, depth, shift) = HashState(key, hash(key, UInt(depth ÷ BITS_PER_LEVEL)), depth, shift)
function next(h::HashState)
depth = h.depth + 1
shift = h.shift + BITS_PER_LEVEL
if shift > MAX_SHIFT
# Note we use `UInt(depth ÷ BITS_PER_LEVEL)` to seed the hash function
# the hash docs, do we need to hash `UInt(depth ÷ BITS_PER_LEVEL)` first?
h_hash = hash(h.key, UInt(depth ÷ BITS_PER_LEVEL))
else
h_hash = h.hash
end
return HashState(h.key, h_hash, depth, shift)
end
struct BitmapIndex
x::UInt
end
BitmapIndex(h::HashState) = BitmapIndex((h.hash >> h.shift) & LEVEL_MASK)
Base.:(<<)(v, bi::BitmapIndex) = v << bi.x
Base.:(>>)(v, bi::BitmapIndex) = v >> bi.x
isset(trie::HAMT, bi::BitmapIndex) = isodd(trie.bitmap >> bi)
function set!(trie::HAMT, bi::BitmapIndex)
trie.bitmap |= (UInt32(1) << bi)
# Invariant: count_ones(trie.bitmap) == Base.length(trie.data)
end
function unset!(trie::HAMT, bi::BitmapIndex)
trie.bitmap &= ~(UInt32(1) << bi)
# Invariant: count_ones(trie.bitmap) == Base.length(trie.data)
end
function entry_index(trie::HAMT, bi::BitmapIndex)
mask = (UInt32(1) << bi.x) - UInt32(1)
count_ones(trie.bitmap & mask) + 1
end
islevel_empty(trie::HAMT) = trie.bitmap == 0
islevel_empty(::Leaf) = false
"""
path(trie, h, copyf)::(found, present, trie, i, top, level)
Internal function that walks a HAMT and finds the slot for hash.
Returns if a value is `present` and a value is `found`.
It returns the `trie` and the index `i` into `trie.data`, as well
as the current `level`.
If a copy function is provided `copyf` use the return `top` for the
new persistent tree.
"""
@inline function path(trie::HAMT{K,V}, key, _h, copy=false) where {K, V}
h = HashState(key, _h, 0, 0)
if copy
trie = top = HAMT{K,V}(Base.copy(trie.data), trie.bitmap)
else
trie = top = trie
end
while true
bi = BitmapIndex(h)
i = entry_index(trie, bi)
if isset(trie, bi)
next = @inbounds trie.data[i]
if next isa Leaf{K,V}
# Check if key match if not we will need to grow.
found = (next.key === h.key || isequal(next.key, h.key))
return found, true, trie, i, bi, top, h
end
if copy
next = HAMT{K,V}(Base.copy(next.data), next.bitmap)
@inbounds trie.data[i] = next
end
trie = next::HAMT{K,V}
else
# found empty slot
return true, false, trie, i, bi, top, h
end
h = HashArrayMappedTries.next(h)
end
end
"""
Internal function that given an obtained path, either set the value
or grows the HAMT by inserting a new trie instead.
"""
@inline function insert!(found, present, trie::HAMT{K,V}, i, bi, h, val) where {K,V}
if found # we found a slot, just set it to the new leaf
# replace or insert
if present # replace
@inbounds trie.data[i] = Leaf{K, V}(h.key, val)
else
Base.insert!(trie.data, i, Leaf{K, V}(h.key, val))
end
set!(trie, bi)
else
@assert present
# collision -> grow
leaf = @inbounds trie.data[i]::Leaf{K,V}
leaf_h = HashState(leaf.key, h.depth, h.shift)
if leaf_h.hash == h.hash
error("Perfect hash collision")
end
while true
new_trie = HAMT{K, V}()
if present
@inbounds trie.data[i] = new_trie
else
i = entry_index(trie, bi)
Base.insert!(trie.data, i, new_trie)
end
set!(trie, bi)
h = next(h)
leaf_h = next(leaf_h)
bi_new = BitmapIndex(h)
bi_old = BitmapIndex(leaf_h)
if bi_new == bi_old # collision in new trie -> retry
trie = new_trie
bi = bi_new
present = false
continue
end
i_new = entry_index(new_trie, bi_new)
Base.insert!(new_trie.data, i_new, Leaf{K, V}(h.key, val))
set!(new_trie, bi_new)
i_old = entry_index(new_trie, bi_old)
Base.insert!(new_trie.data, i_old, leaf)
set!(new_trie, bi_old)
break
end
end
end
length(::Leaf) = 1
length(trie::HAMT) = sum((length(trie.data[i]) for i in eachindex(trie.data)), init=0)
isempty(::Leaf) = false
function isempty(trie::HAMT)
if islevel_empty(trie)
return true
end
return all(isempty(trie.data[i]) for i in eachindex(trie.data))
end
# DFS
function iterate(trie::HAMT, state=nothing)
if state === nothing
state = (;parent=nothing, trie, i=1)
end
while state !== nothing
i = state.i
if i > Base.length(state.trie.data)
state = state.parent
continue
end
trie = state.trie.data[i]
state = (;parent=state.parent, trie=state.trie, i=i+1)
if trie isa Leaf
return (trie.key => trie.val, state)
else
# we found a new level
state = (;parent=state, trie, i=1)
continue
end
end
return nothing
end
end # module HashArrayMapTries