Revision 3711749292ba9c29ad2e3b9eaee90995f8c8290a authored by Keno Fischer on 11 October 2023, 14:41:22 UTC, committed by GitHub on 11 October 2023, 14:41:22 UTC
This should be NFC and is intended to allow the optimizer to delete :enter statements (by replacing them with `nothing`), without leaving dangling `:leave`s around. This is accomplished by having `leave` take (a variable number of) `:enter` tokens (that are already being used by `:pop_exception`). The semantics are that a literal `nothing` or an SSAValue pointing to a `nothing` statement are ignored, and one exception handler is popped for each remaining argument. The actual value of the token is ignored, except that the verifier asserts that it belongs to an `:enter`. Note that we don't need to do the same for :pop_exception, because the token generated by an `:enter` is semantically only in scope for :pop_exception during its catch block. If we determine the `:enter` is dead, then its catch block is guaranteed to not be executed and will be deleted wholesale by cfg liveness. I was considering doing something fancier where :leave is changed back to taking an integer after optimization, but the case where the IR size is bigger after this change (when we are `:leave`ing many handlers) is fairly rare and likely not worth the additional complexity or time cost to do anything special. If it does show up in size benchmarks, I'd rather give `:leave` a special, compact encoding.
1 parent 8180240
hamt.jl
module HashArrayMappedTries
export HAMT
##
# Implements "Ideal Hash Trees" Phil Bagwell 2000
#
# Notable divergence is that we forgo a resizable root table.
# Root tables improve lookup performance for large sizes, but
# limit space efficiency if the HAMT is used for a persistent
# dictionary, since each persistent operation would duplicate
# the root table.
#
# We do not handle perfect hash-collision. We would need to
# add an additional node type for Collisions. Perfect hash
# collisions should not occur in practice since we perform
# rehashing after using 55 bits (MAX_SHIFT) of the original hash.
#
# Use https://github.com/vchuravy/HashArrayMappedTries.jl if
# you want to use this implementation in a package.
#
# A HAMT is formed by tree of levels, where at each level
# we use a portion of the bits of the hash for indexing
#
# We use a branching width (ENTRY_COUNT) of 32, giving us
# 5bits of indexing per level
# 0000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000
# L11 L10 L9 L8 L7 L6 L5 L4 L3 L2 L1 L0
#
# At each level we use a 32bit bitmap to store which elements are occupied.
# Since our storage is "sparse" we need to map from index in [0,31] to
# the actual storage index. We mask the bitmap wiht (1 << i) - 1 and count
# the ones in the result. The number of set ones (+1) gives us the index
# into the storage array.
#
# HAMT can be both persitent and non-persistent.
# The `path` function searches for a matching entries, and for persistency
# optionally copies the path so that it can be safely mutated.
# TODO:
# When `trie.data` becomes empty we could remove it from it's parent,
# but we only know so fairly late. Maybe have a compact function?
const ENTRY_COUNT = UInt(32)
const BITMAP = UInt32
const NBITS = sizeof(UInt) * 8
# @assert ispow2(ENTRY_COUNT)
const BITS_PER_LEVEL = trailing_zeros(ENTRY_COUNT)
const LEVEL_MASK = (UInt(1) << BITS_PER_LEVEL) - UInt(1)
const MAX_SHIFT = (NBITS ÷ BITS_PER_LEVEL - 1) * BITS_PER_LEVEL
mutable struct Leaf{K, V}
const key::K
const val::V
end
"""
HAMT{K,V}
A HashArrayMappedTrie that optionally supports persistence.
"""
mutable struct HAMT{K, V}
const data::Vector{Union{Leaf{K, V}, HAMT{K, V}}}
bitmap::BITMAP
end
HAMT{K, V}() where {K, V} = HAMT(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 0), zero(BITMAP))
function HAMT{K,V}(k::K, v) where {K, V}
v = convert(V, v)
# For a single element we can't have a hash-collision
trie = HAMT(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 1), zero(BITMAP))
trie.data[1] = Leaf{K,V}(k,v)
bi = BitmapIndex(HashState(k))
set!(trie, bi)
return trie
end
HAMT(k::K, v::V) where {K, V} = HAMT{K,V}(K, V)
struct HashState{K}
key::K
hash::UInt
depth::Int
shift::Int
end
HashState(key)= HashState(key, hash(key), 0, 0)
# Reconstruct
HashState(key, depth, shift) = HashState(key, hash(key, UInt(depth ÷ BITS_PER_LEVEL)), depth, shift)
function next(h::HashState)
depth = h.depth + 1
shift = h.shift + BITS_PER_LEVEL
if shift > MAX_SHIFT
# Note we use `UInt(depth ÷ BITS_PER_LEVEL)` to seed the hash function
# the hash docs, do we need to hash `UInt(depth ÷ BITS_PER_LEVEL)` first?
h_hash = hash(h.key, UInt(depth ÷ BITS_PER_LEVEL))
else
h_hash = h.hash
end
return HashState(h.key, h_hash, depth, shift)
end
struct BitmapIndex
x::UInt
end
BitmapIndex(h::HashState) = BitmapIndex((h.hash >> h.shift) & LEVEL_MASK)
Base.:(<<)(v, bi::BitmapIndex) = v << bi.x
Base.:(>>)(v, bi::BitmapIndex) = v >> bi.x
isset(trie::HAMT, bi::BitmapIndex) = isodd(trie.bitmap >> bi)
function set!(trie::HAMT, bi::BitmapIndex)
trie.bitmap |= (UInt32(1) << bi)
# Invariant: count_ones(trie.bitmap) == Base.length(trie.data)
end
function unset!(trie::HAMT, bi::BitmapIndex)
trie.bitmap &= ~(UInt32(1) << bi)
# Invariant: count_ones(trie.bitmap) == Base.length(trie.data)
end
function entry_index(trie::HAMT, bi::BitmapIndex)
mask = (UInt32(1) << bi.x) - UInt32(1)
count_ones(trie.bitmap & mask) + 1
end
islevel_empty(trie::HAMT) = trie.bitmap == 0
islevel_empty(::Leaf) = false
"""
path(trie, h, copyf)::(found, present, trie, i, top, level)
Internal function that walks a HAMT and finds the slot for hash.
Returns if a value is `present` and a value is `found`.
It returns the `trie` and the index `i` into `trie.data`, as well
as the current `level`.
If a copy function is provided `copyf` use the return `top` for the
new persistent tree.
"""
@inline function path(trie::HAMT{K,V}, key, _h, copy=false) where {K, V}
h = HashState(key, _h, 0, 0)
if copy
trie = top = HAMT{K,V}(Base.copy(trie.data), trie.bitmap)
else
trie = top = trie
end
while true
bi = BitmapIndex(h)
i = entry_index(trie, bi)
if isset(trie, bi)
next = @inbounds trie.data[i]
if next isa Leaf{K,V}
# Check if key match if not we will need to grow.
found = (next.key === h.key || isequal(next.key, h.key))
return found, true, trie, i, bi, top, h
end
if copy
next = HAMT{K,V}(Base.copy(next.data), next.bitmap)
@inbounds trie.data[i] = next
end
trie = next::HAMT{K,V}
else
# found empty slot
return true, false, trie, i, bi, top, h
end
h = HashArrayMappedTries.next(h)
end
end
"""
Internal function that given an obtained path, either set the value
or grows the HAMT by inserting a new trie instead.
"""
@inline function insert!(found, present, trie::HAMT{K,V}, i, bi, h, val) where {K,V}
if found # we found a slot, just set it to the new leaf
# replace or insert
if present # replace
@inbounds trie.data[i] = Leaf{K, V}(h.key, val)
else
Base.insert!(trie.data, i, Leaf{K, V}(h.key, val))
end
set!(trie, bi)
else
@assert present
# collision -> grow
leaf = @inbounds trie.data[i]::Leaf{K,V}
leaf_h = HashState(leaf.key, h.depth, h.shift)
if leaf_h.hash == h.hash
error("Perfect hash collision")
end
while true
new_trie = HAMT{K, V}()
if present
@inbounds trie.data[i] = new_trie
else
i = entry_index(trie, bi)
Base.insert!(trie.data, i, new_trie)
end
set!(trie, bi)
h = next(h)
leaf_h = next(leaf_h)
bi_new = BitmapIndex(h)
bi_old = BitmapIndex(leaf_h)
if bi_new == bi_old # collision in new trie -> retry
trie = new_trie
bi = bi_new
present = false
continue
end
i_new = entry_index(new_trie, bi_new)
Base.insert!(new_trie.data, i_new, Leaf{K, V}(h.key, val))
set!(new_trie, bi_new)
i_old = entry_index(new_trie, bi_old)
Base.insert!(new_trie.data, i_old, leaf)
set!(new_trie, bi_old)
break
end
end
end
length(::Leaf) = 1
length(trie::HAMT) = sum((length(trie.data[i]) for i in eachindex(trie.data)), init=0)
isempty(::Leaf) = false
function isempty(trie::HAMT)
if islevel_empty(trie)
return true
end
return all(isempty(trie.data[i]) for i in eachindex(trie.data))
end
# DFS
function iterate(trie::HAMT, state=nothing)
if state === nothing
state = (;parent=nothing, trie, i=1)
end
while state !== nothing
i = state.i
if i > Base.length(state.trie.data)
state = state.parent
continue
end
trie = state.trie.data[i]
state = (;parent=state.parent, trie=state.trie, i=i+1)
if trie isa Leaf
return (trie.key => trie.val, state)
else
# we found a new level
state = (;parent=state, trie, i=1)
continue
end
end
return nothing
end
end # module HashArrayMapTries
Computing file changes ...