# This file is a part of Julia. License is MIT: https://julialang.org/license

const Bits = Vector{UInt64}
const CHK0 = zero(UInt64)

const NO_OFFSET = Int === Int64 ? -one(Int) << 60 : -one(Int) << 29
# + NO_OFFSET must be small enough to stay < 0 when added to any offset.
#   An offset is in the range -2^57:2^57 (64-bit architectures)
#   or -2^26:2^26 (32-bit architectures)
# + when the offset is NO_OFFSET, the bits field *must* be empty
# + NO_OFFSET could be made to be > 0, but a negative one allows
#   a small optimization in the in(x, ::BitSet) method

mutable struct BitSet <: AbstractSet{Int}
    bits::Vector{UInt64}
    # 1st stored Int equals 64*offset
    offset::Int

    BitSet() = new(sizehint!(zeros(UInt64, 0), 4), NO_OFFSET)
end

"""
    BitSet([itr])

Construct a sorted set of `Int`s generated by the given iterable object, or an
empty set. Implemented as a bit string, and therefore designed for dense integer
sets. If the set will be sparse (for example, holding a few very large integers),
use [`Set`](@ref) instead.
"""
BitSet(itr) = union!(BitSet(), itr)

# Special implementation for BitSet, which lacks a fast `length` method.
function union!(s::BitSet, itr)
    for x in itr
        push!(s, x)
    end
    return s
end

@inline intoffset(s::BitSet) = s.offset << 6

eltype(::Type{BitSet}) = Int

empty(s::BitSet, ::Type{Int}=Int) = BitSet()
emptymutable(s::BitSet, ::Type{Int}=Int) = BitSet()

copy(s1::BitSet) = copy!(BitSet(), s1)
copymutable(s::BitSet) = copy(s)

function copy!(dest::BitSet, src::BitSet)
    resize!(dest.bits, length(src.bits))
    copyto!(dest.bits, src.bits)
    dest.offset = src.offset
    dest
end

sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s)

function _bits_getindex(b::Bits, n::Int, offset::Int)
    ci = _div64(n) - offset + 1
    1 <= ci <= length(b) || return false
    @inbounds r = (b[ci] & (one(UInt64) << _mod64(n))) != 0
    r
end

function _bits_findnext(b::Bits, start::Int) # start is 0-based
    # @assert start >= 0
    _div64(start) + 1 > length(b) && return -1
    ind = unsafe_bitfindnext(b, start+1)
    ind === nothing ? -1 : ind - 1
end

function _bits_findprev(b::Bits, start::Int) # start is 0-based
    # @assert start <= 64 * length(b) - 1
    start >= 0 || return -1
    ind = unsafe_bitfindprev(b, start+1)
    ind === nothing ? -1 : ind - 1
end

# An internal function for setting the inclusion bit for a given integer
@inline function _setint!(s::BitSet, idx::Int, b::Bool)
    cidx = _div64(idx)
    len = length(s.bits)
    diff = cidx - s.offset
    if diff >= len
        b || return s # setting a bit to zero outside the set's bits is a no-op

        # we put the following test within one of the two branches,
        # with the NO_OFFSET trick, to avoid having to perform it at
        # each and every call to _setint!
        if s.offset == NO_OFFSET # initialize the offset
            # we assume isempty(s.bits)
            s.offset = cidx
            diff = 0
        end
        _growend0!(s.bits, diff - len + 1)
    elseif diff < 0
        b || return s
        _growbeg0!(s.bits, -diff)
        s.offset += diff
        diff = 0
    end
    _unsafe_bitsetindex!(s.bits, b, diff+1, _mod64(idx))
    s
end

# An internal function to resize a Bits object and ensure the newly allocated
# elements are zeroed (will become unnecessary if this behavior changes)
@inline function _growend0!(b::Bits, nchunks::Int)
    len = length(b)
    _growend!(b, nchunks)
    for i in len+1:length(b)
        @inbounds b[i] = CHK0 # resize! gives dirty memory
    end
end
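
# Illustrative sketch of the chunk/offset encoding used by _setint! above, assuming
# the 64-bit layout: pushing 1000 into an empty set gives _div64(1000) == 15 and
# _mod64(1000) == 40, so the offset is initialized to 15, a single chunk is
# allocated, and bit 40 of that chunk is set; the first Int covered by s.bits[1]
# is 64 * 15 == 960, matching the "1st stored Int equals 64*offset" invariant.
#
#   julia> s = BitSet(); push!(s, 1000);
#
#   julia> s.offset, length(s.bits), s.bits[1] == one(UInt64) << 40
#   (15, 1, true)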

@inline function _growbeg0!(b::Bits, nchunks::Int)
    _growbeg!(b, nchunks)
    for i in 1:nchunks
        @inbounds b[i] = CHK0
    end
end

function union!(s::BitSet, r::AbstractUnitRange{<:Integer})
    isempty(r) && return s

    a, b = _check_bitset_bounds(first(r)), _check_bitset_bounds(last(r))
    cidxa = _div64(a)
    cidxb = _div64(b)

    if s.offset == NO_OFFSET
        s.offset = cidxa
    end
    len = length(s.bits)
    diffa = cidxa - s.offset
    diffb = cidxb - s.offset

    # grow s.bits as necessary
    if diffb >= len
        _growend!(s.bits, diffb - len + 1)
        # we set only some values to CHK0, those which will not be
        # fully overwritten (i.e. only or'ed with `|`)
        s.bits[end] = CHK0 # end == diffb + 1
        if diffa >= len
            s.bits[diffa + 1] = CHK0
        end
    end
    if diffa < 0
        _growbeg!(s.bits, -diffa)
        s.bits[1] = CHK0
        if diffb < 0
            s.bits[diffb - diffa + 1] = CHK0
        end
        s.offset = cidxa # s.offset += diffa
        diffb -= diffa
        diffa = 0
    end

    # update s.bits
    i = _mod64(a)
    j = _mod64(b)
    @inbounds if diffa == diffb
        s.bits[diffa + 1] |= (((~CHK0) >> i) << (i+63-j)) >> (63-j)
    else
        s.bits[diffa + 1] |= ((~CHK0) >> i) << i
        s.bits[diffb + 1] |= (~CHK0 << (63-j)) >> (63-j)
        for n = diffa+1:diffb-1
            s.bits[n+1] = ~CHK0
        end
    end
    s
end

function _matched_map!(f, s1::BitSet, s2::BitSet)
    left_false_is_false = f(false, false) == f(false, true) == false
    right_false_is_false = f(false, false) == f(true, false) == false

    # we must first handle the NO_OFFSET case; we could test for
    # isempty(s1) but it can be costly, so the user has to call
    # empty!(s1) herself before-hand to re-initialize to NO_OFFSET
    if s1.offset == NO_OFFSET
        return left_false_is_false ? s1 : copy!(s1, s2)
    elseif s2.offset == NO_OFFSET
        return right_false_is_false ? empty!(s1) : s1
    end
    s1.offset = _matched_map!(f, s1.bits, s1.offset, s2.bits, s2.offset,
                              left_false_is_false, right_false_is_false)
    s1
end
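
# Illustrative sketch of how the two flags above classify the operators passed in by
# the set functions further below (values follow directly from the truth tables):
#
#   f                       left_false_is_false   right_false_is_false
#   |       (union!)        false                 false
#   &       (intersect!)    true                  true
#   p & ~q  (setdiff!)      true                  false
#   xor     (symdiff!)      false                 false
#
# left_false_is_false means f(false, x) == false, so a1 never needs to grow to cover
# chunks present only in a2; right_false_is_false means f(x, false) == false, so
# chunks of a1 outside a2's span can be dropped, as done in the Bits-level method below.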

# An internal function that takes a pure function `f` and maps it across two chunk
# vectors (`Bits`), allowing their lengths and offsets to differ; it alters a1 in
# place and returns the updated offset b1.
# WARNING: the assumptions written in the else clauses must hold
function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int,
                       left_false_is_false::Bool, right_false_is_false::Bool)
    l1, l2 = length(a1), length(a2)
    bdiff = b2 - b1
    e1, e2 = l1+b1, l2+b2
    ediff = e2 - e1

    # map! over the common indices
    @inbounds for i = max(1, 1+bdiff):min(l1, l2+bdiff)
        a1[i] = f(a1[i], a2[i-bdiff])
    end

    if ediff > 0
        if left_false_is_false
            # We don't need to worry about the trailing bits; they're all false
        else # @assert f(false, x) == x
            _growend!(a1, ediff)
            # if a1 and a2 are not overlapping, we infer implied "false" values from a2
            for outer l1 = l1+1:bdiff
                @inbounds a1[l1] = CHK0
            end
            # update ediff in case l1 was updated
            ediff = e2 - l1 - b1
            # copy actual chunks from a2
            unsafe_copyto!(a1, l1+1, a2, l2+1-ediff, ediff)
            l1 = length(a1)
        end
    elseif ediff < 0
        if right_false_is_false
            # We don't need to worry about the trailing bits; they're all false
            _deleteend!(a1, min(l1, -ediff))
            # no need to update l1, as if bdiff > 0 (case below), then bdiff will
            # be smaller anyway than an updated l1
        else # @assert f(x, false) == x
            # We don't need to worry about the trailing bits; they already have the
            # correct value
        end
    end

    if bdiff < 0
        if left_false_is_false
            # We don't need to worry about the leading bits; they're all false
        else # @assert f(false, x) == x
            _growbeg!(a1, -bdiff)
            # if a1 and a2 are not overlapping, we infer implied "false" values from a2
            for i = l2+1:-bdiff
                @inbounds a1[i] = CHK0
            end
            b1 += bdiff # updated return value
            # copy actual chunks from a2
            unsafe_copyto!(a1, 1, a2, 1, min(-bdiff, l2))
        end
    elseif bdiff > 0
        if right_false_is_false
            # We don't need to worry about the leading bits; they're all false
            _deletebeg!(a1, min(l1, bdiff))
            b1 += bdiff
        else # @assert f(x, false) == x
            # We don't need to worry about the leading bits; they already have the
            # correct value
        end
    end
    b1 # the new offset
end

@noinline _throw_bitset_bounds_err() =
    throw(ArgumentError("elements of BitSet must be between typemin(Int) and typemax(Int)"))

@inline _is_convertible_Int(n) = typemin(Int) <= n <= typemax(Int)

@inline _check_bitset_bounds(n) =
    _is_convertible_Int(n) ? Int(n) : _throw_bitset_bounds_err()

@inline _check_bitset_bounds(n::Int) = n

@noinline _throw_keyerror(n) = throw(KeyError(n))

@inline push!(s::BitSet, n::Integer) = _setint!(s, _check_bitset_bounds(n), true)
push!(s::BitSet, ns::Integer...) = (for n in ns; push!(s, n); end; s)

@inline pop!(s::BitSet) = pop!(s, last(s))

@inline function pop!(s::BitSet, n::Integer)
    if n in s
        delete!(s, n)
        n
    else
        _throw_keyerror(n)
    end
end

@inline function pop!(s::BitSet, n::Integer, default)
    if n in s
        delete!(s, n)
        n
    else
        default
    end
end

@inline delete!(s::BitSet, n::Int) = _setint!(s, n, false)
@inline delete!(s::BitSet, n::Integer) = _is_convertible_Int(n) ? delete!(s, Int(n)) : s

popfirst!(s::BitSet) = pop!(s, first(s))

function empty!(s::BitSet)
    empty!(s.bits)
    s.offset = NO_OFFSET
    s
end

isempty(s::BitSet) = _check0(s.bits, 1, length(s.bits))

# Mathematical set functions: union!, intersect!, setdiff!, symdiff!

union(s::BitSet, sets...) = union!(copy(s), sets...)
union!(s1::BitSet, s2::BitSet) = _matched_map!(|, s1, s2)
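
# Illustrative sketch (values assume the 64-bit layout): `|` satisfies neither flag,
# so union! grows s1.bits to span both sets and copies s2's chunks. Merging 1 and 256
# therefore allocates 5 chunks (indices 0 through 4) even though the set has only two
# elements, which is the dense-representation cost mentioned in the BitSet docstring.
#
#   julia> s = union!(BitSet([1]), BitSet([256]));
#
#   julia> length(s.bits), s.offset, collect(s)
#   (5, 0, [1, 256])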

intersect(s1::BitSet, s2::BitSet) =
    length(s1.bits) < length(s2.bits) ? intersect!(copy(s1), s2) : intersect!(copy(s2), s1)

intersect!(s1::BitSet, s2::BitSet) = _matched_map!(&, s1, s2)

setdiff!(s1::BitSet, s2::BitSet) = _matched_map!((p, q) -> p & ~q, s1, s2)

function symdiff!(s::BitSet, ns)
    for x in ns
        int_symdiff!(s, x)
    end
    return s
end

function int_symdiff!(s::BitSet, n::Integer)
    n0 = _check_bitset_bounds(n)
    val = !(n0 in s)
    _setint!(s, n0, val)
    s
end

symdiff!(s1::BitSet, s2::BitSet) = _matched_map!(xor, s1, s2)

filter!(f, s::BitSet) = unsafe_filter!(f, s)

@inline in(n::Int, s::BitSet) = _bits_getindex(s.bits, n, s.offset)
@inline in(n::Integer, s::BitSet) = _is_convertible_Int(n) ? in(Int(n), s) : false

function iterate(s::BitSet, (word, idx) = (CHK0, 0))
    while word == 0
        idx == length(s.bits) && return nothing
        idx += 1
        word = @inbounds s.bits[idx]
    end
    trailing_zeros(word) + (idx - 1 + s.offset) << 6, (_blsr(word), idx)
end

@noinline _throw_bitset_notempty_error() =
    throw(ArgumentError("collection must be non-empty"))

function first(s::BitSet)
    idx = _bits_findnext(s.bits, 0)
    idx == -1 ? _throw_bitset_notempty_error() : idx + intoffset(s)
end

function last(s::BitSet)
    idx = _bits_findprev(s.bits, (length(s.bits) << 6) - 1)
    idx == -1 ? _throw_bitset_notempty_error() : idx + intoffset(s)
end

length(s::BitSet) = bitcount(s.bits) # = mapreduce(count_ones, +, s.bits; init=0)

function show(io::IO, s::BitSet)
    print(io, "BitSet([")
    first = true
    for n in s
        !first && print(io, ", ")
        print(io, n)
        first = false
    end
    print(io, "])")
end

function _check0(a::Vector{UInt64}, b::Int, e::Int)
    @inbounds for i in b:e
        a[i] == CHK0 || return false
    end
    true
end

function ==(s1::BitSet, s2::BitSet)
    # Swap so that s1 always has the smallest offset
    if s1.offset > s2.offset
        s1, s2 = s2, s1
    end
    a1 = s1.bits
    a2 = s2.bits
    b1, b2 = s1.offset, s2.offset
    l1, l2 = length(a1), length(a2)
    e1 = l1+b1
    overlap0 = max(0, e1 - b2)
    included = overlap0 >= l2  # whether a2's indices are included in a1's
    overlap  = included ? l2 : overlap0

    # Ensure non-overlap chunks are zero (unlikely)
    _check0(a1, 1, l1-overlap0) || return false
    if included
        _check0(a1, b2-b1+l2+1, l1) || return false
    else
        _check0(a2, 1+overlap, l2) || return false
    end

    # compare overlap values
    if overlap > 0
        _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false
    end

    return true
end

issubset(a::BitSet, b::BitSet) = a == intersect(a, b)
⊊(a::BitSet, b::BitSet) = a <= b && a != b

minimum(s::BitSet) = first(s)
maximum(s::BitSet) = last(s)
extrema(s::BitSet) = (first(s), last(s))
issorted(s::BitSet) = true
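
# Illustrative sketch (values assume the 64-bit layout): `&` satisfies both flags, so
# intersect! drops every chunk of s1 outside s2's span instead of and-ing it with an
# implied zero; the 16 chunks allocated for BitSet([1, 1000]) shrink back to one.
#
#   julia> s = intersect!(BitSet([1, 1000]), BitSet([1]));
#
#   julia> length(s.bits), collect(s)
#   (1, [1])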