https://github.com/JuliaLang/julia
Raw File
Tip revision: b7498550cbabafab972bfcae3c662107d1f974de authored by K Pamnany on 29 November 2023, 21:07:14 UTC
Improve `ReentrantLock`
Tip revision: b749855
substring.jl
# This file is a part of Julia. License is MIT: https://julialang.org/license

"""
    SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s))
    SubString(s::AbstractString, r::UnitRange{<:Integer})

Like [`getindex`](@ref), but returns a view into the parent string `s`
within range `i:j` or `r` respectively instead of making a copy.

The [`@views`](@ref) macro converts any string slices `s[i:j]` into
substrings `SubString(s, i, j)` in a block of code.

# Examples
```jldoctest
julia> SubString("abc", 1, 2)
"ab"

julia> SubString("abc", 1:2)
"ab"

julia> SubString("abc", 2)
"bc"
```
"""
struct SubString{T<:AbstractString} <: AbstractString
    string::T
    offset::Int
    ncodeunits::Int

    function SubString{T}(s::T, i::Int, j::Int) where T<:AbstractString
        i ≤ j || return new(s, 0, 0)
        @boundscheck begin
            checkbounds(s, i:j)
            @inbounds isvalid(s, i) || string_index_err(s, i)
            @inbounds isvalid(s, j) || string_index_err(s, j)
        end
        return new(s, i-1, nextind(s,j)-i)
    end
    function SubString{T}(s::T, i::Int, j::Int, ::Val{:noshift}) where T<:AbstractString
        @boundscheck begin
            si, sj = i + 1, prevind(s, j + i + 1)
            @inbounds isvalid(s, si) || string_index_err(s, si)
            @inbounds isvalid(s, sj) || string_index_err(s, sj)
        end
        new(s, i, j)
    end
end

@propagate_inbounds SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j)
@propagate_inbounds SubString(s::T, i::Int, j::Int, v::Val{:noshift}) where {T<:AbstractString} = SubString{T}(s, i, j, v)
@propagate_inbounds SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s)) = SubString(s, Int(i), Int(j))
@propagate_inbounds SubString(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, first(r), last(r))

@propagate_inbounds function SubString(s::SubString, i::Int, j::Int)
    @boundscheck i ≤ j && checkbounds(s, i:j)
    SubString(s.string, s.offset+i, s.offset+j)
end

SubString(s::AbstractString) = SubString(s, 1, lastindex(s)::Int)
SubString{T}(s::T) where {T<:AbstractString} = SubString{T}(s, 1, lastindex(s)::Int)

@propagate_inbounds view(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, r)
@propagate_inbounds maybeview(s::AbstractString, r::AbstractUnitRange{<:Integer}) = view(s, r)
@propagate_inbounds maybeview(s::AbstractString, args...) = getindex(s, args...)

convert(::Type{SubString{S}}, s::AbstractString) where {S<:AbstractString} =
    SubString(convert(S, s))::SubString{S}
convert(::Type{T}, s::T) where {T<:SubString} = s

# Regex match allows only Union{String, SubString{String}} so define conversion to this type
convert(::Type{Union{String, SubString{String}}}, s::String) = s
convert(::Type{Union{String, SubString{String}}}, s::SubString{String}) = s
convert(::Type{Union{String, SubString{String}}}, s::AbstractString) = convert(String, s)::String

function String(s::SubString{String})
    parent = s.string
    copy = GC.@preserve parent unsafe_string(pointer(parent, s.offset+1), s.ncodeunits)
    return copy
end

ncodeunits(s::SubString) = s.ncodeunits
codeunit(s::SubString) = codeunit(s.string)::CodeunitType
length(s::SubString) = length(s.string, s.offset+1, s.offset+s.ncodeunits)

function codeunit(s::SubString, i::Integer)
    @boundscheck checkbounds(s, i)
    @inbounds return codeunit(s.string, s.offset + i)
end

function iterate(s::SubString, i::Integer=firstindex(s))
    i == ncodeunits(s)+1 && return nothing
    @boundscheck checkbounds(s, i)
    y = iterate(s.string, s.offset + i)
    y === nothing && return nothing
    c, i = y::Tuple{AbstractChar,Int}
    return c, i - s.offset
end

function getindex(s::SubString, i::Integer)
    @boundscheck checkbounds(s, i)
    @inbounds return getindex(s.string, s.offset + i)
end

isascii(ss::SubString{String}) = isascii(codeunits(ss))

function isvalid(s::SubString, i::Integer)
    ib = true
    @boundscheck ib = checkbounds(Bool, s, i)
    @inbounds return ib && isvalid(s.string, s.offset + i)::Bool
end

thisind(s::SubString{String}, i::Int) = _thisind_str(s, i)
nextind(s::SubString{String}, i::Int) = _nextind_str(s, i)

parent(s::SubString) = s.string
parentindices(s::SubString) = (s.offset + 1 : thisind(s.string, s.offset + s.ncodeunits),)

function ==(a::Union{String, SubString{String}}, b::Union{String, SubString{String}})
    sizeof(a) == sizeof(b) && _memcmp(a, b) == 0
end

function cmp(a::SubString{String}, b::SubString{String})
    c = _memcmp(a, b)
    return c < 0 ? -1 : c > 0 ? +1 : cmp(sizeof(a), sizeof(b))
end

# don't make unnecessary copies when passing substrings to C functions
cconvert(::Type{Ptr{UInt8}}, s::SubString{String}) = s
cconvert(::Type{Ptr{Int8}}, s::SubString{String}) = s

function unsafe_convert(::Type{Ptr{R}}, s::SubString{String}) where R<:Union{Int8, UInt8}
    convert(Ptr{R}, pointer(s.string)) + s.offset
end

pointer(x::SubString{String}) = pointer(x.string) + x.offset
pointer(x::SubString{String}, i::Integer) = pointer(x.string) + x.offset + (i-1)

function hash(s::SubString{String}, h::UInt)
    h += memhash_seed
    ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof(s), h % UInt32) + h
end

"""
    reverse(s::AbstractString) -> AbstractString

Reverses a string. Technically, this function reverses the codepoints in a string and its
main utility is for reversed-order string processing, especially for reversed
regular-expression searches. See also [`reverseind`](@ref) to convert indices in `s` to
indices in `reverse(s)` and vice-versa, and `graphemes` from module `Unicode` to
operate on user-visible "characters" (graphemes) rather than codepoints.
See also [`Iterators.reverse`](@ref) for
reverse-order iteration without making a copy. Custom string types must implement the
`reverse` function themselves and should typically return a string with the same type
and encoding. If they return a string with a different encoding, they must also override
`reverseind` for that string type to satisfy `s[reverseind(s,i)] == reverse(s)[i]`.

# Examples
```jldoctest
julia> reverse("JuliaLang")
"gnaLailuJ"
```

!!! note
    The examples below may be rendered differently on different systems.
    The comments indicate how they're supposed to be rendered

Combining characters can lead to surprising results:

```jldoctest
julia> reverse("ax̂e") # hat is above x in the input, above e in the output
"êxa"

julia> using Unicode

julia> join(reverse(collect(graphemes("ax̂e")))) # reverses graphemes; hat is above x in both in- and output
"ex̂a"
```
"""
function reverse(s::Union{String,SubString{String}})::String
    # Read characters forwards from `s` and write backwards to `out`
    out = _string_n(sizeof(s))
    offs = sizeof(s) + 1
    for c in s
        offs -= ncodeunits(c)
        __unsafe_string!(out, c, offs)
    end
    return out
end

string(a::String)            = String(a)
string(a::SubString{String}) = String(a)

function Symbol(s::SubString{String})
    return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), s, sizeof(s))
end

@inline function __unsafe_string!(out, c::Char, offs::Integer) # out is a (new) String (or StringVector)
    x = bswap(reinterpret(UInt32, c))
    n = ncodeunits(c)
    GC.@preserve out begin
        unsafe_store!(pointer(out, offs), x % UInt8)
        n == 1 && return n
        x >>= 8
        unsafe_store!(pointer(out, offs+1), x % UInt8)
        n == 2 && return n
        x >>= 8
        unsafe_store!(pointer(out, offs+2), x % UInt8)
        n == 3 && return n
        x >>= 8
        unsafe_store!(pointer(out, offs+3), x % UInt8)
    end
    return n
end

@assume_effects :nothrow @inline function __unsafe_string!(out, s::String, offs::Integer)
    n = sizeof(s)
    GC.@preserve s out unsafe_copyto!(pointer(out, offs), pointer(s), n)
    return n
end

@inline function __unsafe_string!(out, s::SubString{String}, offs::Integer)
    n = sizeof(s)
    GC.@preserve s out unsafe_copyto!(pointer(out, offs), pointer(s), n)
    return n
end

@assume_effects :nothrow @inline function __unsafe_string!(out, s::Symbol, offs::Integer)
    n = sizeof(s)
    GC.@preserve s out unsafe_copyto!(pointer(out, offs), unsafe_convert(Ptr{UInt8},s), n)
    return n
end

# nothrow needed here because for v in a can't prove the indexing is inbounds.
@assume_effects :foldable :nothrow string(a::Union{Char, String, Symbol}...) = _string(a...)

string(a::Union{Char, String, SubString{String}, Symbol}...) = _string(a...)

function _string(a::Union{Char, String, SubString{String}, Symbol}...)
    n = 0
    for v in a
        # 4 types is too many for automatic Union-splitting, so we split manually
        # and allow one specializable call site per concrete type
        if v isa Char
            n += ncodeunits(v)
        elseif v isa String
            n += sizeof(v)
        elseif v isa SubString{String}
            n += sizeof(v)
        else
            n += sizeof(v::Symbol)
        end
    end
    out = _string_n(n)
    offs = 1
    for v in a
        if v isa Char
            offs += __unsafe_string!(out, v, offs)
        elseif v isa String || v isa SubString{String}
            offs += __unsafe_string!(out, v, offs)
        else
            offs += __unsafe_string!(out, v::Symbol, offs)
        end
    end
    return out
end

# don't assume effects for general integers since we cannot know their implementation
# not nothrow because r<0 throws
@assume_effects :foldable repeat(s::String, r::BitInteger) = @invoke repeat(s::String, r::Integer)

function repeat(s::Union{String, SubString{String}}, r::Integer)
    r < 0 && throw(ArgumentError("can't repeat a string $r times"))
    r == 0 && return ""
    r == 1 && return String(s)
    n = sizeof(s)
    out = _string_n(n*r)
    if n == 1 # common case: repeating a single-byte string
        @inbounds b = codeunit(s, 1)
        memset(unsafe_convert(Ptr{UInt8}, out), b, r)
    else
        for i = 0:r-1
            GC.@preserve s out unsafe_copyto!(pointer(out, i*n+1), pointer(s), n)
        end
    end
    return out
end

function filter(f, s::Union{String, SubString{String}})
    out = StringVector(sizeof(s))
    offset = 1
    for c in s
        if f(c)
            offset += __unsafe_string!(out, c, offset)
        end
    end
    resize!(out, offset-1)
    sizehint!(out, offset-1)
    return String(out)
end

getindex(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, r)
back to top