Revision 3711749292ba9c29ad2e3b9eaee90995f8c8290a authored by Keno Fischer on 11 October 2023, 14:41:22 UTC, committed by GitHub on 11 October 2023, 14:41:22 UTC
This should be NFC and is intended to allow the optimizer to delete :enter statements (by replacing them with `nothing`), without leaving dangling `:leave`s around. This is accomplished by having `leave` take (a variable number of) `:enter` tokens (that are already being used by `:pop_exception`). The semantics are that a literal `nothing` or an SSAValue pointing to a `nothing` statement are ignored, and one exception handler is popped for each remaining argument. The actual value of the token is ignored, except that the verifier asserts that it belongs to an `:enter`. Note that we don't need to do the same for :pop_exception, because the token generated by an `:enter` is semantically only in scope for :pop_exception during its catch block. If we determine the `:enter` is dead, then its catch block is guaranteed to not be executed and will be deleted wholesale by cfg liveness. I was considering doing something fancier where :leave is changed back to taking an integer after optimization, but the case where the IR size is bigger after this change (when we are `:leave`ing many handlers) is fairly rare and likely not worth the additional complexity or time cost to do anything special. If it does show up in size benchmarks, I'd rather give `:leave` a special, compact encoding.
1 parent 8180240
pcre.jl
# This file is a part of Julia. License is MIT: https://julialang.org/license
## low-level pcre2 interface ##
module PCRE
import ..RefValue
# include($BUILDROOT/base/pcre_h.jl)
include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "pcre_h.jl"))
# Library name used as the library half of every `(symbol, library)` pair
# passed to `ccall` in this module.
const PCRE_LIB = "libpcre2-8"
# Build a fresh match context with its own JIT stack attached and return the
# context pointer. The JIT stack pointer itself is not returned to the caller.
function create_match_context()
    stack_start_size = 32768
    stack_max_size = 1048576
    stack = ccall(
        (:pcre2_jit_stack_create_8, PCRE_LIB), Ptr{Cvoid},
        (Csize_t, Csize_t, Ptr{Cvoid}),
        stack_start_size, stack_max_size, C_NULL)
    context = ccall(
        (:pcre2_match_context_create_8, PCRE_LIB), Ptr{Cvoid},
        (Ptr{Cvoid},),
        C_NULL)
    # Attach the stack to the context; the callback argument is left as NULL.
    ccall(
        (:pcre2_jit_stack_assign_8, PCRE_LIB), Cvoid,
        (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}),
        context, C_NULL, stack)
    return context
end
# Match contexts indexed by `_tid()`; a `C_NULL` slot has not been created yet.
# `get_local_match_context` swaps this vector for a longer copy whenever the
# calling thread's id exceeds its current length.
THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL]
# Placeholder value only: `get_local_match_context` asserts that this is a
# `Threads.SpinLock` when it takes the slow path, so it is presumably assigned
# during initialization somewhere outside this view -- TODO confirm where.
PCRE_COMPILE_LOCK = nothing
# 1-based id of the calling thread (the `jl_threadid` result plus one).
_tid() = Int(ccall(:jl_threadid, Int16, ())) + 1
# Current value of the runtime's `jl_n_threads` variable, read with an atomic
# acquire load.
_mth() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cint), :acquire))
# Return the match context stored in the calling thread's slot of
# `THREAD_MATCH_CONTEXTS`, growing the table and/or creating the context on
# first use.
function get_local_match_context()
    slot = _tid()
    table = THREAD_MATCH_CONTEXTS
    if slot > length(table)
        # Slow path: the table is too short for this thread id. Re-read and
        # re-check under the lock before replacing the global with a longer copy.
        guard = PCRE_COMPILE_LOCK::Threads.SpinLock
        lock(guard)
        try
            table = THREAD_MATCH_CONTEXTS
            if slot > length(table)
                grown = fill(C_NULL, length(table) + _mth())
                copyto!(grown, table)
                table = grown
                global THREAD_MATCH_CONTEXTS = grown
            end
        finally
            unlock(guard)
        end
    end
    context = @inbounds table[slot]
    context == C_NULL || return context
    # Slow path: this slot has no context yet, so create and publish one.
    context = create_match_context()
    THREAD_MATCH_CONTEXTS[slot] = context
    return context
end
# Supported options for different use cases.
#
# COMPILE_MASK: bitwise OR of the option flags accepted as arguments to
# pcre2_compile. Entries that are commented out below are deliberately left
# out of the mask. The flag constants are not defined in this file; presumably
# they come from the included pcre_h.jl -- TODO confirm.
const COMPILE_MASK =
ALT_BSUX |
ALT_CIRCUMFLEX |
ALT_VERBNAMES |
ANCHORED |
# AUTO_CALLOUT |
CASELESS |
DOLLAR_ENDONLY |
DOTALL |
# DUPNAMES |
ENDANCHORED |
EXTENDED |
EXTENDED_MORE |
FIRSTLINE |
LITERAL |
MATCH_INVALID_UTF |
MATCH_UNSET_BACKREF |
MULTILINE |
NEVER_BACKSLASH_C |
NEVER_UCP |
NEVER_UTF |
NO_AUTO_CAPTURE |
NO_AUTO_POSSESS |
NO_DOTSTAR_ANCHOR |
NO_START_OPTIMIZE |
NO_UTF_CHECK |
UCP |
UNGREEDY |
USE_OFFSET_LIMIT |
UTF
# Arguments to pcre2_set_newline.
# Unlike the other *_MASK constants in this file, this is a tuple of the
# individual newline values, not a bitwise OR of flags.
const COMPILE_NEWLINE_MASK = (
NEWLINE_CR,
NEWLINE_LF,
NEWLINE_CRLF,
NEWLINE_ANY,
NEWLINE_ANYCRLF,
NEWLINE_NUL)
# Arguments to pcre2_set_compile_extra_options: bitwise OR of the EXTRA_*
# option flags listed below.
const COMPILE_EXTRA_MASK =
EXTRA_ALLOW_SURROGATE_ESCAPES |
EXTRA_ALT_BSUX |
EXTRA_BAD_ESCAPE_IS_LITERAL |
EXTRA_ESCAPED_CR_IS_LF |
EXTRA_MATCH_LINE |
EXTRA_MATCH_WORD
# Arguments to match: bitwise OR of the option flags listed below.
# Entries that are commented out are deliberately left out of the mask.
const EXECUTE_MASK =
# ANCHORED |
# COPY_MATCHED_SUBJECT |
# ENDANCHORED |
NOTBOL |
NOTEMPTY |
NOTEMPTY_ATSTART |
NOTEOL |
# NO_JIT |
NO_START_OPTIMIZE |
NO_UTF_CHECK |
PARTIAL_HARD |
PARTIAL_SOFT
# Sentinel with every bit of a `Csize_t` set (bitwise complement of zero).
const UNSET = ~Csize_t(0) # Indicates that an output vector element is unset
# Query field `what` of the compiled pattern `regex` through
# pcre2_pattern_info_8 and return it as a value of type `T`.
# Throws an error describing the failure when the call returns nonzero.
function info(regex::Ptr{Cvoid}, what::Integer, ::Type{T}) where T
    buf = RefValue{T}()
    ret = ccall(
        (:pcre2_pattern_info_8, PCRE_LIB), Cint,
        (Ptr{Cvoid}, UInt32, Ptr{Cvoid}),
        regex, what, buf)
    ret == 0 && return buf[]
    message = if ret == ERROR_NULL
        "PCRE error: NULL regex object"
    elseif ret == ERROR_BADMAGIC
        "PCRE error: invalid regex object"
    elseif ret == ERROR_BADOPTION
        "PCRE error: invalid option flags"
    else
        "PCRE error: unknown error ($ret)"
    end
    error(message)
end
# Number of entries in the output vector of `match_data`: twice the count
# reported by pcre2_get_ovector_count_8.
function ovec_length(match_data)
    count = ccall(
        (:pcre2_get_ovector_count_8, PCRE_LIB), UInt32,
        (Ptr{Cvoid},),
        match_data)
    return 2 * Int(count)
end
# Pointer to the output vector of `match_data`, typed as `Ptr{Csize_t}`.
function ovec_ptr(match_data)
    return ccall(
        (:pcre2_get_ovector_pointer_8, PCRE_LIB), Ptr{Csize_t},
        (Ptr{Cvoid},),
        match_data)
end
# Compile `pattern` with the given `options` and return the compiled-pattern
# pointer. Throws an error carrying the PCRE message and offset on failure.
function compile(pattern::AbstractString, options::Integer)
    # The ccall below needs a `String`-backed subject; convert anything else.
    pattern isa Union{String,SubString{String}} || (pattern = String(pattern))
    errno = RefValue{Cint}(0)
    erroff = RefValue{Csize_t}(0)
    re_ptr = ccall(
        (:pcre2_compile_8, PCRE_LIB), Ptr{Cvoid},
        (Ptr{UInt8}, Csize_t, UInt32, Ref{Cint}, Ref{Csize_t}, Ptr{Cvoid}),
        pattern, ncodeunits(pattern), options, errno, erroff, C_NULL)
    re_ptr == C_NULL &&
        error("PCRE compilation error: $(err_message(errno[])) at offset $(erroff[])")
    return re_ptr
end
# Request JIT compilation of `regex` with the JIT_COMPLETE option.
# Returns `true` on success and `false` when the call reports
# ERROR_JIT_BADOPTION; any other result is thrown as an error.
function jit_compile(regex::Ptr{Cvoid})
    errno = ccall(
        (:pcre2_jit_compile_8, PCRE_LIB), Cint,
        (Ptr{Cvoid}, UInt32),
        regex, JIT_COMPLETE)
    if errno == 0
        return true
    elseif errno == ERROR_JIT_BADOPTION
        return false
    end
    error("PCRE JIT error: $(err_message(errno))")
end
# Release a match-data block through pcre2_match_data_free_8.
function free_match_data(match_data)
    return ccall(
        (:pcre2_match_data_free_8, PCRE_LIB), Cvoid,
        (Ptr{Cvoid},),
        match_data)
end
# Release a compiled pattern through pcre2_code_free_8.
function free_re(re)
    return ccall(
        (:pcre2_code_free_8, PCRE_LIB), Cvoid,
        (Ptr{Cvoid},),
        re)
end
# Release a JIT stack through pcre2_jit_stack_free_8.
function free_jit_stack(stack)
    return ccall(
        (:pcre2_jit_stack_free_8, PCRE_LIB), Cvoid,
        (Ptr{Cvoid},),
        stack)
end
# Release a match context through pcre2_match_context_free_8.
function free_match_context(context)
    return ccall(
        (:pcre2_match_context_free_8, PCRE_LIB), Cvoid,
        (Ptr{Cvoid},),
        context)
end
# Return the PCRE error message text for `errno`.
# Throws an error when the library reports ERROR_BADDATA for that number.
function err_message(errno::Integer)
    buffer = Vector{UInt8}(undef, 1024)
    ret = ccall(
        (:pcre2_get_error_message_8, PCRE_LIB), Cint,
        (Cint, Ptr{UInt8}, Csize_t),
        errno, buffer, length(buffer))
    if ret == ERROR_BADDATA
        error("PCRE error: invalid errno ($errno)")
    end
    # TODO: seems like there should be a better way to get this string
    # Keep `buffer` rooted while its raw pointer is read as a C string.
    message = GC.@preserve buffer unsafe_string(pointer(buffer))
    return message
end
# Run a match of `re` against `subject`, filling `match_data`.
# `String` and `SubString{String}` subjects are passed straight through.
function exec(re, subject::Union{String,SubString{String}}, offset, options, match_data)
    return _exec(re, subject, offset, options, match_data)
end

# Any other subject is converted to a `String` before matching.
function exec(re, subject, offset, options, match_data)
    return _exec(re, String(subject), offset, options, match_data)
end
# Call pcre2_match_8 with the calling thread's match context.
# Returns `true` when the return code is non-negative, `false` for the
# no-match and partial-match codes, and throws for any other negative code.
function _exec(re, subject, offset, options, match_data)
    rc = ccall(
        (:pcre2_match_8, PCRE_LIB), Cint,
        (Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, UInt32, Ptr{Cvoid}, Ptr{Cvoid}),
        re, subject, ncodeunits(subject), offset, options, match_data,
        get_local_match_context())
    # rc == -1 means no match, -2 means partial match.
    if rc < -2
        error("PCRE.exec error: $(err_message(rc))")
    end
    return rc >= 0
end
# Run a single match with temporary match data and return whether it matched.
# The match data is created from `re` and always freed before returning,
# including when `exec` throws (previously it leaked on that path).
function exec_r(re, subject, offset, options)
    match_data = create_match_data(re)
    try
        return exec(re, subject, offset, options, match_data)
    finally
        free_match_data(match_data)
    end
end
# Run a single match with freshly created match data and return
# `(matched, match_data)`. On success the caller owns `match_data` and is
# responsible for passing it to `free_match_data`.
# If `exec` throws, the match data is freed here before the exception
# propagates, since the caller never receives the pointer on that path.
function exec_r_data(re, subject, offset, options)
    match_data = create_match_data(re)
    try
        matched = exec(re, subject, offset, options, match_data)
        return matched, match_data
    catch
        free_match_data(match_data)
        rethrow()
    end
end
# Allocate a match-data block sized from the compiled pattern `re`.
# Throws an error when the library returns a NULL pointer.
function create_match_data(re)
    match_data = ccall(
        (:pcre2_match_data_create_from_pattern_8, PCRE_LIB), Ptr{Cvoid},
        (Ptr{Cvoid}, Ptr{Cvoid}),
        re, C_NULL)
    if match_data == C_NULL
        error("PCRE error: could not allocate memory")
    end
    return match_data
end
# Look up the capture-group number for `name` in the compiled pattern `re`.
# The library's `Cint` result is returned unchanged, widened to `Int`.
function substring_number_from_name(re, name)
    return Int(ccall(
        (:pcre2_substring_number_from_name_8, PCRE_LIB), Cint,
        (Ptr{Cvoid}, Cstring),
        re, name))
end
# Length of captured substring `number` in `match_data`.
# Returns 0 when the library reports ERROR_UNSET; any other negative return
# code is thrown as an error.
function substring_length_bynumber(match_data, number)
    len = RefValue{Csize_t}()
    rc = ccall(
        (:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
        (Ptr{Cvoid}, Cint, Ref{Csize_t}),
        match_data, number, len)
    rc >= 0 && return Int(len[])
    rc == ERROR_UNSET && return 0
    error("PCRE error: $(err_message(rc))")
end
# Copy captured substring `number` from `match_data` into `buf`.
# `buf_size` is passed in through the length reference, and the value the
# library leaves there is returned as an `Int`.
# Throws an error when the return code is negative.
function substring_copy_bynumber(match_data, number, buf, buf_size)
    len = RefValue{Csize_t}(buf_size)
    rc = ccall(
        (:pcre2_substring_copy_bynumber_8, PCRE_LIB), Cint,
        (Ptr{Cvoid}, UInt32, Ptr{UInt8}, Ref{Csize_t}),
        match_data, number, buf, len)
    if rc < 0
        error("PCRE error: $(err_message(rc))")
    end
    return Int(len[])
end
# Build a `Dict{Int,String}` mapping capture-group index to group name by
# walking the name table of the compiled pattern `re`.
function capture_names(re)
    name_count = info(re, INFO_NAMECOUNT, UInt32)
    name_entry_size = info(re, INFO_NAMEENTRYSIZE, UInt32)
    nametable_ptr = info(re, INFO_NAMETABLE, Ptr{UInt8})
    names = Dict{Int,String}()
    for i = 1:name_count
        # Start of the i-th fixed-size entry in the table.
        entry = nametable_ptr + (i - 1) * name_entry_size
        # The capture group index corresponding to name 'i' is stored as a
        # big-endian 16-bit value in the first two bytes of the entry.
        high_byte = UInt16(unsafe_load(entry, 1))
        low_byte = UInt16(unsafe_load(entry, 2))
        idx = (high_byte << 8) | low_byte
        # The capture group name is a null-terminated string located directly
        # after the index.
        names[idx] = unsafe_string(entry + 2)
    end
    return names
end
end # module
Computing file changes ...