https://github.com/JuliaLang/julia
Revision 06a49a392db85e1f55a1810669559389e8ecf37c authored by Simeon David Schaub on 25 April 2022, 12:30:31 UTC, committed by Simeon David Schaub on 25 April 2022, 20:15:55 UTC
Not sure whether putting hints into the lowering pass like this is a great solution. I thought about maybe using Julia-side error hints for this instead, but using some kind of pattern-matching doesn't seem like a great solution either. fixes #45031
1 parent 9d14cb1
Tip revision: 06a49a392db85e1f55a1810669559389e8ecf37c authored by Simeon David Schaub on 25 April 2022, 12:30:31 UTC
improve error message for invalid function args
improve error message for invalid function args
Tip revision: 06a49a3
pcre.jl
# This file is a part of Julia. License is MIT: https://julialang.org/license
## low-level pcre2 interface ##
module PCRE
import ..RefValue
# Pull in `pcre_h.jl` (presumably the source of the option/error constants used below —
# they are not defined in this file). The path prefix is `Core.ARGS[2]` when at least two
# arguments are present and the empty string otherwise.
# include($BUILDROOT/base/pcre_h.jl)
include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "pcre_h.jl"))
# Library name used as the target of the PCRE2 `ccall`s in this module.
const PCRE_LIB = "libpcre2-8"
# Build a new PCRE2 match context and assign a freshly created JIT stack to it.
# The JIT stack is requested with a start size of 32768 and a maximum size of 1048576.
# Returns the raw context pointer.
function create_match_context()
    stack_start, stack_max = 32768, 1048576
    stack = ccall((:pcre2_jit_stack_create_8, PCRE_LIB), Ptr{Cvoid},
                  (Csize_t, Csize_t, Ptr{Cvoid}),
                  stack_start, stack_max, C_NULL)
    context = ccall((:pcre2_match_context_create_8, PCRE_LIB),
                    Ptr{Cvoid}, (Ptr{Cvoid},), C_NULL)
    ccall((:pcre2_jit_stack_assign_8, PCRE_LIB), Cvoid,
          (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}), context, C_NULL, stack)
    return context
end
# Match contexts indexed by 1-based thread id (see `get_local_match_context`). Starts with
# a single `C_NULL` slot; `C_NULL` marks a slot whose context has not been created yet.
THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL]
# Placeholder value. `get_local_match_context` asserts this is a `Threads.SpinLock` before
# locking it, so it has to be assigned elsewhere before the vector first needs to grow.
PCRE_COMPILE_LOCK = nothing
# 1-based index of the calling thread: the runtime's `jl_threadid` value plus one.
function _tid()
    zero_based = ccall(:jl_threadid, Int16, ())
    return Int(zero_based) + 1
end
# Current value of the runtime's `jl_n_threads` global, read as a `Cint` and widened to `Int`.
function _nth()
    count_ptr = cglobal(:jl_n_threads, Cint)
    return Int(unsafe_load(count_ptr))
end
# Return the match context stored for the calling thread, creating it on first use.
# When the shared vector has no slot for this thread id, it is replaced by a larger one
# while holding PCRE_COMPILE_LOCK.
function get_local_match_context()
tid = _tid()
ctxs = THREAD_MATCH_CONTEXTS
if length(ctxs) < tid
# slow path to allocate it
l = PCRE_COMPILE_LOCK::Threads.SpinLock
lock(l)
try
# re-read the global now that the lock is held (presumably because another thread may
# have replaced it in the meantime) and check the length again
ctxs = THREAD_MATCH_CONTEXTS
if length(ctxs) < tid
# new vector of length _nth() filled with C_NULL, existing entries copied to its front,
# then published through the global
global THREAD_MATCH_CONTEXTS = ctxs = copyto!(fill(C_NULL, _nth()), ctxs)
end
finally
unlock(l)
end
end
# @inbounds: the length check above ensures `ctxs` has at least `tid` entries here
ctx = @inbounds ctxs[tid]
if ctx == C_NULL
# slow path to allocate it
ctx = create_match_context()
# NOTE(review): the store goes through the global rather than the local `ctxs`, and
# happens without holding the lock — confirm this is intended
THREAD_MATCH_CONTEXTS[tid] = ctx
end
return ctx
end
# supported options for different use cases
# arguments to pcre2_compile
# Bitwise OR of the option flags listed below. Entries that are commented out
# (AUTO_CALLOUT, DUPNAMES) are not part of the mask.
const COMPILE_MASK =
ALT_BSUX |
ALT_CIRCUMFLEX |
ALT_VERBNAMES |
ANCHORED |
# AUTO_CALLOUT |
CASELESS |
DOLLAR_ENDONLY |
DOTALL |
# DUPNAMES |
ENDANCHORED |
EXTENDED |
EXTENDED_MORE |
FIRSTLINE |
LITERAL |
MATCH_INVALID_UTF |
MATCH_UNSET_BACKREF |
MULTILINE |
NEVER_BACKSLASH_C |
NEVER_UCP |
NEVER_UTF |
NO_AUTO_CAPTURE |
NO_AUTO_POSSESS |
NO_DOTSTAR_ANCHOR |
NO_START_OPTIMIZE |
NO_UTF_CHECK |
UCP |
UNGREEDY |
USE_OFFSET_LIMIT |
UTF
# arguments to pcre2_set_newline
# Despite the `_MASK` suffix this is a tuple of the individual newline values,
# not a bitwise OR of them.
const COMPILE_NEWLINE_MASK = (
NEWLINE_CR,
NEWLINE_LF,
NEWLINE_CRLF,
NEWLINE_ANY,
NEWLINE_ANYCRLF,
NEWLINE_NUL)
# arguments to pcre2_set_compile_extra_options
# Bitwise OR of the EXTRA_* option flags listed below.
const COMPILE_EXTRA_MASK =
EXTRA_ALLOW_SURROGATE_ESCAPES |
EXTRA_ALT_BSUX |
EXTRA_BAD_ESCAPE_IS_LITERAL |
EXTRA_ESCAPED_CR_IS_LF |
EXTRA_MATCH_LINE |
EXTRA_MATCH_WORD
# arguments to match
# Bitwise OR of the option flags listed below. Entries that are commented out
# (ANCHORED, COPY_MATCHED_SUBJECT, ENDANCHORED, NO_JIT) are not part of the mask.
const EXECUTE_MASK =
# ANCHORED |
# COPY_MATCHED_SUBJECT |
# ENDANCHORED |
NOTBOL |
NOTEMPTY |
NOTEMPTY_ATSTART |
NOTEOL |
# NO_JIT |
NO_START_OPTIMIZE |
NO_UTF_CHECK |
PARTIAL_HARD |
PARTIAL_SOFT
# Sentinel with every bit of a `Csize_t` set (bitwise NOT of zero).
const UNSET = ~Csize_t(0) # Indicates that an output vector element is unset
# Query `pcre2_pattern_info_8` for the item `what` of the compiled pattern `regex` and
# return it as a value of type `T`. Any non-zero status from the library is turned into
# an error whose message depends on the status code.
function info(regex::Ptr{Cvoid}, what::Integer, ::Type{T}) where T
    buf = RefValue{T}()
    ret = ccall((:pcre2_pattern_info_8, PCRE_LIB), Cint,
                (Ptr{Cvoid}, UInt32, Ptr{Cvoid}),
                regex, what, buf)
    # success: hand back whatever the library wrote into the reference
    ret == 0 && return buf[]
    if ret == ERROR_NULL
        error("PCRE error: NULL regex object")
    elseif ret == ERROR_BADMAGIC
        error("PCRE error: invalid regex object")
    elseif ret == ERROR_BADOPTION
        error("PCRE error: invalid option flags")
    else
        error("PCRE error: unknown error ($ret)")
    end
end
# Twice the count reported by `pcre2_get_ovector_count_8` for `match_data`, as an `Int`.
function ovec_length(match_data)
    pair_count = ccall((:pcre2_get_ovector_count_8, PCRE_LIB), UInt32,
                       (Ptr{Cvoid},), match_data)
    return 2 * Int(pair_count)
end
# Pointer to the output vector of `match_data`, typed as `Ptr{Csize_t}`.
function ovec_ptr(match_data)
    return ccall((:pcre2_get_ovector_pointer_8, PCRE_LIB), Ptr{Csize_t},
                 (Ptr{Cvoid},), match_data)
end
# Compile `pattern` with the given `options` via `pcre2_compile_8` and return the
# compiled-pattern pointer. Patterns that are not `String`/`SubString{String}` are first
# converted to `String`. Throws an error carrying the library's message and the error
# offset when compilation returns NULL.
function compile(pattern::AbstractString, options::Integer)
    pattern isa Union{String,SubString{String}} || (pattern = String(pattern))
    # out-parameters filled in by the library on failure
    errno = RefValue{Cint}(0)
    erroff = RefValue{Csize_t}(0)
    re_ptr = ccall((:pcre2_compile_8, PCRE_LIB), Ptr{Cvoid},
                   (Ptr{UInt8}, Csize_t, UInt32, Ref{Cint}, Ref{Csize_t}, Ptr{Cvoid}),
                   pattern, ncodeunits(pattern), options, errno, erroff, C_NULL)
    re_ptr != C_NULL && return re_ptr
    error("PCRE compilation error: $(err_message(errno[])) at offset $(erroff[])")
end
# Ask PCRE2 to JIT-compile `regex` with `JIT_COMPLETE`.
# Returns `true` on status 0, `false` when the library reports `ERROR_JIT_BADOPTION`,
# and throws for any other status.
function jit_compile(regex::Ptr{Cvoid})
    errno = ccall((:pcre2_jit_compile_8, PCRE_LIB), Cint,
                  (Ptr{Cvoid}, UInt32), regex, JIT_COMPLETE)
    if errno == 0
        return true
    elseif errno == ERROR_JIT_BADOPTION
        return false
    end
    error("PCRE JIT error: $(err_message(errno))")
end
# Release a match-data block through `pcre2_match_data_free_8`.
function free_match_data(match_data)
    return ccall((:pcre2_match_data_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), match_data)
end
# Release a compiled pattern through `pcre2_code_free_8`.
function free_re(re)
    return ccall((:pcre2_code_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), re)
end
# Release a JIT stack through `pcre2_jit_stack_free_8`.
function free_jit_stack(stack)
    return ccall((:pcre2_jit_stack_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), stack)
end
# Release a match context through `pcre2_match_context_free_8`.
function free_match_context(context)
    return ccall((:pcre2_match_context_free_8, PCRE_LIB), Cvoid, (Ptr{Cvoid},), context)
end
# Translate a PCRE2 error number into its message text using a 1024-byte scratch buffer.
# Throws when the library answers `ERROR_BADDATA` for the given `errno`.
function err_message(errno::Integer)
    buffer = Vector{UInt8}(undef, 1024)
    ret = ccall((:pcre2_get_error_message_8, PCRE_LIB), Cint,
                (Cint, Ptr{UInt8}, Csize_t), errno, buffer, length(buffer))
    if ret == ERROR_BADDATA
        error("PCRE error: invalid errno ($errno)")
    end
    # TODO: seems like there should be a better way to get this string
    # keep `buffer` rooted while its raw pointer is being read
    message = GC.@preserve buffer unsafe_string(pointer(buffer))
    return message
end
# Run `pcre2_match_8` for pattern `re` against `subject`, starting at `offset`, with the
# given `options`, writing results into `match_data` and using the calling thread's match
# context. Subjects that are not `String`/`SubString{String}` are converted to `String`
# first. Returns `true` when the return code is non-negative; throws for codes below -2.
function exec(re, subject, offset, options, match_data)
    subject isa Union{String,SubString{String}} || (subject = String(subject))
    rc = ccall((:pcre2_match_8, PCRE_LIB), Cint,
               (Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, UInt32, Ptr{Cvoid}, Ptr{Cvoid}),
               re, subject, ncodeunits(subject), offset, options, match_data,
               get_local_match_context())
    # rc == -1 means no match, -2 means partial match.
    if rc < -2
        error("PCRE.exec error: $(err_message(rc))")
    end
    return rc >= 0
end
# Match `re` against `subject` using a temporary match-data block and return whether the
# match succeeded (the result of `exec`).
#
# The match data is created here and always released before returning. `exec` throws for
# library error codes, so the release is done in a `finally` clause; otherwise the block
# would leak whenever matching fails with an exception.
function exec_r(re, subject, offset, options)
    match_data = create_match_data(re)
    try
        return exec(re, subject, offset, options, match_data)
    finally
        free_match_data(match_data)
    end
end
# Match `re` against `subject` with a freshly created match-data block and return the
# tuple `(matched, match_data)`, where `matched` is the result of `exec`.
#
# On a normal return the match data is handed to the caller (this function does not free
# it). If `exec` throws, the caller never receives the pointer, so it is freed here before
# the exception is rethrown.
function exec_r_data(re, subject, offset, options)
    match_data = create_match_data(re)
    try
        matched = exec(re, subject, offset, options, match_data)
        return matched, match_data
    catch
        free_match_data(match_data)
        rethrow()
    end
end
# Allocate a match-data block sized from the compiled pattern `re` via
# `pcre2_match_data_create_from_pattern_8`. Throws when the library returns NULL.
function create_match_data(re)
    p = ccall((:pcre2_match_data_create_from_pattern_8, PCRE_LIB),
              Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}), re, C_NULL)
    if p == C_NULL
        error("PCRE error: could not allocate memory")
    end
    return p
end
# Look up `name` in the compiled pattern `re` with `pcre2_substring_number_from_name_8`
# and return the library's `Cint` result widened to `Int` (no error translation is done).
function substring_number_from_name(re, name)
    return Int(ccall((:pcre2_substring_number_from_name_8, PCRE_LIB), Cint,
                     (Ptr{Cvoid}, Cstring), re, name))
end
# Length of captured substring `number` in `match_data`, as an `Int`.
# Returns 0 when the library reports `ERROR_UNSET` for that group; any other negative
# return code is raised as an error with the library's message.
#
# `number` is declared as `UInt32` in the ccall signature, matching the `uint32_t`
# parameter of the PCRE2 prototype and the signature used by `substring_copy_bynumber`.
function substring_length_bynumber(match_data, number)
    s = RefValue{Csize_t}()
    rc = ccall((:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
               (Ptr{Cvoid}, UInt32, Ref{Csize_t}), match_data, number, s)
    if rc < 0
        # an unset group is reported as length zero rather than as a failure
        rc == ERROR_UNSET && return 0
        error("PCRE error: $(err_message(rc))")
    end
    return Int(s[])
end
# Copy captured substring `number` from `match_data` into `buf` using
# `pcre2_substring_copy_bynumber_8`. `buf_size` is passed to the library through a
# `Csize_t` reference, and the value left in that reference is returned as an `Int`.
# Throws on any negative return code.
function substring_copy_bynumber(match_data, number, buf, buf_size)
    s = RefValue{Csize_t}(buf_size)
    rc = ccall((:pcre2_substring_copy_bynumber_8, PCRE_LIB), Cint,
               (Ptr{Cvoid}, UInt32, Ptr{UInt8}, Ref{Csize_t}),
               match_data, number, buf, s)
    if rc < 0
        error("PCRE error: $(err_message(rc))")
    end
    return Int(s[])
end
# Build a `Dict{Int,String}` mapping capture-group index to group name by walking the
# name table of the compiled pattern `re`. The table has `INFO_NAMECOUNT` entries of
# `INFO_NAMEENTRYSIZE` bytes each, starting at the `INFO_NAMETABLE` pointer.
function capture_names(re)
    name_count = info(re, INFO_NAMECOUNT, UInt32)
    name_entry_size = info(re, INFO_NAMEENTRYSIZE, UInt32)
    nametable_ptr = info(re, INFO_NAMETABLE, Ptr{UInt8})
    names = Dict{Int,String}()
    for i = 1:name_count
        # start of the i-th fixed-size entry
        entry = nametable_ptr + (i-1)*name_entry_size
        # The first two bytes of an entry hold the capture group index as a
        # big-endian 16-bit value.
        hi = UInt16(unsafe_load(entry, 1))
        lo = UInt16(unsafe_load(entry, 2))
        # The null-terminated group name follows directly after the index.
        names[(hi << 8) | lo] = unsafe_string(entry + 2)
    end
    return names
end
end # module
Computing file changes ...