https://github.com/JuliaLang/julia
Raw File
Tip revision: 1b4bfa298731c17f691044b5ec879676b9963afd authored by Keno Fischer on 04 October 2023, 16:07:06 UTC
WIP
Tip revision: 1b4bfa2
shell.jl
# This file is a part of Julia. License is MIT: https://julialang.org/license

## shell-like command parsing ##

const shell_special = "#{}()[]<>|&*?~;"

# strips the end but respects the space when the string ends with "\\ "
function rstrip_shell(s::AbstractString)
    c_old = nothing
    for (i, c) in Iterators.reverse(pairs(s))
        i::Int; c::AbstractChar
        ((c == '\\') && c_old == ' ') && return SubString(s, 1, i+1)
        isspace(c) || return SubString(s, 1, i)
        c_old = c
    end
    SubString(s, 1, 0)
end

function shell_parse(str::AbstractString, interpolate::Bool=true;
                     special::AbstractString="", filename="none")
    s = SubString(str, firstindex(str))
    s = rstrip_shell(lstrip(s))

    # N.B.: This is used by REPLCompletions
    last_parse = 0:-1
    isempty(s) && return interpolate ? (Expr(:tuple,:()),last_parse) : ([],last_parse)

    in_single_quotes = false
    in_double_quotes = false

    args = []
    arg = []
    i = firstindex(s)
    st = Iterators.Stateful(pairs(s))

    function push_nonempty!(list, x)
        if !isa(x,AbstractString) || !isempty(x)
            push!(list, x)
        end
        return nothing
    end
    function consume_upto!(list, s, i, j)
        push_nonempty!(list, s[i:prevind(s, j)::Int])
        something(peek(st), lastindex(s)::Int+1 => '\0').first::Int
    end
    function append_2to1!(list, innerlist)
        if isempty(innerlist); push!(innerlist, ""); end
        push!(list, copy(innerlist))
        empty!(innerlist)
    end

    C = eltype(str)
    P = Pair{Int,C}
    for (j, c) in st
        j, c = j::Int, c::C
        if !in_single_quotes && !in_double_quotes && isspace(c)
            i = consume_upto!(arg, s, i, j)
            append_2to1!(args, arg)
            while !isempty(st)
                # We've made sure above that we don't end in whitespace,
                # so updating `i` here is ok
                (i, c) = peek(st)::P
                isspace(c) || break
                popfirst!(st)
            end
        elseif interpolate && !in_single_quotes && c == '$'
            i = consume_upto!(arg, s, i, j)
            isempty(st) && error("\$ right before end of command")
            stpos, c = popfirst!(st)::P
            isspace(c) && error("space not allowed right after \$")
            if startswith(SubString(s, stpos), "var\"")
                # Disallow var"#" syntax in cmd interpolations.
                # TODO: Allow only identifiers after the $ for consistency with
                # string interpolation syntax (see #3150)
                ex, j = :var, stpos+3
            else
                # use parseatom instead of parse to respect filename (#28188)
                ex, j = Meta.parseatom(s, stpos, filename=filename)
            end
            last_parse = (stpos:prevind(s, j)) .+ s.offset
            push_nonempty!(arg, ex)
            s = SubString(s, j)
            Iterators.reset!(st, pairs(s))
            i = firstindex(s)
        else
            if !in_double_quotes && c == '\''
                in_single_quotes = !in_single_quotes
                i = consume_upto!(arg, s, i, j)
            elseif !in_single_quotes && c == '"'
                in_double_quotes = !in_double_quotes
                i = consume_upto!(arg, s, i, j)
            elseif !in_single_quotes && c == '\\'
                if !isempty(st) && (peek(st)::P)[2] in ('\n', '\r')
                    i = consume_upto!(arg, s, i, j) + 1
                    if popfirst!(st)[2] == '\r' && (peek(st)::P)[2] == '\n'
                        i += 1
                        popfirst!(st)
                    end
                    while !isempty(st) && (peek(st)::P)[2] in (' ', '\t')
                        i = nextind(str, i)
                        _ = popfirst!(st)
                    end
                elseif in_double_quotes
                    isempty(st) && error("unterminated double quote")
                    k, c′ = peek(st)::P
                    if c′ == '"' || c′ == '$' || c′ == '\\'
                        i = consume_upto!(arg, s, i, j)
                        _ = popfirst!(st)
                    end
                else
                    isempty(st) && error("dangling backslash")
                    i = consume_upto!(arg, s, i, j)
                    _ = popfirst!(st)
                end
            elseif !in_single_quotes && !in_double_quotes && c in special
                error("parsing command `$str`: special characters \"$special\" must be quoted in commands")
            end
        end
    end

    if in_single_quotes; error("unterminated single quote"); end
    if in_double_quotes; error("unterminated double quote"); end

    push_nonempty!(arg, s[i:end])
    append_2to1!(args, arg)

    interpolate || return args, last_parse

    # construct an expression
    ex = Expr(:tuple)
    for arg in args
        push!(ex.args, Expr(:tuple, arg...))
    end
    return ex, last_parse
end

function shell_split(s::AbstractString)
    parsed = shell_parse(s, false)[1]
    args = String[]
    for arg in parsed
        push!(args, string(arg...))
    end
    args
end

function print_shell_word(io::IO, word::AbstractString, special::AbstractString = "")
    has_single = false
    has_special = false
    for c in word
        if isspace(c) || c=='\\' || c=='\'' || c=='"' || c=='$' || c in special
            has_special = true
            if c == '\''
                has_single = true
            end
        end
    end
    if isempty(word)
        print(io, "''")
    elseif !has_special
        print(io, word)
    elseif !has_single
        print(io, '\'', word, '\'')
    else
        print(io, '"')
        for c in word
            if c == '"' || c == '$'
                print(io, '\\')
            end
            print(io, c)
        end
        print(io, '"')
    end
    nothing
end

function print_shell_escaped(io::IO, cmd::AbstractString, args::AbstractString...;
                             special::AbstractString="")
    print_shell_word(io, cmd, special)
    for arg in args
        print(io, ' ')
        print_shell_word(io, arg, special)
    end
end
print_shell_escaped(io::IO; special::String="") = nothing

"""
    shell_escape(args::Union{Cmd,AbstractString...}; special::AbstractString="")

The unexported `shell_escape` function is the inverse of the unexported `shell_split` function:
it takes a string or command object and escapes any special characters in such a way that calling
`shell_split` on it would give back the array of words in the original command. The `special`
keyword argument controls what characters in addition to whitespace, backslashes, quotes and
dollar signs are considered to be special (default: none).

# Examples
```jldoctest
julia> Base.shell_escape("cat", "/foo/bar baz", "&&", "echo", "done")
"cat '/foo/bar baz' && echo done"

julia> Base.shell_escape("echo", "this", "&&", "that")
"echo this && that"
```
"""
shell_escape(args::AbstractString...; special::AbstractString="") =
    sprint((io, args...) -> print_shell_escaped(io, args..., special=special), args...)


function print_shell_escaped_posixly(io::IO, args::AbstractString...)
    first = true
    for arg in args
        first || print(io, ' ')
        # avoid printing quotes around simple enough strings
        # that any (reasonable) shell will definitely never consider them to be special
        have_single::Bool = false
        have_double::Bool = false
        function isword(c::AbstractChar)
            if '0' <= c <= '9' || 'a' <= c <= 'z' || 'A' <= c <= 'Z'
                # word characters
            elseif c == '_' || c == '/' || c == '+' || c == '-'
                # other common characters
            elseif c == '\''
                have_single = true
            elseif c == '"'
                have_double && return false # switch to single quoting
                have_double = true
            elseif !first && c == '='
                # equals is special if it is first (e.g. `env=val ./cmd`)
            else
                # anything else
                return false
            end
            return true
        end
        if isempty(arg)
            print(io, "''")
        elseif all(isword, arg)
            have_single && (arg = replace(arg, '\'' => "\\'"))
            have_double && (arg = replace(arg, '"' => "\\\""))
            print(io, arg)
        else
            print(io, '\'', replace(arg, '\'' => "'\\''"), '\'')
        end
        first = false
    end
end

"""
    shell_escape_posixly(args::Union{Cmd,AbstractString...})

The unexported `shell_escape_posixly` function
takes a string or command object and escapes any special characters in such a way that
it is safe to pass it as an argument to a posix shell.

# Examples
```jldoctest
julia> Base.shell_escape_posixly("cat", "/foo/bar baz", "&&", "echo", "done")
"cat '/foo/bar baz' '&&' echo done"

julia> Base.shell_escape_posixly("echo", "this", "&&", "that")
"echo this '&&' that"
```
"""
shell_escape_posixly(args::AbstractString...) =
    sprint(print_shell_escaped_posixly, args...)

"""
    shell_escape_csh(args::Union{Cmd,AbstractString...})
    shell_escape_csh(io::IO, args::Union{Cmd,AbstractString...})

This function quotes any metacharacters in the string arguments such
that the string returned can be inserted into a command-line for
interpretation by the Unix C shell (csh, tcsh), where each string
argument will form one word.

In contrast to a POSIX shell, csh does not support the use of the
backslash as a general escape character in double-quoted strings.
Therefore, this function wraps strings that might contain
metacharacters in single quotes, except for parts that contain single
quotes, which it wraps in double quotes instead. It switches between
these types of quotes as needed. Linefeed characters are escaped with
a backslash.

This function should also work for a POSIX shell, except if the input
string contains a linefeed (`"\\n"`) character.

See also: [`shell_escape_posixly`](@ref)
"""
function shell_escape_csh(io::IO, args::AbstractString...)
    first = true
    for arg in args
        first || write(io, ' ')
        first = false
        i = 1
        while true
            for (r,e) = (r"^[A-Za-z0-9/\._-]+\z"sa => "",
                         r"^[^']*\z"sa => "'", r"^[^\$\`\"]*\z"sa => "\"",
                         r"^[^']+"sa  => "'", r"^[^\$\`\"]+"sa  => "\"")
                if ((m = match(r, SubString(arg, i))) !== nothing)
                    write(io, e)
                    write(io, replace(m.match, '\n' => "\\\n"))
                    write(io, e)
                    i += ncodeunits(m.match)
                    break
                end
            end
            i <= lastindex(arg) || break
        end
    end
end
shell_escape_csh(args::AbstractString...) =
    sprint(shell_escape_csh, args...;
           sizehint = sum(sizeof.(args)) + length(args) * 3)

"""
    shell_escape_wincmd(s::AbstractString)
    shell_escape_wincmd(io::IO, s::AbstractString)

The unexported `shell_escape_wincmd` function escapes Windows `cmd.exe` shell
meta characters. It escapes `()!^<>&|` by placing a `^` in front. An `@` is
only escaped at the start of the string. Pairs of `"` characters and the
strings they enclose are passed through unescaped. Any remaining `"` is escaped
with `^` to ensure that the number of unescaped `"` characters in the result
remains even.

Since `cmd.exe` substitutes variable references (like `%USER%`) _before_
processing the escape characters `^` and `"`, this function makes no attempt to
escape the percent sign (`%`), the presence of `%` in the input may cause
severe breakage, depending on where the result is used.

Input strings with ASCII control characters that cannot be escaped (NUL, CR,
LF) will cause an `ArgumentError` exception.

The result is safe to pass as an argument to a command call being processed by
`CMD.exe /S /C " ... "` (with surrounding double-quote pair) and will be
received verbatim by the target application if the input does not contain `%`
(else this function will fail with an ArgumentError). The presence of `%` in
the input string may result in command injection vulnerabilities and may
invalidate any claim of suitability of the output of this function for use as
an argument to cmd (due to the ordering described above), so use caution when
assembling a string from various sources.

This function may be useful in concert with the `windows_verbatim` flag to
[`Cmd`](@ref) when constructing process pipelines.

```julia
wincmd(c::String) =
   run(Cmd(Cmd(["cmd.exe", "/s /c \\" \$c \\""]);
           windows_verbatim=true))
wincmd_echo(s::String) =
   wincmd("echo " * Base.shell_escape_wincmd(s))
wincmd_echo("hello \$(ENV["USERNAME"]) & the \\"whole\\" world! (=^I^=)")
```

But take note that if the input string `s` contains a `%`, the argument list
and echo'ed text may get corrupted, resulting in arbitrary command execution.
The argument can alternatively be passed as an environment variable, which
avoids the problem with `%` and the need for the `windows_verbatim` flag:

```julia
cmdargs = Base.shell_escape_wincmd("Passing args with %cmdargs% works 100%!")
run(setenv(`cmd /C echo %cmdargs%`, "cmdargs" => cmdargs))
```

!!! warning
    The argument parsing done by CMD when calling batch files (either inside
    `.bat` files or as arguments to them) is not fully compatible with the
    output of this function. In particular, the processing of `%` is different.

!!! important
    Due to a peculiar behavior of the CMD parser/interpreter, each command
    after a literal `|` character (indicating a command pipeline) must have
    `shell_escape_wincmd` applied twice since it will be parsed twice by CMD.
    This implies ENV variables would also be expanded twice!
    For example:
    ```julia
    to_print = "All for 1 & 1 for all!"
    to_print_esc = Base.shell_escape_wincmd(Base.shell_escape_wincmd(to_print))
    run(Cmd(Cmd(["cmd", "/S /C \\" break | echo \$(to_print_esc) \\""]), windows_verbatim=true))
    ```

With an I/O stream parameter `io`, the result will be written there,
rather than returned as a string.

See also [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref).

# Example
```jldoctest
julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"")
"a^^\\"^o\\"^^u^\\""
```
"""
function shell_escape_wincmd(io::IO, s::AbstractString)
    # https://stackoverflow.com/a/4095133/1990689
    occursin(r"[\r\n\0]"sa, s) &&
        throw(ArgumentError("control character unsupported by CMD.EXE"))
    i = 1
    len = ncodeunits(s)
    if len > 0 && s[1] == '@'
        write(io, '^')
    end
    while i <= len
        c = s[i]
        if c == '"' && (j = findnext('"', s, nextind(s,i))) !== nothing
            write(io, SubString(s,i,j))
            i = j
        else
            if c in ('"', '(', ')', '!', '^', '<', '>', '&', '|')
                write(io, '^', c)
            else
                write(io, c)
            end
        end
        i = nextind(s,i)
    end
end
shell_escape_wincmd(s::AbstractString) = sprint(shell_escape_wincmd, s;
                                                sizehint = 2*sizeof(s))

"""
    escape_microsoft_c_args(args::Union{Cmd,AbstractString...})
    escape_microsoft_c_args(io::IO, args::Union{Cmd,AbstractString...})

Convert a collection of string arguments into a string that can be
passed to many Windows command-line applications.

Microsoft Windows passes the entire command line as a single string to
the application (unlike POSIX systems, where the shell splits the
command line into a list of arguments). Many Windows API applications
(including julia.exe), use the conventions of the [Microsoft C/C++
runtime](https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments)
to split that command line into a list of strings.

This function implements an inverse for a parser compatible with these rules.
It joins command-line arguments to be passed to a Windows
C/C++/Julia application into a command line, escaping or quoting the
meta characters space, TAB, double quote and backslash where needed.

See also [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref).
"""
function escape_microsoft_c_args(io::IO, args::AbstractString...)
    # http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
    first = true
    for arg in args
        if first
            first = false
        else
            write(io, ' ')  # separator
        end
        if isempty(arg) || occursin(r"[ \t\"]"sa, arg)
            # Julia raw strings happen to use the same escaping convention
            # as the argv[] parser in Microsoft's C runtime library.
            write(io, '"')
            escape_raw_string(io, arg)
            write(io, '"')
        else
            write(io, arg)
        end
    end
end
escape_microsoft_c_args(args::AbstractString...) =
    sprint(escape_microsoft_c_args, args...;
           sizehint = (sum(sizeof.(args)) + 3*length(args)))
back to top