Content - 75c0ff16b327b15e00a49ce09834ed5ef4a45d91 - 9c4dbb1/base/compiler/optimize.jl

visit type:
Tip revision: 6446a706d20c610e0711d8feeeb2faeee0459aac authored by Valentin Churavy on 26 February 2018, 02:06:45 UTC
backport patch for D33311
Tip revision: 6446a70
optimize.jl
# This file is a part of Julia. License is MIT: https://julialang.org/license

#####################
# OptimizationState #
#####################

mutable struct OptimizationState
    linfo::MethodInstance
    vararg_type_container #::Type
    backedges::Vector{Any}
    src::CodeInfo
    mod::Module
    nargs::Int
    next_label::Int # index of the current highest label for this function
    min_valid::UInt
    max_valid::UInt
    params::Params
    function OptimizationState(frame::InferenceState)
        s_edges = frame.stmt_edges[1]
        if s_edges === ()
            s_edges = []
            frame.stmt_edges[1] = s_edges
        end
        next_label = label_counter(frame.src.code) + 1
        return new(frame.linfo, frame.vararg_type_container,
                   s_edges::Vector{Any},
                   frame.src, frame.mod, frame.nargs,
                   next_label, frame.min_valid, frame.max_valid,
                   frame.params)
    end
    function OptimizationState(linfo::MethodInstance, src::CodeInfo,
                               params::Params)
        # prepare src for running optimization passes
        # if it isn't already
        nssavalues = src.ssavaluetypes
        if nssavalues isa Int
            src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
        end
        if src.slottypes === nothing
            nslots = length(src.slotnames)
            src.slottypes = Any[ Any for i = 1:nslots ]
        end
        s_edges = []
        # cache some useful state computations
        toplevel = !isa(linfo.def, Method)
        if !toplevel
            meth = linfo.def
            inmodule = meth.module
            nargs = meth.nargs
        else
            inmodule = linfo.def::Module
            nargs = 0
        end
        next_label = label_counter(src.code) + 1
        vararg_type_container = nothing # if you want something more accurate, set it yourself :P
        return new(linfo, vararg_type_container,
                   s_edges::Vector{Any},
                   src, inmodule, nargs,
                   next_label,
                   min_world(linfo), max_world(linfo),
                   params)
    end
end

function OptimizationState(linfo::MethodInstance, params::Params)
    src = retrieve_code_info(linfo)
    src === nothing && return nothing
    return OptimizationState(linfo, src, params)
end

_topmod(sv::OptimizationState) = _topmod(sv.mod)

genlabel(sv::OptimizationState) = LabelNode(sv.next_label += 1)

function newvar!(sv::OptimizationState, @nospecialize(typ))
    id = length(sv.src.ssavaluetypes)
    push!(sv.src.ssavaluetypes, typ)
    return SSAValue(id)
end

function update_valid_age!(min_valid::UInt, max_valid::UInt, sv::OptimizationState)
    sv.min_valid = max(sv.min_valid, min_valid)
    sv.max_valid = min(sv.max_valid, max_valid)
    @assert(!isa(sv.linfo.def, Method) ||
            (sv.min_valid == typemax(UInt) && sv.max_valid == typemin(UInt)) ||
            sv.min_valid <= sv.params.world <= sv.max_valid,
            "invalid age range update")
    nothing
end

update_valid_age!(li::MethodInstance, sv::OptimizationState) = update_valid_age!(min_world(li), max_world(li), sv)

function add_backedge!(li::MethodInstance, caller::OptimizationState)
    isa(caller.linfo.def, Method) || return # don't add backedges to toplevel exprs
    push!(caller.backedges, li)
    update_valid_age!(li, caller)
    nothing
end

function is_specializable_vararg_slot(@nospecialize(arg), sv::OptimizationState)
    return (isa(arg, Slot) && slot_id(arg) == sv.nargs &&
            isa(sv.vararg_type_container, DataType))
end

###########
# structs #
###########

# This struct contains information about a use of certain value (`SSAValue` or `Slot`)
# This might be a toplevel use for `Slot` in which case the `expr` field is `#undef`,
# and it only happens if the slot might be used before it's defined.
struct ValueUse
    # The statement array where `expr` (or its parent assignment) appears in
    stmts::Vector{Any}
    # The position of the `expr` in the `stmts` array
    stmtidx::Int

    # The position the value appears in `expr`
    exidx::Int
    # The expression the value is used in.
    # If `expr` is undef, the use is a statement level use.
    # This must be one of the following:
    # 1. A statement level `Expr(:(=), ...)`.
    # 2. The RHS of a statement level `Expr(:(=), ...)`.
    # 3. A `&` ccall argument.
    expr::Expr

    ValueUse(stmts, stmtidx, expr, exidx) = new(stmts, stmtidx, exidx, expr)
    ValueUse(stmts, stmtidx) = new(stmts, stmtidx, 0)
end

# This struct contains information about a def of certain value (`SSAValue` or `Slot`)
# The `assign` field is usually an assignment but it might be a `NewvarNode` for `Slot`,
# which can happen if the slot might be used before it's defined.
struct ValueDef
    assign::Union{Expr,NewvarNode}
    # The statement array where `expr` (or its parent assignment) appears in
    stmts::Vector{Any}
    # The position of the `expr` in the `stmts` array
    stmtidx::Int
end

# Uses and defs of a value.
mutable struct ValueInfo
    uses::Vector{ValueUse}
    defs::Vector{ValueDef}
    has_method::Bool
    ValueInfo() = new(EMPTY_USES, EMPTY_DEFS, false)
end

# The def and use information of all variables in a function.
# `slots` is indexed by slot id and `ssas` is indexed by ssa id + 1.
# This structure is indexable by either a `Slot`, a `SSAValue` or a `id=>is_ssa` pair.
struct ValueInfoMap
    slots::Vector{ValueInfo}
    ssas::Vector{ValueInfo}
    ValueInfoMap() = new(ValueInfo[], ValueInfo[])
end

struct StructInfo
    defs::Vector{Any}
    names::Vector{Symbol}
    typ::DataType
    mutable::Bool
    isnew::Bool
end

struct InvokeData
    mt::Core.MethodTable
    entry::Core.TypeMapEntry
    types0
    fexpr
    texpr
end

struct AllocOptContext
    infomap::ValueInfoMap
    sv::OptimizationState
    todo::IdDict{Any,Any}
    changes::IdDict{Any,Any}
    sym_count::IdDict{Any,Any}
    all_fld::IdDict{Any,Any}
    setfield_typ::IdDict{Any,Any}
    undef_fld::IdDict{Any,Any}
    structinfos::Vector{StructInfo}
    function AllocOptContext(infomap::ValueInfoMap, sv::OptimizationState)
        todo = IdDict()
        for i in 1:length(infomap.ssas)
            isassigned(infomap.ssas, i) || continue
            todo[i=>true] = nothing
        end
        for i in 1:length(infomap.slots)
            isassigned(infomap.slots, i) || continue
            i > sv.nargs || continue
            todo[i=>false] = nothing
        end
        return new(infomap, sv, todo, IdDict(), IdDict(),
                   IdDict(), IdDict(), IdDict(), StructInfo[])
    end
end

#############
# constants #
#############

# The slot has uses that are not statically dominated by any assignment
# This is implied by `SLOT_USEDUNDEF`.
# If this is not set, all the uses are (statically) dominated by the defs.
# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
const SLOT_STATICUNDEF  = 1

const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once

const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError

# const SLOT_CALLED      = 64

# known affect-free calls (also effect-free)
const _PURE_BUILTINS = Any[tuple, svec, fieldtype, apply_type, ===, isa, typeof, UnionAll, nfields]

# known effect-free calls (might not be affect-free)
const _PURE_BUILTINS_VOLATILE = Any[getfield, arrayref, isdefined, Core.sizeof]

const TOP_GETFIELD = GlobalRef(Core, :getfield)

const TOP_TUPLE = GlobalRef(Core, :tuple)

const META_POP_LOC = Expr(:meta, :pop_loc)

const ENABLE_VERIFY_VALUEINFO = (tmp = Array{Bool,0}(uninitialized); tmp[] = false; tmp)

# allocation optimization, must not be mutated.
const EMPTY_USES = ValueUse[]
const EMPTY_DEFS = ValueDef[]

#########
# logic #
#########

function isinlineable(m::Method, src::CodeInfo, mod::Module, params::Params, bonus::Int=0)
    # compute the cost (size) of inlining this code
    inlineable = false
    cost_threshold = params.inline_cost_threshold
    if m.module === _topmod(m.module)
        # a few functions get special treatment
        name = m.name
        sig = m.sig
        if ((name === :+ || name === :* || name === :min || name === :max) &&
            isa(sig,DataType) &&
            sig == Tuple{sig.parameters[1],Any,Any,Any,Vararg{Any}})
            inlineable = true
        elseif (name === :next || name === :done || name === :unsafe_convert ||
                name === :cconvert)
            cost_threshold *= 4
        end
    end
    if !inlineable
        inlineable = inline_worthy(src.code, src, mod, params, cost_threshold + bonus)
    end
    return inlineable
end

# converge the optimization work
function optimize(me::InferenceState)
    # annotate fulltree with type information
    type_annotate!(me)

    # run optimization passes on fulltree
    force_noinline = true
    if me.limited && me.cached && me.parent !== nothing
        # a top parent will be cached still, but not this intermediate work
        me.cached = false
        me.linfo.inInference = false
    elseif me.optimize
        opt = OptimizationState(me)
        # This pass is required for the AST to be valid in codegen
        # if any `SSAValue` is created by type inference. Ref issue #6068
        # This (and `reindex_labels!`) needs to be run for `!me.optimize`
        # if we start to create `SSAValue` in type inference when not
        # optimizing and use unoptimized IR in codegen.
        gotoifnot_elim_pass!(opt)
        inlining_pass!(opt, opt.src.propagate_inbounds)
        # Clean up after inlining
        gotoifnot_elim_pass!(opt)
        basic_dce_pass!(opt)
        void_use_elim_pass!(opt)
        copy_duplicated_expr_pass!(opt)
        split_undef_flag_pass!(opt)
        fold_constant_getfield_pass!(opt)
        # Compute escape information
        # and elide unnecessary allocations
        alloc_elim_pass!(opt)
        # Clean up for `alloc_elim_pass!`
        gotoifnot_elim_pass!(opt)
        basic_dce_pass!(opt)
        void_use_elim_pass!(opt)
        # Pop metadata before label reindexing
        let code = opt.src.code::Array{Any,1}
            meta_elim_pass!(code, coverage_enabled())
            filter!(x -> x !== nothing, code)
            force_noinline = peekmeta(code, :noinline)[1]
        end
        reindex_labels!(opt)
        me.min_valid = opt.min_valid
        me.max_valid = opt.max_valid
    end

    # convert all type information into the form consumed by the code-generator
    widen_all_consts!(me.src)

    # compute inlining and other related properties
    me.const_ret = (isa(me.bestguess, Const) || isconstType(me.bestguess))
    if me.const_ret
        proven_pure = false
        # must be proven pure to use const_api; otherwise we might skip throwing errors
        # (issue #20704)
        # TODO: Improve this analysis; if a function is marked @pure we should really
        # only care about certain errors (e.g. method errors and type errors).
        if length(me.src.code) < 10
            proven_pure = true
            for stmt in me.src.code
                if !statement_effect_free(stmt, me.src, me.mod)
                    proven_pure = false
                    break
                end
            end
            if proven_pure
                for fl in me.src.slotflags
                    if (fl & SLOT_USEDUNDEF) != 0
                        proven_pure = false
                        break
                    end
                end
            end
        end
        if proven_pure
            me.src.pure = true
        end

        if proven_pure && !coverage_enabled()
            # use constant calling convention
            # Do not emit `jlcall_api == 2` if coverage is enabled
            # so that we don't need to add coverage support
            # to the `jl_call_method_internal` fast path
            # Still set pure flag to make sure `inference` tests pass
            # and to possibly enable more optimization in the future
            if !(isa(me.bestguess, Const) && !is_inlineable_constant(me.bestguess.val))
                me.const_api = true
            end
            force_noinline || (me.src.inlineable = true)
        end
    end

    # determine and cache inlineability
    if !force_noinline
        # don't keep ASTs for functions specialized on a Union argument
        # TODO: this helps avoid a type-system bug mis-computing sparams during intersection
        sig = unwrap_unionall(me.linfo.specTypes)
        if isa(sig, DataType) && sig.name === Tuple.name
            for P in sig.parameters
                P = unwrap_unionall(P)
                if isa(P, Union)
                    force_noinline = true
                    break
                end
            end
        else
            force_noinline = true
        end
    end
    def = me.linfo.def
    if force_noinline
        me.src.inlineable = false
    elseif !me.src.inlineable && isa(def, Method)
        bonus = 0
        if me.bestguess ⊑ Tuple && !isbits(widenconst(me.bestguess))
            bonus = me.params.inline_tupleret_bonus
        end
        me.src.inlineable = isinlineable(def, me.src, me.mod, me.params, bonus)
    end
    me.src.inferred = true
    if me.optimize && !(me.limited && me.parent !== nothing)
        validate_code_in_debug_mode(me.linfo, me.src, "optimized")
    end
    nothing
end

# inference completed on `me`
# update the MethodInstance and notify the edges
function finish(me::InferenceState)
    if me.cached
        toplevel = !isa(me.linfo.def, Method)
        if !toplevel
            min_valid = me.min_valid
            max_valid = me.max_valid
        else
            min_valid = UInt(0)
            max_valid = UInt(0)
        end

        # check if the existing me.linfo metadata is also sufficient to describe the current inference result
        # to decide if it is worth caching it again (which would also clear any generated code)
        already_inferred = !me.linfo.inInference
        if isdefined(me.linfo, :inferred)
            inf = me.linfo.inferred
            if !isa(inf, CodeInfo) || (inf::CodeInfo).inferred
                if min_world(me.linfo) == min_valid && max_world(me.linfo) == max_valid
                    already_inferred = true
                end
            end
        end

        # don't store inferred code if we've decided to interpret this function
        if !already_inferred && me.linfo.jlcall_api != 4
            const_flags = (me.const_ret) << 1 | me.const_api
            if me.const_ret
                if isa(me.bestguess, Const)
                    inferred_const = (me.bestguess::Const).val
                else
                    @assert isconstType(me.bestguess)
                    inferred_const = me.bestguess.parameters[1]
                end
            else
                inferred_const = nothing
            end
            if me.const_api
                # use constant calling convention
                inferred_result = nothing
            else
                inferred_result = me.src
            end

            if !toplevel
                if !me.const_api
                    def = me.linfo.def::Method
                    keeptree = me.optimize &&
                        (me.src.inlineable ||
                         ccall(:jl_is_cacheable_sig, Int32, (Any, Any, Any), me.linfo.specTypes, def.sig, def) != 0)
                    if keeptree
                        # compress code for non-toplevel thunks
                        inferred_result = ccall(:jl_compress_ast, Any, (Any, Any), def, inferred_result)
                    else
                        inferred_result = nothing
                    end
                end
            end
            cache = ccall(:jl_set_method_inferred, Ref{MethodInstance}, (Any, Any, Any, Any, Int32, UInt, UInt),
                me.linfo, widenconst(me.bestguess), inferred_const, inferred_result,
                const_flags, min_valid, max_valid)
            if cache !== me.linfo
                me.linfo.inInference = false
                me.linfo = cache
                me.result.linfo = cache
            end
        end
        me.linfo.inInference = false
    end

    # finish updating the result struct
    if me.src.inlineable
        me.result.src = me.src # stash a copy of the code (for inlining)
    end
    me.result.result = me.bestguess # record type, and that wip is done and me.linfo can be used as a backedge

    # update all of the callers with real backedges by traversing the temporary list of backedges
    for (i, _) in me.backedges
        add_backedge!(me.linfo, i)
    end

    # finalize and record the linfo result
    me.inferred = true
    nothing
end

function maybe_widen_conditional(vt)
    if isa(vt, Conditional)
        if vt.vtype === Bottom
            vt = Const(false)
        elseif vt.elsetype === Bottom
            vt = Const(true)
        end
    end
    vt
end

function annotate_slot_load!(e::Expr, vtypes::VarTable, sv::InferenceState, undefs::Array{Bool,1})
    head = e.head
    i0 = 1
    e.typ = maybe_widen_conditional(e.typ)
    if is_meta_expr_head(head) || head === :const
        return
    end
    if head === :(=) || head === :method
        i0 = 2
    end
    for i = i0:length(e.args)
        subex = e.args[i]
        if isa(subex, Expr)
            annotate_slot_load!(subex, vtypes, sv, undefs)
        elseif isa(subex, Slot)
            id = slot_id(subex)
            s = vtypes[id]
            vt = maybe_widen_conditional(s.typ)
            if s.undef
                # find used-undef variables
                undefs[id] = true
            end
            #  add type annotations where needed
            if !(sv.src.slottypes[id] ⊑ vt)
                e.args[i] = TypedSlot(id, vt)
            end
        end
    end
end

function record_slot_assign!(sv::InferenceState)
    # look at all assignments to slots
    # and union the set of types stored there
    # to compute a lower bound on the storage required
    states = sv.stmt_types
    body = sv.src.code::Vector{Any}
    slottypes = sv.src.slottypes::Vector{Any}
    for i = 1:length(body)
        expr = body[i]
        st_i = states[i]
        # find all reachable assignments to locals
        if isa(st_i, VarTable) && isa(expr, Expr) && expr.head === :(=)
            lhs = expr.args[1]
            rhs = expr.args[2]
            if isa(lhs, Slot)
                id = slot_id(lhs)
                if isa(rhs, Slot)
                    # exprtype isn't yet computed for slots
                    vt = st_i[slot_id(rhs)].typ
                else
                    vt = exprtype(rhs, sv.src, sv.mod)
                end
                vt = widenconst(vt)
                if vt !== Bottom
                    otherTy = slottypes[id]
                    if otherTy === Bottom
                        slottypes[id] = vt
                    elseif otherTy === Any
                        slottypes[id] = Any
                    else
                        slottypes[id] = tmerge(otherTy, vt)
                    end
                end
            end
        end
    end
end

# annotate types of all symbols in AST
function type_annotate!(sv::InferenceState)
    # remove all unused ssa values
    gt = sv.src.ssavaluetypes
    for j = 1:length(gt)
        if gt[j] === NOT_FOUND
            gt[j] = Union{}
        end
    end

    # compute the required type for each slot
    # to hold all of the items assigned into it
    record_slot_assign!(sv)

    # annotate variables load types
    # remove dead code
    # and compute which variables may be used undef
    src = sv.src
    states = sv.stmt_types
    nargs = sv.nargs
    nslots = length(states[1])
    undefs = fill(false, nslots)
    body = src.code::Array{Any,1}
    nexpr = length(body)
    i = 1
    while i <= nexpr
        st_i = states[i]
        expr = body[i]
        if isa(st_i, VarTable)
            # st_i === ()  =>  unreached statement  (see issue #7836)
            if isa(expr, Expr)
                annotate_slot_load!(expr, st_i, sv, undefs)
            elseif isa(expr, Slot)
                id = slot_id(expr)
                if st_i[id].undef
                    # find used-undef variables in statement position
                    undefs[id] = true
                end
            end
        elseif sv.optimize
            if ((isa(expr, Expr) && is_meta_expr(expr)) ||
                 isa(expr, LineNumberNode))
                # keep any lexically scoped expressions
                i += 1
                continue
            end
            # This can create `Expr(:gotoifnot)` with dangling label, which we
            # will clean up by replacing them with the conditions later.
            deleteat!(body, i)
            deleteat!(states, i)
            nexpr -= 1
            continue
        end
        i += 1
    end

    # finish marking used-undef variables
    for j = 1:nslots
        if undefs[j]
            src.slotflags[j] |= SLOT_USEDUNDEF | SLOT_STATICUNDEF
        end
    end

    # The dead code elimination can delete the target of a reachable node. This
    # must mean that the target is unreachable. Later optimization passes will
    # assume that all branches lead to labels that exist, so we must replace
    # the node with the branch condition (which may have side effects).
    labelmap = get_label_map(body)
    for i in 1:length(body)
        expr = body[i]
        if isa(expr, Expr) && expr.head === :gotoifnot
            labelnum = labelmap[expr.args[2]::Int]
            if labelnum === 0
                body[i] = expr.args[1]
            end
        end
    end
    nothing
end

# widen all Const elements in type annotations
function _widen_all_consts!(e::Expr, untypedload::Vector{Bool}, slottypes::Vector{Any})
    e.typ = widenconst(e.typ)
    for i = 1:length(e.args)
        x = e.args[i]
        if isa(x, Expr)
            _widen_all_consts!(x, untypedload, slottypes)
        elseif isa(x, TypedSlot)
            vt = widenconst(x.typ)
            if !(vt === x.typ)
                if slottypes[x.id] ⊑ vt
                    x = SlotNumber(x.id)
                    untypedload[x.id] = true
                else
                    x = TypedSlot(x.id, vt)
                end
                e.args[i] = x
            end
        elseif isa(x, SlotNumber) && (i != 1 || e.head !== :(=))
            untypedload[x.id] = true
        end
    end
    nothing
end

function widen_all_consts!(src::CodeInfo)
    for i = 1:length(src.ssavaluetypes)
        src.ssavaluetypes[i] = widenconst(src.ssavaluetypes[i])
    end
    for i = 1:length(src.slottypes)
        src.slottypes[i] = widenconst(src.slottypes[i])
    end

    nslots = length(src.slottypes)
    untypedload = fill(false, nslots)
    e = Expr(:body)
    e.args = src.code
    _widen_all_consts!(e, untypedload, src.slottypes)
    for i = 1:nslots
        src.slottypes[i] = widen_slot_type(src.slottypes[i], untypedload[i])
    end
    return src
end

# widen all slots to their optimal storage layout
# we also need to preserve the type for any untyped load of a DataType
# since codegen optimizations of functions like `is` will depend on knowing it
function widen_slot_type(@nospecialize(ty), untypedload::Bool)
    if isa(ty, DataType)
        if untypedload || isbits(ty) || isdefined(ty, :instance)
            return ty
        end
    elseif isa(ty, Union)
        ty_a = widen_slot_type(ty.a, false)
        ty_b = widen_slot_type(ty.b, false)
        if ty_a !== Any || ty_b !== Any
            # TODO: better optimized codegen for unions?
            return ty
        end
    elseif isa(ty, UnionAll)
        if untypedload
            return ty
        end
    end
    return Any
end

# replace slots 1:na with argexprs, static params with spvals, and increment
# other slots by offset.
function substitute!(
        @nospecialize(e), na::Int, argexprs::Vector{Any},
        @nospecialize(spsig), spvals::Vector{Any},
        offset::Int, boundscheck::Symbol)
    if isa(e, Slot)
        id = slot_id(e)
        if 1 <= id <= na
            ae = argexprs[id]
            if isa(e, TypedSlot) && isa(ae, Slot)
                return TypedSlot(ae.id, e.typ)
            end
            return ae
        end
        if isa(e, SlotNumber)
            return SlotNumber(id + offset)
        else
            return TypedSlot(id + offset, e.typ)
        end
    end
    if isa(e, NewvarNode)
        return NewvarNode(substitute!(e.slot, na, argexprs, spsig, spvals, offset, boundscheck))
    end
    if isa(e, Expr)
        e = e::Expr
        head = e.head
        if head === :static_parameter
            return quoted(spvals[e.args[1]])
        elseif head === :foreigncall
            @assert !isa(spsig, UnionAll) || !isempty(spvals)
            for i = 1:length(e.args)
                if i == 2
                    e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], spsig, spvals)
                elseif i == 3
                    argtuple = Any[
                        ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals)
                        for argt
                        in e.args[3] ]
                    e.args[3] = svec(argtuple...)
                elseif i == 4
                    @assert isa((e.args[4]::QuoteNode).value, Symbol)
                elseif i == 5
                    @assert isa(e.args[5], Int)
                else
                    e.args[i] = substitute!(e.args[i], na, argexprs, spsig, spvals, offset, boundscheck)
                end
            end
        elseif head === :boundscheck
            if boundscheck === :propagate
                return e
            elseif boundscheck === :off
                return false
            else
                return true
            end
        elseif !is_meta_expr_head(head)
            for i = 1:length(e.args)
                e.args[i] = substitute!(e.args[i], na, argexprs, spsig, spvals, offset, boundscheck)
            end
        end
    end
    return e
end

# whether `f` is pure for inference
function is_pure_intrinsic_infer(f::IntrinsicFunction)
    return !(f === Intrinsics.pointerref || # this one is volatile
             f === Intrinsics.pointerset || # this one is never effect-free
             f === Intrinsics.llvmcall ||   # this one is never effect-free
             f === Intrinsics.arraylen ||   # this one is volatile
             f === Intrinsics.sqrt_llvm ||  # this one may differ at runtime (by a few ulps)
             f === Intrinsics.cglobal)  # cglobal lookup answer changes at runtime
end

# whether `f` is pure for optimizations
function is_pure_intrinsic_optim(f::IntrinsicFunction)
    return !(f === Intrinsics.pointerref || # this one is volatile
             f === Intrinsics.pointerset || # this one is never effect-free
             f === Intrinsics.llvmcall ||   # this one is never effect-free
             f === Intrinsics.arraylen ||   # this one is volatile
             f === Intrinsics.checked_sdiv_int ||  # these may throw errors
             f === Intrinsics.checked_udiv_int ||
             f === Intrinsics.checked_srem_int ||
             f === Intrinsics.checked_urem_int ||
             f === Intrinsics.cglobal)  # cglobal throws an error for symbol-not-found
end

function is_pure_builtin(@nospecialize(f))
    if isa(f, IntrinsicFunction)
        return is_pure_intrinsic_optim(f)
    elseif isa(f, Builtin)
        return (contains_is(_PURE_BUILTINS, f) ||
                contains_is(_PURE_BUILTINS_VOLATILE, f))
    else
        return f === return_type
    end
end

function statement_effect_free(@nospecialize(e), src::CodeInfo, mod::Module)
    if isa(e, Expr)
        if e.head === :(=)
            return !isa(e.args[1], GlobalRef) && effect_free(e.args[2], src, mod, false)
        elseif e.head === :gotoifnot
            return effect_free(e.args[1], src, mod, false)
        end
    elseif isa(e, LabelNode) || isa(e, GotoNode)
        return true
    end
    return effect_free(e, src, mod, false)
end

# detect some important side-effect-free calls (allow_volatile=true)
# and some affect-free calls (allow_volatile=false) -- affect_free means the call
# cannot be affected by previous calls, except assignment nodes
function effect_free(@nospecialize(e), src::CodeInfo, mod::Module, allow_volatile::Bool)
    if isa(e, GlobalRef)
        return (isdefined(e.mod, e.name) && (allow_volatile || isconst(e.mod, e.name)))
    elseif isa(e, Symbol)
        return allow_volatile
    elseif isa(e, Slot)
        return src.slotflags[slot_id(e)] & SLOT_USEDUNDEF == 0
    elseif isa(e, Expr)
        e = e::Expr
        head = e.head
        if is_meta_expr_head(head)
            return true
        end
        if head === :static_parameter
            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
            return isa(e.typ, Const) || issingletontype(widenconst(e.typ))
        end
        if e.typ === Bottom
            return false
        end
        ea = e.args
        if head === :call
            if is_known_call_p(e, is_pure_builtin, src, mod)
                if !allow_volatile
                    if is_known_call(e, arrayref, src, mod) || is_known_call(e, arraylen, src, mod)
                        return false
                    elseif is_known_call(e, getfield, src, mod)
                        nargs = length(ea)
                        (3 < nargs < 4) || return false
                        et = exprtype(e, src, mod)
                        # TODO: check ninitialized
                        if !isa(et, Const) && !isconstType(et)
                            # first argument must be immutable to ensure e is affect_free
                            a = ea[2]
                            typ = unwrap_unionall(widenconst(exprtype(a, src, mod)))
                            if isType(typ)
                                # all fields of subtypes of Type are effect-free
                                # (including the non-inferrable uid field)
                            elseif !isa(typ, DataType) || typ.abstract || (typ.mutable && length(typ.types) > 0)
                                return false
                            end
                        end
                    end
                end
                # fall-through
            elseif is_known_call(e, _apply, src, mod) && length(ea) > 1
                ft = exprtype(ea[2], src, mod)
                if !isa(ft, Const) || (!contains_is(_PURE_BUILTINS, ft.val) &&
                                       ft.val !== Core.sizeof)
                    return false
                end
                # fall-through
            else
                return false
            end
        elseif head === :new
            a = ea[1]
            typ = exprtype(a, src, mod)
            # `Expr(:new)` of unknown type could raise arbitrary TypeError.
            typ, isexact = instanceof_tfunc(typ)
            isexact || return false
            isconcretetype(typ) || return false
            !iskindtype(typ) || return false
            typ = typ::DataType
            if !allow_volatile && typ.mutable
                return false
            end
            fieldcount(typ) >= length(ea) - 1 || return false
            for fld_idx in 1:(length(ea) - 1)
                exprtype(ea[fld_idx + 1], src, mod) ⊑ fieldtype(typ, fld_idx) || return false
            end
            # fall-through
        elseif head === :return
            # fall-through
        elseif head === :isdefined
            return allow_volatile
        elseif head === :the_exception
            return allow_volatile
        elseif head === :copyast
            return true
        else
            return false
        end
        for a in ea
            if !effect_free(a, src, mod, allow_volatile)
                return false
            end
        end
    elseif isa(e, LabelNode) || isa(e, GotoNode)
        return false
    end
    return true
end

function inline_as_constant(@nospecialize(val), argexprs::Vector{Any}, sv::OptimizationState, @nospecialize(invoke_data))
    if invoke_data === nothing
        invoke_fexpr = nothing
        invoke_texpr = nothing
    else
        invoke_data = invoke_data::InvokeData
        invoke_fexpr = invoke_data.fexpr
        invoke_texpr = invoke_data.texpr
    end
    # check if any arguments aren't effect_free and need to be kept around
    stmts = invoke_fexpr === nothing ? [] : Any[invoke_fexpr]
    for i = 1:length(argexprs)
        arg = argexprs[i]
        if !effect_free(arg, sv.src, sv.mod, false)
            push!(stmts, arg)
        end
        if i == 1 && !(invoke_texpr === nothing)
            push!(stmts, invoke_texpr)
        end
    end
    return (quoted(val), stmts)
end

function countunionsplit(atypes)
    nu = 1
    for ti in atypes
        if isa(ti, Union)
            nu *= unionlen(ti::Union)
        end
    end
    return nu
end

function get_spec_lambda(@nospecialize(atypes), sv::OptimizationState, @nospecialize(invoke_data))
    if invoke_data === nothing
        return ccall(:jl_get_spec_lambda, Any, (Any, UInt), atypes, sv.params.world)
    else
        invoke_data = invoke_data::InvokeData
        atypes <: invoke_data.types0 || return nothing
        return ccall(:jl_get_invoke_lambda, Any, (Any, Any, Any, UInt),
                     invoke_data.mt, invoke_data.entry, atypes, sv.params.world)
    end
end

function invoke_NF(argexprs, @nospecialize(etype), atypes::Vector{Any}, sv::OptimizationState,
                   @nospecialize(atype_unlimited), @nospecialize(invoke_data))
    # converts a :call to :invoke
    nu = countunionsplit(atypes)
    nu > sv.params.MAX_UNION_SPLITTING && return NOT_FOUND
    if invoke_data === nothing
        invoke_fexpr = nothing
        invoke_texpr = nothing
    else
        invoke_data = invoke_data::InvokeData
        invoke_fexpr = invoke_data.fexpr
        invoke_texpr = invoke_data.texpr
    end

    if nu > 1
        stmts = []

        spec_miss = nothing
        error_label = nothing
        ex = Expr(:call)
        ex.typ = etype
        ex.args = copy(argexprs)
        invoke_texpr === nothing || insert!(stmts, 2, invoke_texpr)
        invoke_fexpr === nothing || pushfirst!(stmts, invoke_fexpr)

        local ret_var, merge, invoke_ex, spec_hit
        ret_var = add_slot!(sv.src, widenconst(etype), false)
        merge = genlabel(sv)
        invoke_ex = copy(ex)
        invoke_ex.head = :invoke
        pushfirst!(invoke_ex.args, nothing)
        spec_hit = false

        function splitunion(atypes::Vector{Any}, i::Int)
            if i == 0
                local sig = argtypes_to_type(atypes)
                local li = get_spec_lambda(sig, sv, invoke_data)
                li === nothing && return false
                add_backedge!(li, sv)
                local stmt = []
                invoke_ex = copy(invoke_ex)
                invoke_ex.args[1] = li
                push!(stmt, Expr(:(=), ret_var, invoke_ex))
                push!(stmt, GotoNode(merge.label))
                spec_hit = true
                return stmt
            else
                local ti = atypes[i]
                if isa(ti, Union)
                    local all = true
                    local stmts = []
                    local aei = ex.args[i]
                    for ty in uniontypes(ti::Union)
                        local ty
                        atypes[i] = ty
                        local match = splitunion(atypes, i - 1)
                        if match !== false
                            after = genlabel(sv)
                            isa_var = newvar!(sv, Bool)
                            isa_ty = Expr(:call, GlobalRef(Core, :isa), aei, ty)
                            isa_ty.typ = Bool
                            pushfirst!(match, Expr(:gotoifnot, isa_var, after.label))
                            pushfirst!(match, Expr(:(=), isa_var, isa_ty))
                            append!(stmts, match)
                            push!(stmts, after)
                        else
                            all = false
                        end
                    end
                    if all
                        error_label === nothing && (error_label = genlabel(sv))
                        push!(stmts, GotoNode(error_label.label))
                    else
                        spec_miss === nothing && (spec_miss = genlabel(sv))
                        push!(stmts, GotoNode(spec_miss.label))
                    end
                    atypes[i] = ti
                    return isempty(stmts) ? false : stmts
                else
                    return splitunion(atypes, i - 1)
                end
            end
        end
        local match = splitunion(atypes, length(atypes))
        if match !== false && spec_hit
            append!(stmts, match)
            if error_label !== nothing
                push!(stmts, error_label)
                error_call = Expr(:call, GlobalRef(_topmod(sv.mod), :error), "fatal error in type inference (type bound)")
                error_call.typ = Union{}
                push!(stmts, error_call)
            end
            if spec_miss !== nothing
                push!(stmts, spec_miss)
                push!(stmts, Expr(:(=), ret_var, ex))
                push!(stmts, GotoNode(merge.label))
            end
            push!(stmts, merge)
            return (ret_var, stmts)
        end
    else
        local cache_linfo = get_spec_lambda(atype_unlimited, sv, invoke_data)
        cache_linfo === nothing && return NOT_FOUND
        add_backedge!(cache_linfo, sv)
        argexprs = copy(argexprs)
        pushfirst!(argexprs, cache_linfo)
        ex = Expr(:invoke)
        ex.args = argexprs
        ex.typ = etype
        if invoke_texpr === nothing
            if invoke_fexpr === nothing
                return ex
            else
                return ex, Any[invoke_fexpr]
            end
        end
        newvar = newvar!(sv, atypes[1])
        stmts = Any[invoke_fexpr,
                    :($newvar = $(argexprs[2])),
                    invoke_texpr]
        argexprs[2] = newvar
        return ex, stmts
    end
    return NOT_FOUND
end

# inline functions whose bodies are "inline_worthy"
# where the function body doesn't contain any argument more than once.
# static parameters are ok if all the static parameter values are leaf types,
# meaning they are fully known.
# `ft` is the type of the function. `f` is the exact function if known, or else `nothing`.
# `pending_stmts` is an array of statements from functions inlined so far, so
# we can estimate the total size of the enclosing function after inlining.
function inlineable(@nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector{Any},
                    pending_stmt::Vector{Any}, boundscheck::Symbol,
                    sv::OptimizationState)
    argexprs = e.args

    if (f === typeassert || ft ⊑ typeof(typeassert)) && length(atypes) == 3
        # typeassert(x::S, T) => x, when S<:T
        a3 = atypes[3]
        if (isType(a3) && !has_free_typevars(a3) && atypes[2] ⊑ a3.parameters[1]) ||
            (isa(a3, Const) && isa(a3.val, Type) && atypes[2] ⊑ a3.val)
            return (argexprs[2], ())
        end
    end
    topmod = _topmod(sv)
    # special-case inliners for known pure functions that compute types
    if sv.params.inlining
        if isa(e.typ, Const) # || isconstType(e.typ)
            val = e.typ.val
            if (f === apply_type || f === fieldtype || f === typeof || f === (===) ||
                f === Core.sizeof || f === isdefined ||
                istopfunction(topmod, f, :typejoin) ||
                istopfunction(topmod, f, :isbits) ||
                istopfunction(topmod, f, :promote_type) ||
                (f === Core.kwfunc && length(argexprs) == 2) ||
                (is_inlineable_constant(val) &&
                 (contains_is(_PURE_BUILTINS, f) ||
                  (f === getfield && effect_free(e, sv.src, sv.mod, false)) ||
                  (isa(f, IntrinsicFunction) && is_pure_intrinsic_optim(f)))))
                return inline_as_constant(val, argexprs, sv, nothing)
            end
        end
    end
    invoke_data = nothing
    invoke_fexpr = nothing
    invoke_texpr = nothing
    if f === Core.invoke && length(atypes) >= 3
        ft = widenconst(atypes[2])
        invoke_tt = widenconst(atypes[3])
        if !(isconcretetype(ft) || ft <: Type) || !isType(invoke_tt) ||
                has_free_typevars(invoke_tt) || has_free_typevars(ft) || (ft <: Builtin)
            # TODO: this is really aggressive at preventing inlining of closures. maybe drop `isconcretetype` requirement?
            return NOT_FOUND
        end
        if !(isa(invoke_tt.parameters[1], Type) &&
             invoke_tt.parameters[1] <: Tuple)
            return NOT_FOUND
        end
        invoke_tt = invoke_tt.parameters[1]
        invoke_types = rewrap_unionall(Tuple{ft, unwrap_unionall(invoke_tt).parameters...}, invoke_tt)
        invoke_entry = ccall(:jl_gf_invoke_lookup, Any, (Any, UInt),
                             invoke_types, sv.params.world)
        invoke_entry === nothing && return NOT_FOUND
        invoke_fexpr = argexprs[1]
        invoke_texpr = argexprs[3]
        if effect_free(invoke_fexpr, sv.src, sv.mod, false)
            invoke_fexpr = nothing
        end
        if effect_free(invoke_texpr, sv.src, sv.mod, false)
            invoke_fexpr = nothing
        end
        invoke_data = InvokeData(ft.name.mt, invoke_entry,
                                 invoke_types, invoke_fexpr, invoke_texpr)
        atype0 = atypes[2]
        argexpr0 = argexprs[2]
        atypes = atypes[4:end]
        argexprs = argexprs[4:end]
        pushfirst!(atypes, atype0)
        pushfirst!(argexprs, argexpr0)
        f = isdefined(ft, :instance) ? ft.instance : nothing
    elseif isa(f, IntrinsicFunction) || ft ⊑ IntrinsicFunction ||
            isa(f, Builtin) || ft ⊑ Builtin
        return NOT_FOUND
    end

    atype_unlimited = argtypes_to_type(atypes)
    if !(invoke_data === nothing)
        invoke_data = invoke_data::InvokeData
        # TODO emit a type check and proceed for this case
        atype_unlimited <: invoke_data.types0 || return NOT_FOUND
    end
    if !sv.params.inlining
        return invoke_NF(argexprs, e.typ, atypes, sv, atype_unlimited,
                         invoke_data)
    end

    if length(atypes) == 3 && istopfunction(topmod, f, :!==)
        # special-case inliner for !== that precedes _methods_by_ftype union splitting
        # and that works, even though inference generally avoids inferring the `!==` Method
        if isa(e.typ, Const)
            return inline_as_constant(e.typ.val, argexprs, sv, nothing)
        end
        is_var = newvar!(sv, Bool)
        stmts = Any[ Expr(:(=), is_var, Expr(:call, GlobalRef(Core, :(===)), argexprs[2], argexprs[3])) ]
        stmts[1].args[2].typ = Bool
        not_is = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), is_var)
        not_is.typ = Bool
        return (not_is, stmts)
    elseif length(atypes) == 3 && istopfunction(topmod, f, :(>:))
        # special-case inliner for issupertype
        # that works, even though inference generally avoids inferring the `>:` Method
        if isa(e.typ, Const)
            return inline_as_constant(e.typ.val, argexprs, sv, nothing)
        end
        arg_T1 = argexprs[2]
        arg_T2 = argexprs[3]
        issubtype_stmts = ()
        if !effect_free(arg_T2, sv.src, sv.mod, false)
            # spill first argument to preserve order-of-execution
            issubtype_vnew = newvar!(sv, widenconst(exprtype(arg_T1, sv.src, sv.mod)))
            issubtype_stmts = Any[ Expr(:(=), issubtype_vnew, arg_T1) ]
            arg_T1 = issubtype_vnew
        end
        issubtype_expr = Expr(:call, GlobalRef(Core, :(<:)), arg_T2, arg_T1)
        issubtype_expr.typ = Bool
        return (issubtype_expr, issubtype_stmts)
    end

    if length(atype_unlimited.parameters) - 1 > sv.params.MAX_TUPLETYPE_LEN
        atype = limit_tuple_type(atype_unlimited, sv.params)
    else
        atype = atype_unlimited
    end
    if invoke_data === nothing
        min_valid = UInt[typemin(UInt)]
        max_valid = UInt[typemax(UInt)]
        meth = _methods_by_ftype(atype, 1, sv.params.world, min_valid, max_valid)
        if meth === false || length(meth) != 1
            return invoke_NF(argexprs, e.typ, atypes, sv,
                             atype_unlimited, invoke_data)
        end
        meth = meth[1]::SimpleVector
        metharg = meth[1]::Type
        methsp = meth[2]::SimpleVector
        method = meth[3]::Method
    else
        invoke_data = invoke_data::InvokeData
        method = invoke_data.entry.func
        (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any),
                                  atype_unlimited, method.sig)::SimpleVector
        methsp = methsp::SimpleVector
    end

    methsig = method.sig
    if !(atype <: metharg)
        return invoke_NF(argexprs, e.typ, atypes, sv, atype_unlimited,
                         invoke_data)
    end

    # check whether call can be inlined to just a quoted constant value
    if isa(f, widenconst(ft)) && !isdefined(method, :generator)
        if f === return_type
            if isconstType(e.typ)
                return inline_as_constant(e.typ.parameters[1], argexprs, sv, invoke_data)
            elseif isa(e.typ, Const)
                return inline_as_constant(e.typ.val, argexprs, sv, invoke_data)
            end
        elseif method.pure && isa(e.typ, Const) && e.typ.actual && is_inlineable_constant(e.typ.val)
            return inline_as_constant(e.typ.val, argexprs, sv, invoke_data)
        end
    end

    argexprs0 = argexprs
    atypes0 = atypes
    na = Int(method.nargs)
    # check for vararg function
    isva = false
    if na > 0 && method.isva
        @assert length(argexprs) >= na - 1
        # construct tuple-forming expression for argument tail
        vararg = mk_tuplecall(argexprs[na:end], sv)
        argexprs = Any[argexprs[1:(na - 1)]..., vararg]
        atypes = Any[atypes[1:(na - 1)]..., vararg.typ]
        isva = true
    elseif na != length(argexprs)
        # we have a method match only because an earlier
        # inference step shortened our call args list, even
        # though we have too many arguments to actually
        # call this function
        return NOT_FOUND
    end

    @assert na == length(argexprs)

    for i = 1:length(methsp)
        isa(methsp[i], TypeVar) && return NOT_FOUND
    end

    # see if the method has been previously inferred (and cached)
    linfo = code_for_method(method, metharg, methsp, sv.params.world, true) # Union{Nothing, MethodInstance}
    isa(linfo, MethodInstance) || return invoke_NF(argexprs0, e.typ, atypes0, sv,
                                                   atype_unlimited, invoke_data)
    linfo = linfo::MethodInstance
    if linfo.jlcall_api == 2
        # in this case function can be inlined to a constant
        add_backedge!(linfo, sv)
        return inline_as_constant(linfo.inferred_const, argexprs, sv, invoke_data)
    end

    # see if the method has a InferenceResult in the current cache
    # or an existing inferred code info store in `.inferred`
    haveconst = false
    for i in 1:length(atypes)
        a = atypes[i]
        if isa(a, Const) && !isdefined(typeof(a.val), :instance) && !(isa(a.val, Type) && issingletontype(a.val))
            # have new information from argtypes that wasn't available from the signature
            haveconst = true
            break
        end
    end
    if haveconst
        inf_result = cache_lookup(linfo, atypes, sv.params.cache) # Union{Nothing, InferenceResult}
    else
        inf_result = nothing
    end
    if isa(inf_result, InferenceResult) && isa(inf_result.src, CodeInfo)
        linfo = inf_result.linfo
        result = inf_result.result
        if (inf_result.src::CodeInfo).pure
            if isa(result, Const)
                inferred_const = result.val
            elseif isconstType(result)
                inferred_const = result.parameters[1]
            end
            if @isdefined(inferred_const) && is_inlineable_constant(inferred_const)
                add_backedge!(linfo, sv)
                return inline_as_constant(inferred_const, argexprs, sv, invoke_data)
            end
        end
        inferred = inf_result.src
        rettype = widenconst(result)
    elseif isdefined(linfo, :inferred)
        inferred = linfo.inferred
        rettype = linfo.rettype
    else
        rettype = Any
        inferred = nothing
    end

    # check that the code is inlineable
    if inferred === nothing
        src_inferred = src_inlineable = false
    else
        src_inferred = ccall(:jl_ast_flag_inferred, Bool, (Any,), inferred)
        src_inlineable = ccall(:jl_ast_flag_inlineable, Bool, (Any,), inferred)
    end
    if !src_inferred || !src_inlineable
        return invoke_NF(argexprs0, e.typ, atypes0, sv, atype_unlimited,
                         invoke_data)
    end

    # create the backedge
    add_backedge!(linfo, sv)

    # prepare the code object for mutation
    if isa(inferred, CodeInfo)
        src = inferred
        ast = copy_exprargs(inferred.code)
    else
        src = ccall(:jl_uncompress_ast, Any, (Any, Any), method, inferred::Vector{UInt8})::CodeInfo
        ast = src.code
    end
    ast = ast::Array{Any,1}
    nm = length(unwrap_unionall(metharg).parameters)
    body = Expr(:block)
    body.args = ast

    # see if each argument occurs only once in the body expression
    stmts = []
    prelude_stmts = []
    stmts_free = true # true = all entries of stmts are effect_free

    argexprs = copy(argexprs)
    if isva
        # move constructed vararg tuple to an ssavalue
        varargvar = newvar!(sv, atypes[na])
        push!(prelude_stmts, Expr(:(=), varargvar, argexprs[na]))
        argexprs[na] = varargvar
    end
    for i = na:-1:1 # stmts_free needs to be calculated in reverse-argument order
        #args_i = args[i]
        aei = argexprs[i]
        aeitype = argtype = widenconst(exprtype(aei, sv.src, sv.mod))
        if i == 1 && !(invoke_texpr === nothing)
            pushfirst!(prelude_stmts, invoke_texpr)
        end

        # ok for argument to occur more than once if the actual argument
        # is a symbol or constant, or is not affected by previous statements
        # that will exist after the inlining pass finishes
        affect_free = stmts_free  # false = previous statements might affect the result of evaluating argument
        occ = 0
        for j = length(body.args):-1:1
            b = body.args[j]
            if occ < 6
                occ += occurs_more(b, x->(isa(x, Slot) && slot_id(x) == i), 6)
            end
            if occ > 0 && affect_free && !effect_free(b, src, method.module, true)
                #TODO: we might be able to short-circuit this test better by memoizing effect_free(b) in the for loop over i
                affect_free = false
            end
            if occ > 5 && !affect_free
                break
            end
        end
        free = effect_free(aei, sv.src, sv.mod, true)
        if ((occ==0 && aeitype===Bottom) || (occ > 1 && !inline_worthy(aei, sv.src, sv.mod, sv.params)) ||
                (affect_free && !free) || (!affect_free && !effect_free(aei, sv.src, sv.mod, false)))
            if occ != 0
                vnew = newvar!(sv, aeitype)
                argexprs[i] = vnew
                pushfirst!(prelude_stmts, Expr(:(=), vnew, aei))
                stmts_free &= free
            elseif !free && !isType(aeitype)
                pushfirst!(prelude_stmts, aei)
                stmts_free = false
            end
        end
    end
    invoke_fexpr === nothing || pushfirst!(prelude_stmts, invoke_fexpr)

    # re-number the SSAValues and copy their type-info to the new ast
    ssavalue_types = src.ssavaluetypes
    if !isempty(ssavalue_types)
        incr = length(sv.src.ssavaluetypes)
        if incr != 0
            body = ssavalue_increment(body, incr)
        end
        append!(sv.src.ssavaluetypes, ssavalue_types)
    end

    # ok, substitute argument expressions for argument names in the body
    body = substitute!(body, na, argexprs, method.sig, Any[methsp...], length(sv.src.slotnames) - na, boundscheck)
    append!(sv.src.slotnames, src.slotnames[(na + 1):end])
    append!(sv.src.slottypes, src.slottypes[(na + 1):end])
    append!(sv.src.slotflags, src.slotflags[(na + 1):end])

    # make labels / goto statements unique
    # relocate inlining information
    newlabels = zeros(Int, label_counter(body.args))
    for i = 1:length(body.args)
        a = body.args[i]
        if isa(a, LabelNode)
            newlabel = genlabel(sv)
            newlabels[a.label] = newlabel.label
            body.args[i] = newlabel
        end
    end
    for i = 1:length(body.args)
        a = body.args[i]
        if isa(a, GotoNode)
            body.args[i] = GotoNode(newlabels[a.label])
        elseif isa(a, Expr)
            if a.head === :enter
                a.args[1] = newlabels[a.args[1]::Int]
            elseif a.head === :gotoifnot
                a.args[2] = newlabels[a.args[2]::Int]
            end
        end
    end

    # convert return statements into a series of goto's
    retstmt = genlabel(sv)
    local retval
    multiret = false
    lastexpr = pop!(body.args)
    if isa(lastexpr, LabelNode)
        push!(body.args, lastexpr)
        error_call = Expr(:call, GlobalRef(topmod, :error), "fatal error in type inference (lowering)")
        error_call.typ = Union{}
        push!(body.args, error_call)
        lastexpr = nothing
    elseif !(isa(lastexpr, Expr) && lastexpr.head === :return)
        # code sometimes ends with a meta node, e.g. inbounds pop
        push!(body.args, lastexpr)
        lastexpr = nothing
    end
    for a in body.args
        push!(stmts, a)
        if isa(a, Expr)
            if a.head === :return
                if !multiret
                    # create slot first time
                    retval = add_slot!(sv.src, rettype, false)
                end
                multiret = true
                pushfirst!(a.args, retval)
                a.head = :(=)
                push!(stmts, GotoNode(retstmt.label))
            end
        end
    end

    if multiret
        if lastexpr !== nothing
            pushfirst!(lastexpr.args, retval)
            lastexpr.head = :(=)
            push!(stmts, lastexpr)
        end
        push!(stmts, retstmt)
        expr = retval
    else
        # Dead code elimination can leave a non-return statement at the end
        if lastexpr === nothing
            expr = nothing
        else
            expr = lastexpr.args[1]
        end
    end

    inlining_ignore = function (@nospecialize(stmt),)
        isa(stmt, Expr) && return is_meta_expr(stmt::Expr)
        isa(stmt, LineNumberNode) && return true
        stmt === nothing && return true
        return false
    end

    do_coverage = coverage_enabled()
    line::Int = method.line
    file = method.file
    if !isempty(stmts)
        if !do_coverage && all(inlining_ignore, stmts)
            empty!(stmts)
        elseif isa(stmts[1], LineNumberNode)
            linenode = popfirst!(stmts)::LineNumberNode
            line = linenode.line
            isa(linenode.file, Symbol) && (file = linenode.file)
        end
    end
    if do_coverage
        # Check if we are switching module, which is necessary to catch user
        # code inlined into `Base` with `--code-coverage=user`.
        # Assume we are inlining directly into `enclosing` instead of another
        # function inlined in it
        mod = method.module
        if mod === sv.mod
            pushfirst!(stmts, Expr(:meta, :push_loc, file,
                                 method.name, line))
        else
            pushfirst!(stmts, Expr(:meta, :push_loc, file,
                                 method.name, line, mod))
        end
        push!(stmts, Expr(:meta, :pop_loc))
    elseif !isempty(stmts)
        pushfirst!(stmts, Expr(:meta, :push_loc, file,
                             method.name, line))
        if isa(stmts[end], LineNumberNode)
            stmts[end] = Expr(:meta, :pop_loc)
        else
            push!(stmts, Expr(:meta, :pop_loc))
        end
    end

    if isa(expr, Expr)
        old_t = e.typ
        if old_t ⊑ expr.typ
            # if we had better type information than the content being inlined,
            # change the return type now to use the better type
            expr.typ = old_t
        end
    end
    if !isempty(prelude_stmts)
        stmts = append!(prelude_stmts, stmts)
    end
    return (expr, stmts)
end

## Computing the cost of a function body

# saturating sum (inputs are nonnegative), prevents overflow with typemax(Int) below
plus_saturate(x, y) = max(x, y, x+y)

# known return type
isknowntype(@nospecialize T) = (T == Union{}) || isconcretetype(T)

function statement_cost(ex::Expr, line::Int, src::CodeInfo, mod::Module, params::Params)
    head = ex.head
    if is_meta_expr(ex) || head == :copyast # not sure if copyast is right
        return 0
    end
    argcost = 0
    for a in ex.args
        if a isa Expr
            argcost = plus_saturate(argcost, statement_cost(a, line, src, mod, params))
        end
    end
    if head == :return || head == :(=)
        return argcost
    end
    if head == :call
        extyp = exprtype(ex.args[1], src, mod)
        if isa(extyp, Type)
            return argcost
        end
        if isa(extyp, Const)
            f = (extyp::Const).val
            if isa(f, IntrinsicFunction)
                iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
                if !isassigned(T_IFUNC_COST, iidx)
                    # unknown/unhandled intrinsic
                    return plus_saturate(argcost, params.inline_nonleaf_penalty)
                end
                return plus_saturate(argcost, T_IFUNC_COST[iidx])
            end
            if isa(f, Builtin)
                # The efficiency of operations like a[i] and s.b
                # depend strongly on whether the result can be
                # inferred, so check ex.typ
                if f == Main.Core.getfield || f == Main.Core.tuple
                    # we might like to penalize non-inferrability, but
                    # tuple iteration/destructuring makes that
                    # impossible
                    # return plus_saturate(argcost, isknowntype(ex.typ) ? 1 : params.inline_nonleaf_penalty)
                    return argcost
                elseif f == Main.Core.arrayref
                    return plus_saturate(argcost, isknowntype(ex.typ) ? 4 : params.inline_nonleaf_penalty)
                end
                fidx = findfirst(x->x===f, T_FFUNC_KEY)
                if fidx === nothing
                    # unknown/unhandled builtin or anonymous function
                    # Use the generic cost of a direct function call
                    return plus_saturate(argcost, 20)
                end
                return plus_saturate(argcost, T_FFUNC_COST[fidx])
            end
        end
        return plus_saturate(argcost, params.inline_nonleaf_penalty)
    elseif head == :foreigncall || head == :invoke
        # Calls whose "return type" is Union{} do not actually return:
        # they are errors. Since these are not part of the typical
        # run-time of the function, we omit them from
        # consideration. This way, non-inlined error branches do not
        # prevent inlining.
        return ex.typ == Union{} ? 0 : plus_saturate(20, argcost)
    elseif head == :llvmcall
        return plus_saturate(10, argcost) # a wild guess at typical cost
    elseif head == :enter
        # try/catch is a couple function calls,
        # but don't inline functions with try/catch
        # since these aren't usually performance-sensitive functions,
        # and llvm is more likely to miscompile them when these functions get large
        return typemax(Int)
    elseif head == :gotoifnot
        target = ex.args[2]::Int
        # loops are generally always expensive
        # but assume that forward jumps are already counted for from
        # summing the cost of the not-taken branch
        return target < line ? plus_saturate(40, argcost) : argcost
    end
    return argcost
end

function inline_worthy(body::Array{Any,1}, src::CodeInfo, mod::Module,
                       params::Params,
                       cost_threshold::Integer=params.inline_cost_threshold)
    bodycost = 0
    for line = 1:length(body)
        stmt = body[line]
        if stmt isa Expr
            thiscost = statement_cost(stmt, line, src, mod, params)::Int
        elseif stmt isa GotoNode
            # loops are generally always expensive
            # but assume that forward jumps are already counted for from
            # summing the cost of the not-taken branch
            thiscost = stmt.label < line ? 40 : 0
        else
            continue
        end
        bodycost = plus_saturate(bodycost, thiscost)
        bodycost == typemax(Int) && return false
    end
    return bodycost <= cost_threshold
end

function inline_worthy(body::Expr, src::CodeInfo, mod::Module, params::Params,
                       cost_threshold::Integer=params.inline_cost_threshold)
    bodycost = statement_cost(body, typemax(Int), src, mod, params)
    return bodycost <= cost_threshold
end

function inline_worthy(@nospecialize(body), src::CodeInfo, mod::Module, params::Params,
                       cost_threshold::Integer=params.inline_cost_threshold)
    newbody = exprtype(body, src, mod)
    !isa(newbody, Expr) && return true
    return inline_worthy(newbody, src, mod, params, cost_threshold)
end

ssavalue_increment(@nospecialize(body), incr) = body
ssavalue_increment(body::SSAValue, incr) = SSAValue(body.id + incr)
function ssavalue_increment(body::Expr, incr)
    if is_meta_expr(body)
        return body
    end
    for i in 1:length(body.args)
        body.args[i] = ssavalue_increment(body.args[i], incr)
    end
    return body
end

function mk_getfield(texpr, i, T)
    e = Expr(:call, TOP_GETFIELD, texpr, i)
    e.typ = T
    return e
end

function mk_tuplecall(args, sv::OptimizationState)
    e = Expr(:call, TOP_TUPLE, args...)
    e.typ = tuple_tfunc(Tuple{Any[widenconst(exprtype(x, sv.src, sv.mod)) for x in args]...})
    return e
end

function inlining_pass!(sv::OptimizationState, propagate_inbounds::Bool)
    # Also handles bounds check elision:
    #
    #    1. If check_bounds is always on, set `Expr(:boundscheck)` true
    #    2. If check_bounds is always off, set `Expr(:boundscheck)` false
    #    3. If check_bounds is default, figure out whether each boundscheck
    #         is true, false, or propagate based on the enclosing inbounds directives
    _opt_check_bounds = JLOptions().check_bounds
    opt_check_bounds = (_opt_check_bounds == 0 ? :default :
                        _opt_check_bounds == 1 ? :on :
                        :off)
    # Number of stacked inbounds
    inbounds_depth = 0

    eargs = sv.src.code
    i = 1
    stmtbuf = []
    while i <= length(eargs)
        ei = eargs[i]
        if isa(ei, Expr)
            if ei.head === :inbounds
                eargs[i] = nothing
                arg1 = ei.args[1]
                if arg1 === true # push
                    inbounds_depth += 1
                elseif arg1 === false # clear
                    inbounds_depth = 0
                elseif inbounds_depth > 0 # pop
                    inbounds_depth -= 1
                end
            else
                if opt_check_bounds === :off
                     boundscheck = :off
                elseif opt_check_bounds === :on
                     boundscheck = :on
                elseif inbounds_depth > 0
                     boundscheck = :off
                elseif propagate_inbounds
                     boundscheck = :propagate
                else
                     boundscheck = :on
                end
                eargs[i] = inline_expr(ei, sv, stmtbuf, boundscheck)
                if !isempty(stmtbuf)
                    splice!(eargs, i:(i - 1), stmtbuf)
                    i += length(stmtbuf)
                    empty!(stmtbuf)
                end
            end
        end
        i += 1
    end
end


function inline_expr(e::Expr, sv::OptimizationState, stmts::Vector{Any}, boundscheck::Symbol)
    if e.head === :call
        return inline_call(e, sv, stmts, boundscheck)
    elseif e.head === :isdefined
        isa(e.typ, Const) && return e.typ.val
    elseif e.head === :(=) && isa(e.args[2], Expr)
        e.args[2] = inline_expr(e.args[2], sv, stmts, boundscheck)
    elseif e.head === :return && isa(e.args[1], Expr)
        e.args[1] = inline_expr(e.args[1], sv, stmts, boundscheck)
    end
    return e
end

function finddef(v::SSAValue, stmts::Vector{Any})
    for s in stmts
        if isa(s,Expr) && s.head === :(=) && s.args[1] === v
            return s
        end
    end
    return nothing
end

# return inlined replacement for call `e`, inserting new needed statements in `stmts`.
function inline_call(e::Expr, sv::OptimizationState, stmts::Vector{Any}, boundscheck::Symbol)
    eargs = e.args
    if length(eargs) < 1
        return e
    end
    arg1 = eargs[1]

    ft = exprtype(arg1, sv.src, sv.mod)
    if isa(ft, Const)
        f = ft.val
    elseif isa(ft, Conditional)
        f = nothing
        ft = Bool
    else
        f = nothing
        if !(isconcretetype(ft) || (widenconst(ft) <: Type)) || has_free_typevars(ft)
            # TODO: this is really aggressive at preventing inlining of closures. maybe drop `isconcretetype` requirement?
            return e
        end
    end

    ins = 1

    # TODO: determine whether this is really necessary
#=
    if sv.params.inlining
        if isdefined(Main, :Base) &&
            ((isdefined(Main.Base, :^) && f === Main.Base.:^) ||
             (isdefined(Main.Base, :.^) && f === Main.Base.:.^)) &&
            length(e.args) == 3

            a2 = e.args[3]
            if isa(a2, Symbol) || isa(a2, Slot) || isa(a2, SSAValue)
                ta2 = exprtype(a2, sv.src, sv.mod)
                if isa(ta2, Const)
                    a2 = ta2.val
                end
            end

            square = (a2 === Int32(2) || a2 === Int64(2))
            triple = (a2 === Int32(3) || a2 === Int64(3))
            if square || triple
                a1 = e.args[2]
                basenumtype = Union{CoreNumType, Main.Base.ComplexF32, Main.Base.ComplexF64, Main.Base.Rational}
                if isa(a1, basenumtype) || ((isa(a1, Symbol) || isa(a1, Slot) || isa(a1, SSAValue)) &&
                                           exprtype(a1, sv.src, sv.mod) ⊑ basenumtype)
                    if square
                        e.args = Any[GlobalRef(Main.Base,:*), a1, a1]
                        res = inlining_pass(e, sv, stmts, ins, boundscheck)
                    else
                        e.args = Any[GlobalRef(Main.Base,:*), Expr(:call, GlobalRef(Main.Base,:*), a1, a1), a1]
                        e.args[2].typ = e.typ
                        res = inlining_pass(e, sv, stmts, ins, boundscheck)
                    end
                    return res
                end
            end
        end
    end
=#
    for ninline = 1:100
        ata = Vector{Any}(uninitialized, length(e.args))
        ata[1] = ft
        for i = 2:length(e.args)
            a = exprtype(e.args[i], sv.src, sv.mod)
            (a === Bottom || isvarargtype(a)) && return e
            ata[i] = a
        end
        res = inlineable(f, ft, e, ata, stmts, boundscheck, sv)
        if isa(res,Tuple)
            if isa(res[2],Array) && !isempty(res[2])
                splice!(stmts, ins:ins-1, res[2])
                ins += length(res[2])
            end
            res = res[1]
        end

        if res !== NOT_FOUND
            # iteratively inline apply(f, tuple(...), tuple(...), ...) in order
            # to simplify long vararg lists as in multi-arg +
            if isa(res,Expr) && is_known_call(res, _apply, sv.src, sv.mod)
                e = res::Expr
                f = _apply; ft = AbstractEvalConstant(f)
            else
                return res
            end
        end

        if f === _apply
            na = length(e.args)
            newargs = Vector{Any}(uninitialized, na-2)
            newstmts = Any[]
            effect_free_upto = 0
            for i = 3:na
                aarg = e.args[i]
                argt = exprtype(aarg, sv.src, sv.mod)
                t = widenconst(argt)
                if isa(argt, Const) && (isa(argt.val, Tuple) || isa(argt.val, SimpleVector)) &&
                        effect_free(aarg, sv.src, sv.mod, true)
                    val = argt.val
                    newargs[i - 2] = Any[ quoted(val[i]) for i in 1:(length(val)::Int) ] # avoid making a tuple Generator here!
                elseif isa(aarg, Tuple) || (isa(aarg, QuoteNode) && (isa(aarg.value, Tuple) || isa(aarg.value, SimpleVector)))
                    if isa(aarg, QuoteNode)
                        aarg = aarg.value
                    end
                    newargs[i - 2] = Any[ quoted(aarg[i]) for i in 1:(length(aarg)::Int) ] # avoid making a tuple Generator here!
                elseif isa(t, DataType) && t.name === Tuple.name && !isvatuple(t) &&
                         length(t.parameters) <= sv.params.MAX_TUPLE_SPLAT
                    for k = (effect_free_upto + 1):(i - 3)
                        as = newargs[k]
                        for kk = 1:length(as)
                            ak = as[kk]
                            if !effect_free(ak, sv.src, sv.mod, true)
                                tmpv = newvar!(sv, widenconst(exprtype(ak, sv.src, sv.mod)))
                                push!(newstmts, Expr(:(=), tmpv, ak))
                                as[kk] = tmpv
                            end
                        end
                    end
                    effect_free_upto = i - 3
                    if effect_free(aarg, sv.src, sv.mod, true)
                        # apply(f,t::(x,y)) => f(t[1],t[2])
                        tmpv = aarg
                    else
                        # apply(f,t::(x,y)) => tmp=t; f(tmp[1],tmp[2])
                        tmpv = newvar!(sv, t)
                        push!(newstmts, Expr(:(=), tmpv, aarg))
                    end
                    if is_specializable_vararg_slot(aarg, sv)
                        tp = sv.vararg_type_container.parameters
                    else
                        tp = t.parameters
                    end
                    ntp = length(tp)::Int
                    if isa(aarg,SSAValue) && any(p->(p === DataType || p === UnionAll || p === Union || p === Type), tp)
                        # replace element type from Tuple{DataType} with more specific type if possible
                        def = finddef(aarg, sv.src.code)
                        if def !== nothing
                            defex = def.args[2]
                            if isa(defex, Expr) && is_known_call(defex, tuple, sv.src, sv.mod)
                                tp = collect(Any, tp)
                                for j = 1:ntp
                                    specific_type = exprtype(defex.args[j+1], sv.src, sv.mod)
                                    if (tp[j] <: Type) && specific_type ⊑ tp[j]
                                        tp[j] = specific_type
                                    end
                                end
                            end
                        end
                    end
                    fldvars = Any[ newvar!(sv, tp[j]) for j in 1:ntp ]
                    for j = 1:ntp
                        push!(newstmts, Expr(:(=), fldvars[j], mk_getfield(tmpv, j, tp[j])))
                    end
                    newargs[i - 2] = fldvars
                else
                    # not all args expandable
                    return e
                end
            end
            splice!(stmts, ins:(ins - 1), newstmts)
            ins += length(newstmts)
            e.args = [Any[e.args[2]]; newargs...]

            # now try to inline the simplified call
            ft = exprtype(e.args[1], sv.src, sv.mod)
            if isa(ft, Const)
                f = ft.val
            elseif isa(ft, Conditional)
                f = nothing
                ft = Bool
            else
                f = nothing
                if !(isconcretetype(ft) || (widenconst(ft) <: Type)) || has_free_typevars(ft)
                    # TODO: this is really aggressive at preventing inlining of closures. maybe drop `isconcretetype` requirement?
                    return e
                end
            end
        else
            return e
        end
    end
    return e
end


function add_slot!(src::CodeInfo, @nospecialize(typ), is_sa::Bool, name::Symbol=COMPILER_TEMP_SYM)
    @assert !isa(typ, Const) && !isa(typ, Conditional)
    id = length(src.slotnames) + 1
    push!(src.slotnames, name)
    push!(src.slottypes, typ)
    push!(src.slotflags, is_sa * SLOT_ASSIGNEDONCE)
    return SlotNumber(id)
end

function is_known_call(e::Expr, @nospecialize(func), src::CodeInfo, mod::Module)
    if e.head !== :call
        return false
    end
    f = exprtype(e.args[1], src, mod)
    return isa(f, Const) && f.val === func
end

function is_known_call_p(e::Expr, @nospecialize(pred), src::CodeInfo, mod::Module)
    if e.head !== :call
        return false
    end
    f = exprtype(e.args[1], src, mod)
    return (isa(f, Const) && pred(f.val)) || (isType(f) && pred(f.parameters[1]))
end

symequal(x::SSAValue, y::SSAValue) = x.id === y.id
symequal(x::Slot    , y::Slot)     = x.id === y.id
symequal(@nospecialize(x)     , @nospecialize(y))      = x === y

function void_use_elim_pass!(sv::OptimizationState)
    # Remove top level SSAValue and slots that is `!usedUndef`.
    # Also remove some `nothing` while we are at it....
    not_void_use = function (@nospecialize(ex),)
        if isa(ex, SSAValue)
            # Explicitly listed here for clarity
            return false
        elseif isa(ex, Slot)
            return sv.src.slotflags[slot_id(ex)] & SLOT_USEDUNDEF != 0
        elseif isa(ex, GlobalRef)
            ex = ex::GlobalRef
            return !isdefined(ex.mod, ex.name)
        elseif isa(ex, Expr)
            h = ex.head
            if h === :return || h === :(=) || h === :gotoifnot || is_meta_expr_head(h)
                return true
            elseif h === :isdefined || h === :copyast || h === :the_exception
                return false
            end
            return !effect_free(ex, sv.src, sv.mod, false)
        elseif (isa(ex, GotoNode) || isa(ex, LineNumberNode) ||
                isa(ex, NewvarNode) || isa(ex, Symbol) || isa(ex, LabelNode))
            # This is a list of special types handled by the compiler
            return true
        end
        return false
    end
    filter!(not_void_use, sv.src.code::Array{Any,1})
    nothing
end

function meta_elim_pass!(code::Array{Any,1}, do_coverage::Bool)
    # 1. Remove place holders
    #
    # 2. If coverage is off, remove line number nodes that don't mark any
    #    real expressions.
    #
    # 3. Remove top level SSAValue
    #

    # Position of the last line number node without any non-meta expressions
    # in between.
    prev_dbg_stack = Int[0] # always non-empty
    # Whether there's any non-meta exprs after the enclosing `push_loc`
    push_loc_pos_stack = Int[0] # always non-empty

    for i in 1:length(code)
        ex = code[i]
        if ex === nothing
            continue
        elseif isa(ex, SSAValue)
            code[i] = nothing
            continue
        elseif isa(ex, LabelNode)
            prev_dbg_stack[end] = 0
            push_loc_pos_stack[end] = 0
            continue
        elseif !do_coverage && isa(ex, LineNumberNode)
            prev_label = prev_dbg_stack[end]
            if prev_label != 0 && code[prev_label].file === ex.file
                code[prev_label] = nothing
            end
            prev_dbg_stack[end] = i
            continue
        elseif !isa(ex, Expr)
            prev_dbg_stack[end] = 0
            push_loc_pos_stack[end] = 0
            continue
        end
        ex = ex::Expr
        args = ex.args
        head = ex.head
        if head !== :meta
            prev_dbg_stack[end] = 0
            push_loc_pos_stack[end] = 0
            continue
        end
        nargs = length(args)
        if do_coverage || nargs == 0
            continue
        end
        arg1 = args[1]
        if arg1 === :push_loc
            push!(prev_dbg_stack, 0)
            push!(push_loc_pos_stack, i)
        elseif arg1 === :pop_loc
            npops = (nargs > 1 ? args[2]::Int : 1)
            for pop in 1:npops
                prev_dbg = if length(prev_dbg_stack) > 1
                    pop!(prev_dbg_stack)
                else
                    prev_dbg_stack[end]
                end
                if prev_dbg > 0
                    code[prev_dbg] = nothing
                end
                push_loc = if length(push_loc_pos_stack) > 1
                    pop!(push_loc_pos_stack)
                else
                    push_loc_pos_stack[end]
                end
                if push_loc > 0
                    code[push_loc] = nothing
                    npops -= 1
                else
                    prev_dbg_stack[end] = 0
                    push_loc_pos_stack[end] = 0
                end
            end
            if npops > 1
                code[i] = Expr(:meta, :pop_loc, npops)
            elseif npops == 1
                code[i] = Expr(:meta, :pop_loc)
            else
                code[i] = nothing
            end
        else
            continue
        end
    end

    # combine consecutive :pop_loc instructions
    lastpop = nothing
    npops = 0
    for i in 1:length(code)
        ex = code[i]
        if isa(ex, Expr) && ex.head === :meta && length(ex.args) > 0 && ex.args[1] == :pop_loc
            npops += (length(ex.args) > 1 ? ex.args[2]::Int : 1)
            if lastpop === nothing
                lastpop = i
            else
                code[i] = nothing
            end
        elseif ex !== nothing && lastpop !== nothing
            if npops > 1
                popex = code[lastpop]
                if length(popex.args) == 1
                    code[lastpop] = Expr(:meta, :pop_loc, npops)
                else
                    popex.args[2] = npops
                end
            end
            lastpop = nothing
            npops = 0
        end
    end
end

# Replace branches with constant conditions with unconditional branches
function gotoifnot_elim_pass!(sv::OptimizationState)
    body = sv.src.code
    i = 1
    while i < length(body)
        expr = body[i]
        i += 1
        isa(expr, Expr) || continue
        expr.head === :gotoifnot || continue
        cond = expr.args[1]
        condt = exprtype(cond, sv.src, sv.mod)
        isa(condt, Const) || continue
        val = (condt::Const).val
        # Codegen should emit an unreachable if val is not a Bool so
        # we don't need to do anything (also, type inference currently
        # doesn't recognize the error for strictly non-Bool condition)
        if isa(val, Bool)
            # in case there's side effects... (like raising `UndefVarError`)
            body[i - 1] = cond
            if val === false
                insert!(body, i, GotoNode(expr.args[2]))
                i += 1
            end
        end
    end
end

# basic dead-code-elimination of unreachable statements
function basic_dce_pass!(sv::OptimizationState)
    body = sv.src.code
    labelmap = get_label_map(body)
    reachable = BitSet()
    W = BitSet()
    push!(W, 1)
    while !isempty(W)
        pc = pop!(W)
        pc in reachable && continue
        push!(reachable, pc)
        expr = body[pc]
        pc´ = pc + 1 # next program-counter (after executing instruction)
        if isa(expr, GotoNode)
            pc´ = labelmap[expr.label]
        elseif isa(expr, Expr)
            if expr.head === :gotoifnot
                push!(W, labelmap[expr.args[2]::Int])
            elseif expr.head === :enter
                push!(W, labelmap[expr.args[1]::Int])
            elseif expr.head === :return
                continue
            end
        end
        pc´ <= length(body) && push!(W, pc´)
    end
    for i in 1:length(body)
        expr = body[i]
        if !(i in reachable ||
             (isa(expr, Expr) && is_meta_expr(expr)) ||
             isa(expr, LineNumberNode))
            body[i] = nothing
        end
    end
end

# Run on linear IR before the `alloc_elim_pass!` to fold away
# getfield on constant immutable objects. Any chained getfield will then be optimized
# away by the `alloc_elim_pass!`.
# Also expect undefined variable access check to be lifted into flags.
function fold_constant_getfield_pass!(sv::OptimizationState)
    body = sv.src.code
    nexpr = length(body)
    for i in 1:nexpr
        ex = body[i]
        isa(ex, Expr) || continue
        head = ex.head
        parent = body
        pidx = i
        if head === :(=)
            parent = ex.args
            pidx = 2
            ex = ex.args[2]
            isa(ex, Expr) || continue
            head = ex.head
        end
        (head === :call && 2 < length(ex.args) < 5) || continue
        is_known_call(ex, getfield, sv.src, sv.mod) || continue
        # No side effect can happen for linear IR
        obj = exprtype(ex.args[2], sv.src, sv.mod)
        isa(obj, Const) || continue
        obj = obj.val
        is_inlineable_constant(obj) || continue
        fld = ex.args[3]
        if isa(fld, Int)
            fld = fld
        elseif isa(fld, QuoteNode)
            fld = fld.value
        else
            continue
        end
        isa(fld, Symbol) || isa(fld, Int) || continue
        if isdefined(obj, fld)
            parent[pidx] = quoted(getfield(obj, fld))
        end
    end
end

function get_undef_flag_slot(src::CodeInfo, flagslots, id)
    flag_id = flagslots[id]
    flag_id != 0 && return SlotNumber(flag_id)
    slot = add_slot!(src, Nothing, src.slotflags[id] & SLOT_ASSIGNEDONCE != 0, src.slotnames[id])
    flag_id = slot_id(slot)
    src.slotflags[flag_id] |= SLOT_STATICUNDEF | SLOT_USEDUNDEF
    flagslots[id] = flag_id
    return slot
end

# Run on linear IR before the `alloc_elim_pass!` to make sure all expression arguments are
# effect-free while maintaining the original def-use chain so that assignments of allocation
# to a UsedUndef slot can still be optimized.
# After this pass, we are sure that no `Expr` arguments have side-effects.
function split_undef_flag_pass!(sv::OptimizationState)
    body = sv.src.code
    len = length(body)
    next_i = 1
    norigslots = length(sv.src.slotflags)
    flagslots = zeros(Int, norigslots)
    while next_i <= len
        i = next_i
        next_i += 1
        ex = body[i]
        if isa(ex, Slot)
            id = slot_id(ex)
            var_has_undef(sv.src, id) || continue
            body[i] = get_undef_flag_slot(sv.src, flagslots, id)
            continue
        elseif isa(ex, NewvarNode)
            id = slot_id(ex.slot)
            var_has_undef(sv.src, id) || continue
            body[i] = NewvarNode(get_undef_flag_slot(sv.src, flagslots, id))
            continue
        elseif !isa(ex, Expr)
            continue
        end
        head = ex.head
        if head === :(=)
            rhs = ex.args[2]
            lhs = ex.args[1]
            if isa(lhs, Slot)
                id = slot_id(lhs)
                if var_has_undef(sv.src, id)
                    flagslot = get_undef_flag_slot(sv.src, flagslots, id)
                    insert!(body, i + 1, :($flagslot = $nothing))
                    next_i += 1
                    len += 1
                end
            end
            if isa(rhs, Expr)
                ex = rhs
                head = ex.head
            else
                if isa(rhs, Slot)
                    id = slot_id(rhs)
                    if var_has_undef(sv.src, id)
                        insert!(body, i, get_undef_flag_slot(sv.src, flagslots, id))
                        i += 1
                        next_i += 1
                        len += 1
                    end
                end
                continue
            end
        end
        eargs = ex.args
        if head === :const || is_meta_expr_head(head)
            continue
        elseif head === :isdefined
            @assert length(eargs) == 1
            ea1 = eargs[1]
            if isa(ea1, Slot)
                id = slot_id(ea1)
                if var_has_undef(sv.src, id)
                    eargs[1] = get_undef_flag_slot(sv.src, flagslots, id)
                end
            end
            continue
        end
        isccall = head === :foreigncall
        for j in 1:length(eargs)
            ea = eargs[j]
            if isccall && isa(ea, Expr) && ea.head === :&
                ea = ea.args[1]
            end
            if isa(ea, Slot)
                id = slot_id(ea)
                if j == 1 && head === :method
                    flagslot = get_undef_flag_slot(sv.src, flagslots, id)
                    insert!(body, i + 1, :($flagslot = $nothing))
                    next_i += 1
                    len += 1
                    continue
                end
                if var_has_undef(sv.src, id)
                    insert!(body, i, get_undef_flag_slot(sv.src, flagslots, id))
                    i += 1
                    next_i += 1
                    len += 1
                end
            end
        end
    end
    for i in 1:norigslots
        if flagslots[i] != 0
            sv.src.slotflags[i] = (sv.src.slotflags[i] | SLOT_STATICUNDEF) & ~UInt8(SLOT_USEDUNDEF)
        end
    end
end


# Check if the use is still valid.
# The code that invalidate this use is responsible for adding new use(s) if any.
function check_valid(use::ValueUse, changes::IdDict)
    haskey(changes, use.stmts=>use.stmtidx) && return false
    isdefined(use, :expr) && haskey(changes, use.expr) && return false
    return true
end


# Check if the use is still valid.
# The code that invalidate this use is responsible for adding new def(s) if any.
check_valid(def::ValueDef, changes::IdDict) = !haskey(changes, def.stmts=>def.stmtidx)


function remove_invalid!(info::ValueInfo, changes::IdDict)
    if isempty(changes)
        return
    end
    cb = x->check_valid(x, changes)
    filter!(cb, info.uses)
    filter!(cb, info.defs)
    return
end

function add_def(info::ValueInfo, def::ValueDef)
    if info.defs === EMPTY_DEFS
        info.defs = [def]
    else
        push!(info.defs, def)
    end
    return
end
function add_use(info::ValueInfo, use::ValueUse)
    if info.uses === EMPTY_USES
        info.uses = [use]
    else
        push!(info.uses, use)
    end
    return
end

@inline get_info_entry(infomap::ValueInfoMap, slot::Slot) = (infomap.slots, slot_id(slot))
@inline get_info_entry(infomap::ValueInfoMap, ssa::SSAValue) = (infomap.ssas, ssa.id + 1)
@inline get_info_entry(infomap::ValueInfoMap, pair::Pair{Int,Bool}) =
    (pair.second ? infomap.ssas : infomap.slots, pair.first)
isassigned(infomap::ValueInfoMap, var) = isassigned(get_info_entry(infomap, var)...)
function delete!(infomap::ValueInfoMap, var)
    infos, idx = get_info_entry(infomap, var)
    ccall(:jl_arrayunset, Cvoid, (Any, Csize_t), infos, (idx - 1) % UInt)
end
function getindex(infomap::ValueInfoMap, var)
    infos, idx = get_info_entry(infomap, var)
    if isassigned(infos, idx)
        return infos[idx]
    else
        idx > length(infos) && resize!(infos, idx)
        info = ValueInfo()
        infos[idx] = info
        return info
    end
end
function setindex!(infomap::ValueInfoMap, info::ValueInfo, var)
    ary, idx = get_info_entry(infomap, var)
    idx > length(ary) && resize!(ary, idx)
    ary[idx] = info
end
add_def(infomap::ValueInfoMap, var, def::ValueDef) = add_def(infomap[var], def)
add_use(infomap::ValueInfoMap, var, use::ValueUse) = add_use(infomap[var], use)

@inline function var_has_static_undef(src, id, ssa=false)
    ssa && return false
    flags = src.slotflags[id]
    # The check for `UsedUndef` shouldn't be necessary but doesn't hurt
    return flags & SLOT_USEDUNDEF != 0 || flags & SLOT_STATICUNDEF != 0
end

@inline function var_has_undef(src, id, ssa=false)
    ssa && return false
    flags = src.slotflags[id]
    return flags & SLOT_USEDUNDEF != 0
end

@inline function var_is_ssa(src, id, ssa=false)
    ssa && return true
    flags = src.slotflags[id]
    # The check for `UsedUndef` shouldn't be necessary but doesn't hurt
    return flags & (SLOT_USEDUNDEF | SLOT_STATICUNDEF) == 0 && flags & SLOT_ASSIGNEDONCE != 0
end

function scan_expr_use!(infomap, body, i, ex, src)
    if isa(ex, Vector{Any})
        # Shouldn't happen but just to be safe since we treat this as nested blocks
        # in the `alloc_elim_pass!`
        body[i] = nothing
        return
    elseif isa(ex, SSAValue)
        body[i] = nothing
        return
    elseif isa(ex, Slot)
        if var_has_undef(src, slot_id(ex))
            add_use(infomap, ex, ValueUse(body, i))
        else
            body[i] = nothing
        end
        return
    elseif isa(ex, NewvarNode)
        # Clear newvarnode if the variable is never used undefined
        if var_has_undef(src, slot_id(ex.slot))
            add_def(infomap, ex.slot, ValueDef(ex, body, i))
        else
            body[i] = nothing
        end
        return
    elseif !isa(ex, Expr)
        return
    end
    head = ex.head
    is_meta_expr_head(head) && return
    if head === :(=)
        rhs = ex.args[2]
        lhs = ex.args[1]
        lhs_slot = isa(lhs, Slot)
        rhs_slot = isa(rhs, Slot)
        if lhs_slot && rhs_slot && symequal(lhs, rhs)
            # Self assignment
            if var_has_undef(src, slot_id(lhs))
                body[i] = rhs
                add_use(infomap, rhs, ValueUse(body, i))
            else
                body[i] = nothing
            end
            return
        end
        if lhs_slot || isa(lhs, SSAValue)
            add_def(infomap, lhs, ValueDef(ex, body, i))
        end
        if isa(rhs, Expr)
            ex = rhs
            head = ex.head
        else
            if rhs_slot || isa(rhs, SSAValue)
                add_use(infomap, rhs, ValueUse(body, i, ex, 2))
            end
            return
        end
    end
    eargs = ex.args
    isccall = head === :foreigncall
    for j in 1:length(eargs)
        ea = eargs[j]
        if j == 1 && head === :method
            if isa(ea, Slot)
                infomap[ea].has_method = true
                continue
            end
        end
        if isccall && isa(ea, Expr) && ea.head === :&
            ea2 = ea.args[1]
            if isa(ea2, Slot) || isa(ea2, SSAValue)
                add_use(infomap, ea2, ValueUse(body, i, ea, 1))
            end
        elseif isa(ea, Slot) || isa(ea, SSAValue)
            add_use(infomap, ea, ValueUse(body, i, ex, j))
        end
    end
end

# Collect infomation about all the defs and uses of variables in the function
# Also do simple cleanups as we do a linear scan over the code
function collect_value_infos(body::Vector{Any}, src::CodeInfo, nargs::Int)
    infomap = ValueInfoMap()
    nexpr = length(body)
    for i in 1:nexpr
        ex = body[i]
        scan_expr_use!(infomap, body, i, ex, src)
    end

    # Remove arguments from slot uses since they are useless
    slotsinfo = infomap.slots
    for i in 1:length(slotsinfo)
        if isassigned(slotsinfo, i)
            info = slotsinfo[i]
            if i > nargs && length(info.defs) == 1 && isa(info.defs[1].assign, Expr)
                src.slotflags[i] |= SLOT_ASSIGNEDONCE
            end
        end
    end

    return infomap
end

function delete_valueinfo!(ctx::AllocOptContext, key)
    # Slot
    if !key.second
        ctx.sv.src.slotflags[key.first] = 0
    end
    delete!(ctx.infomap, key)
    return
end

function add_allocopt_todo(ctx::AllocOptContext, id, is_ssa)
    ctx.todo[id=>is_ssa] = nothing
end

@inline function add_allocopt_todo(ctx::AllocOptContext, @nospecialize(var))
    if isa(var, SSAValue)
        ctx.todo[(var.id + 1)=>true] = nothing
    elseif isa(var, Pair{Int,Bool})
        ctx.todo[var] = nothing
    else
        ctx.todo[slot_id(var)=>false] = nothing
    end
end

@inline function maybe_add_allocopt_todo(ctx::AllocOptContext, @nospecialize(var))
    if isa(var, SSAValue)
        ctx.todo[(var.id + 1)=>true] = nothing
    elseif isa(var, Slot)
        ctx.todo[slot_id(var)=>false] = nothing
    end
end

function delete_value!(ctx::AllocOptContext, info, key)
    # No use is left for this value, delete the assignment
    for def in info.defs
        ctx.changes[def.assign] = nothing
        assign = def.assign
        if isa(assign, NewvarNode)
            def.stmts[def.stmtidx] = nothing
            continue
        end
        defex = assign.args[2]
        if isa(defex, SSAValue)
            add_allocopt_todo(ctx, defex)
            def.stmts[def.stmtidx] = nothing
        elseif isa(defex, Slot)
            if var_has_undef(ctx.sv.src, slot_id(defex))
                add_use(ctx.infomap, defex, ValueUse(def.stmts, def.stmtidx))
                def.stmts[def.stmtidx] = defex
            else
                add_allocopt_todo(ctx, defex)
                def.stmts[def.stmtidx] = nothing
            end
        elseif isa(defex, Vector{Any})
            def.stmts[def.stmtidx] = nothing
        else
            # Update this to add todo
            # when adding cases where this might enable further optimizations.
            def.stmts[def.stmtidx] = defex
        end
    end
    delete_valueinfo!(ctx, key)
end

function merge_value_ssa!(ctx::AllocOptContext, info, key)
    # There are other cases that we can merge
    # but those require control flow analysis.
    var_has_static_undef(ctx.sv.src, key.first, key.second) && return false
    local defkey
    for def in info.defs
        # No NewvarNode def for variables that aren't used undefined
        defex = (def.assign::Expr).args[2]
        if isa(defex, SSAValue)
            new_key = (defex.id + 1)=>true
            if @isdefined(defkey) && defkey != new_key
                return true
            else
                defkey = new_key
            end
        elseif isa(defex, Slot)
            id = slot_id(defex)
            new_key = id=>false
            defslot_is_ssa = id <= ctx.sv.nargs || var_is_ssa(ctx.sv.src, id)
            if !defslot_is_ssa || (@isdefined(defkey) && defkey != new_key)
                return true
            else
                defkey = new_key
            end
        else
            return @isdefined(defkey)
        end
    end

    if defkey.second || defkey.first > ctx.sv.nargs
        add_allocopt_todo(ctx, defkey)
    end

    # don't replace TypedSlots with SSAValues
    # TODO: introduce extra SSAValue for each differently-typed use.
    if defkey.second
        for use in info.uses
            if isdefined(use, :expr) && use.expr.args[use.exidx] isa TypedSlot
                return false
            end
        end
    end

    definfo = ctx.infomap[defkey]
    for def in info.defs
        ctx.changes[def.assign] = nothing
        def.stmts[def.stmtidx] = nothing
    end
    if defkey.second
        # SSAValue def
        replace_v = SSAValue(defkey.first - 1)
    else
        replace_v = SlotNumber(defkey.first)
    end
    for use in info.uses
        if isdefined(use, :expr)
            this_use = use.expr.args[use.exidx]
            if !defkey.second && this_use isa TypedSlot
                use.expr.args[use.exidx] = TypedSlot(replace_v.id, this_use.typ)
            else
                use.expr.args[use.exidx] = replace_v
            end
            add_use(definfo, use)
        else
            # This variable is never used undef, ignore statement level use
            use.stmts[use.stmtidx] = nothing
        end
    end
    delete_valueinfo!(ctx, key)
    return true
end

function replace_use_expr_with!(ctx::AllocOptContext, use::ValueUse, expr,
                                delete_old=true, update_var_use=false)
    # Not supported on ccall & expression
    oldexpr = use.expr
    stmt = use.stmts[use.stmtidx]::Expr
    if stmt === oldexpr
        use.stmts[use.stmtidx] = expr
        if update_var_use && (isa(expr, Slot) || isa(expr, SSAValue))
            add_use(ctx.infomap, expr, ValueUse(use.stmts, use.stmtidx))
        end
    else
        @assert stmt.head === :(=) && stmt.args[2] === oldexpr
        stmt.args[2] = expr
        if update_var_use && (isa(expr, Slot) || isa(expr, SSAValue))
            add_use(ctx.infomap, expr, ValueUse(use.stmts, use.stmtidx, stmt, 2))
        end
        maybe_add_allocopt_todo(ctx, stmt.args[1])
    end
    if delete_old
        ctx.changes[oldexpr] = nothing
    end
end

function propagate_const_def!(ctx::AllocOptContext, info, key)
    local constv
    for def in info.defs
        # No NewvarNode def for variables that aren't used undefined
        defex = (def.assign::Expr).args[2]
        # We don't want to probagate this constant since this is a token.
        isa(defex, Expr) && defex.head === :gc_preserve_begin && return false
        v = exprtype(defex, ctx.sv.src, ctx.sv.mod)
        (isa(v, Const) && is_inlineable_constant(v.val)) || return false
        v = v.val
        @isdefined(constv) && constv !== v && return false
        constv = v
    end
    for def in info.defs
        defex = (def.assign::Expr).args[2]
        if isa(defex, Expr)
            # Expr def can have side effects
            def.stmts[def.stmtidx] = defex
        else
            def.stmts[def.stmtidx] = nothing
        end
        ctx.changes[def.assign] = nothing
    end
    replace_ex = quoted(constv)
    is_immutable = !typeof(constv).mutable || isa(constv, DataType)
    for use in info.uses
        if !isdefined(use, :expr)
            use.stmts[use.stmtidx] = nothing
            continue
        elseif is_immutable
            if use.exidx == 2 && is_known_call(use.expr, getfield, ctx.sv.src, ctx.sv.mod) &&
                2 < length(use.expr.args) < 5

                fld = use.expr.args[3]
                if isa(fld, Int)
                    fld = fld
                elseif isa(fld, QuoteNode)
                    fld = fld.value
                else
                    fld = nothing
                end
                if isa(fld, Symbol) || isa(fld, Int)
                    if isdefined(constv, fld)
                        fieldv = getfield(constv, fld)
                        replace_use_expr_with!(ctx, use, quoted(fieldv))
                        continue
                    end
                end
            end
        end
        use.expr.args[use.exidx] = replace_ex
        if use.expr.head === :(=)
            maybe_add_allocopt_todo(ctx, use.expr.args[1])
        end
    end
    delete_valueinfo!(ctx, key)
    return true
end

function split_disjoint_assign!(ctx::AllocOptContext, info, key)
    key.second && return false
    isdispatchelem(widenconst(ctx.sv.src.slottypes[key.first])) && return false # no splitting can be necessary
    alltypes = IdDict()
    ndefs = length(info.defs)
    deftypes = Vector{Any}(uninitialized, ndefs)
    for i in 1:ndefs
        def = info.defs[i]
        defex = (def.assign::Expr).args[2]
        rhstyp = widenconst(exprtype(defex, ctx.sv.src, ctx.sv.mod))
        isdispatchelem(rhstyp) || return false
        alltypes[rhstyp] = nothing
        deftypes[i] = rhstyp
    end
    need_tag = false
    for use in info.uses
        usex = use.expr
        slot = usex.args[use.exidx]
        if isa(slot, TypedSlot)
            usetyp = widenconst(slot.typ)
            if isdispatchelem(usetyp)
                alltypes[usetyp] = nothing
                continue
            end
        end
        if usex.head === :(=) || usex.head === :&
            return false
        else
            ty = exprtype(usex, ctx.sv.src, ctx.sv.mod)
            if isa(ty, Const)
            elseif isa(ty, Conditional) && isa(ty.var, Slot) && slot_id(ty.var) == key.first
                if isa(ty.vtype, Const) && !isdefined(typeof(ty.vtype.val), :instance)
                    return false
                end
                need_tag = true
            else
                return false
            end
            effect_free(usex, ctx.sv.src, ctx.sv.mod, false) || return false
        end
    end
    name = ctx.sv.src.slotnames[key.first]
    flags = ctx.sv.src.slotflags[key.first]
    if need_tag
        tag_var = add_slot!(ctx.sv.src, Int, false, name)
        ctx.sv.src.slotflags[tag_var.id] = flags
    end
    for i in 1:ndefs
        def = info.defs[i]
        assign = def.assign::Expr
        defex = assign.args[2]
        rhstyp = deftypes[i]
        new_slot = alltypes[rhstyp]
        if !isa(new_slot, SlotNumber)
            new_slot = add_slot!(ctx.sv.src, rhstyp, false, name)
            ctx.sv.src.slotflags[new_slot.id] = flags | SLOT_STATICUNDEF
            alltypes[rhstyp] = new_slot
            add_allocopt_todo(ctx, new_slot)
        end
        assign.args[1] = new_slot
        if need_tag
            stmts = Any[:($tag_var = $(new_slot.id)), assign]
            add_def(ctx.infomap, tag_var, ValueDef(stmts[1], stmts, 1))
            scan_expr_use!(ctx.infomap, stmts, 2, assign, ctx.sv.src)
            ctx.changes[def.stmts=>def.stmtidx] = nothing
            def.stmts[def.stmtidx] = stmts
        else
            add_def(ctx.infomap, new_slot, def)
        end
    end
    for use in info.uses
        usex = use.expr
        slot = usex.args[use.exidx]
        if isa(slot, TypedSlot)
            usetyp = widenconst(slot.typ)
            if isdispatchelem(usetyp)
                usetyp = widenconst(slot.typ)
                new_slot = alltypes[usetyp]
                if !isa(new_slot, SlotNumber)
                    new_slot = add_slot!(ctx.sv.src, usetyp, false, name)
                    ctx.sv.src.slotflags[new_slot.id] = flags | SLOT_STATICUNDEF
                    alltypes[usetyp] = new_slot
                end
                new_slot = new_slot::SlotNumber
                use.expr.args[use.exidx] = new_slot
                add_use(ctx.infomap, new_slot, use)
                continue
            end
        end
        ty = exprtype(usex, ctx.sv.src, ctx.sv.mod)
        if isa(ty, Const) && is_inlineable_constant(ty.val)
            replace_use_expr_with!(ctx, use, quoted(ty.val))
        elseif isa(ty, Conditional)
            exprs = []
            vars = []
            for (t, v) in alltypes
                isa(v, SlotNumber) || continue
                if t ⊑ widenconst(ty.vtype)
                    new_var = newvar!(ctx.sv, Bool)
                    new_val = :($(GlobalRef(Core, :(===)))($tag_var, $(v.id)))
                    new_val.typ = Bool
                    new_ex = :($new_var = $new_val)
                    push!(exprs, new_ex)
                    push!(vars, new_var)
                    add_def(ctx.infomap, new_var, ValueDef(new_ex, exprs, length(exprs)))
                    add_use(ctx.infomap, tag_var, ValueUse(exprs, length(exprs), new_val, 2))
                end
            end
            if isempty(vars)
                replace_use_expr_with!(ctx, use, quoted(false))
            else
                var = vars[1]
                for var_i in 2:length(vars)
                    new_var = newvar!(ctx.sv, Bool)
                    new_val = :($(GlobalRef(Core.Intrinsics, :and_int))($var, $(vars[var_i])))
                    new_val.typ = Bool
                    new_ex = :($new_var = $new_val)
                    push!(exprs, new_ex)
                    add_def(ctx.infomap, new_var, ValueDef(new_ex, exprs, length(exprs)))
                    add_use(ctx.infomap, var, ValueUse(exprs, length(exprs), new_val, 2))
                    add_use(ctx.infomap, vars[var_i], ValueUse(exprs, length(exprs), new_val, 3))
                    var = new_var
                end
                replace_use_expr_with!(ctx, use, var, false)
                old_expr = use.stmts[use.stmtidx]
                push!(exprs, old_expr)
                use.stmts[use.stmtidx] = exprs
                scan_expr_use!(ctx.infomap, exprs, length(exprs), old_expr, ctx.sv.src)
                ctx.changes[use.stmts=>use.stmtidx] = nothing
            end
        end
    end
    delete_valueinfo!(ctx, key)
    return true
end

function show_info(io::IO, use::ValueUse, ctx)
    if isdefined(use, :expr)
        print(io, "ValueUse(expr=`", use.expr, "`, exidx=", use.exidx,
              ", stmts[", use.stmtidx, "]=`", use.stmts[use.stmtidx], "`)")
    else
        print(io, "ValueUse(stmts[", use.stmtidx, "]=", use.stmts[use.stmtidx], ")")
    end
    if isa(ctx, AllocOptContext) && !check_valid(use, ctx.changes)
        print(io, " Invalidated")
    end
end
function show_info(io::IO, def::ValueDef, ctx)
    print(io, "ValueDef(assign=`", def.assign, "`, stmts=..., stmtidx=", def.stmtidx, ")")
    if isa(ctx, AllocOptContext) && !check_valid(def, ctx.changes)
        print(io, " Invalidated")
    end
end
function show_info(io::IO, info::ValueInfo, ctx, prefix="")
    println(io, prefix, "ValueInfo:")
    if !isempty(info.defs)
        println(io, prefix, "  Defs:")
        for def in info.defs
            print(io, prefix, "    ")
            show_info(io, def, ctx)
            println(io)
        end
    end
    if !isempty(info.uses)
        println(io, prefix, "  Uses:")
        for use in info.uses
            print(io, prefix, "    ")
            show_info(io, use, ctx)
            println(io)
        end
    end
end
function show_info(io::IO, info::ValueInfoMap, ctx)
    for i in 1:length(info.ssas)
        isassigned(info.ssas, i) || continue
        println(io, "SSAValue(", i - 1, "):")
        show_info(io, info.ssas[i], ctx, "  ")
    end
    for i in 1:length(info.slots)
        isassigned(info.slots, i) || continue
        println(io, "Slot(", i, "):")
        show_info(io, info.slots[i], ctx, "  ")
    end
end

function structinfo_constant(ctx::AllocOptContext, @nospecialize(v), vt::DataType)
    nf = fieldcount(vt)
    if vt <: Tuple
        si = StructInfo(Vector{Any}(uninitialized, nf), Symbol[], vt, false, false)
    else
        si = StructInfo(Vector{Any}(uninitialized, nf), collect(Symbol, fieldnames(vt)),
                        vt, false, false)
    end
    for i in 1:nf
        if isdefined(v, i)
            si.defs[i] = quoted(getfield(v, i))
        else
            ctx.undef_fld[i] = nothing
            ctx.undef_fld[si.names[i]] = nothing
        end
    end
    return si
end

structinfo_tuple(ex::Expr) = StructInfo(ex.args[2:end], Symbol[], Tuple, false, false)
function structinfo_new(ctx::AllocOptContext, ex::Expr, vt::DataType)
    nf = fieldcount(vt)
    si = StructInfo(Vector{Any}(uninitialized, nf), collect(Symbol, fieldnames(vt)),
                    vt, vt.mutable, true)
    ninit = length(ex.args) - 1
    for i in 1:nf
        if i <= ninit
            si.defs[i] = ex.args[i + 1]
        else
            ft = fieldtype(vt, i)
            if isbits(ft)
                ex = Expr(:new, ft)
                ex.typ = ft
                si.defs[i] = ex
            else
                ctx.undef_fld[i] = nothing
                ctx.undef_fld[si.names[i]] = nothing
            end
        end
    end
    return si
end

function split_struct_alloc!(ctx::AllocOptContext, info, key)
    # Collect information about each struct/tuple allocation
    min_nf = typemax(Int)
    max_nf = 0
    n_immut = 0
    ndef = length(info.defs)
    empty!(ctx.sym_count)
    empty!(ctx.undef_fld)
    empty!(ctx.structinfos)
    for def in info.defs
        local defex, si, def_nf
        defex = (def.assign::Expr).args[2]
        if isa(defex, Expr)
            defex = defex::Expr
            if is_known_call(defex, tuple, ctx.sv.src, ctx.sv.mod)
                si = structinfo_tuple(defex)
            elseif defex.head === :new
                typ = widenconst(exprtype(defex, ctx.sv.src, ctx.sv.mod))
                # typ <: Tuple shouldn't happen but just in case someone generated invalid AST
                if !isa(typ, DataType) || !isdispatchelem(typ) || typ <: Tuple
                    return false
                end
                si = structinfo_new(ctx, defex, typ)
            else
                return false
            end
        else
            v = exprtype(defex, ctx.sv.src, ctx.sv.mod)
            isa(v, Const) || return false
            v = v.val
            vt = typeof(v)
            if (vt.mutable && !isa(v, DataType)) || fieldcount(vt) == 0
                # allowing DataType as immutable can change the behavior if the user calls
                # `setfield!` on it but that's invalid anyway and throwing an error is actually
                # better than letting it work.
                return false
            end
            si = structinfo_constant(ctx, v, vt)
        end
        if !si.mutable
            n_immut += 1
        end
        def_nf = length(si.defs)
        if def_nf < min_nf
            min_nf = def_nf
        end
        if def_nf > max_nf
            max_nf = def_nf
        end
        for fld_idx in 1:length(si.names)
            sym = si.names[fld_idx]
            prev_count = get(ctx.sym_count, sym, 0=>0)
            if isa(prev_count, Pair{Int,Int})
                if first(prev_count) == 0
                    ctx.sym_count[sym] = 1=>fld_idx
                elseif last(prev_count) == fld_idx
                    ctx.sym_count[sym] = (first(prev_count) + 1)=>fld_idx
                else
                    ctx.sym_count[sym] = (first(prev_count) + 1)=>[last(prev_count), fld_idx]
                end
            else
                prev_count = prev_count::Pair{Int,Vector{Int}}
                ctx.sym_count[sym] = (first(prev_count) + 1)=>push!(last(prev_count), fld_idx)
            end
        end
        push!(ctx.structinfos, si)
    end
    if ndef > 1
        allowed_idx = trues(min_nf)
        idx_count = zeros(Int, min_nf)
        for v in values(ctx.sym_count)
            # Ignore field names that will not be allowed to be optimized
            if isa(v, Pair{Int,Int})
                first(v) == ndef || continue
                if last(v) <= min_nf
                    idx_count[last(v)] += 1
                end
            else
                v = v::Pair{Int,Vector{Int}}
                first(v) == ndef || continue
                for fld_idx in last(v)
                    allowed_idx[fld_idx] = false
                end
            end
        end
        for fld_idx in 1:min_nf
            if idx_count[fld_idx] > 1
                allowed_idx[fld_idx] = false
            end
        end
    end

    # Find the set of valid operations for each kind of use
    #
    # * `isdefined`
    #
    #     Field name or index must be constant and unambiguous.
    #     Field that are known or unknown for all defs are allowed.
    #     Unambiguous means that each field must be only accessed by index or only by name
    #     that maps to the same index for all defs.
    #     Currently this requirement is raised to be only based on the def and not the use
    #
    # * `getfield`
    #
    #     Requires all conditions for `isdefined`.
    #     Field index must be within the intersect of all the defs.
    #     Field name must be known for all defs.
    #
    # * `setfield!`
    #
    #     Requires all conditions for `getfield`.
    #     Additionally require all defs to be mutable.
    #
    # * preserved objects (`GC.@preserve` and `ccall` roots)
    #
    #     No `setfield!` should be called for mutable defs on the NULL sites.
    #     This is because it's currently unclear how conditional undefined root slots
    #     can be represented. It's possible that we can just change the requirement in codegen
    #     for preserved objects.
    #     Currently also require single assignment.
    #     Lifting this requirement is certainly possible but harder to implement....

    has_preserve = false
    has_setfield_undef = false
    empty!(ctx.all_fld)
    empty!(ctx.setfield_typ)
    for use in info.uses
        isdefined(use, :expr) || continue
        local fld
        local expr = use.expr
        local head = expr.head
        if head === :isdefined
            # May or may not be needed but trivial to handle
            continue
        elseif head === :gc_preserve_begin
            if ndef > 1
                # This is certainly overkill, but too hard for initial version ;-p
                return false
            end
            has_setfield_undef && return false
            has_preserve = true
            continue
        elseif head === :foreigncall
            if ndef > 1
                # This is certainly overkill, but too hard for initial version ;-p
                return false
            end
            nccallargs = expr.args[5]::Int
            if use.exidx <= 5 + nccallargs
                return false
            end
            has_setfield_undef && return false
            has_preserve = true
            continue
        elseif head === :call
            if use.exidx != 2 || length(expr.args) < 3
                return false
            end
            fld = expr.args[3]
            if is_known_call(expr, isdefined, ctx.sv.src, ctx.sv.mod)
                use_kind = 0
            elseif is_known_call(expr, getfield, ctx.sv.src, ctx.sv.mod)
                use_kind = 1
            elseif is_known_call(expr, setfield!, ctx.sv.src, ctx.sv.mod)
                if n_immut != 0 || length(expr.args) < 4
                    return false
                end
                use_kind = 2
            else
                return false
            end
        else
            return false
        end
        if isa(fld, Int)
            fld = fld
        elseif isa(fld, QuoteNode)
            fld = fld.value
        else
            return false
        end
        if isa(fld, Int)
            if 0 < fld <= min_nf
                ndef == 1 || allowed_idx[fld] || return false
            elseif min_nf < fld <= max_nf
                return false
            elseif use_kind == 0
                # We can handle this but don't record in `all_fld`
                continue
            else
                return false
            end
        elseif isa(fld, Symbol)
            v = get(ctx.sym_count, fld, 0=>0)
            fld_count = isa(v, Pair{Int,Int}) ? first(v) : first(v::Pair{Int,Vector})
            if use_kind == 0 && fld_count == 0
                # We can handle this but don't record in `all_fld`
                continue
            elseif fld_count != ndef
                return false
            end
        else
            return false
        end
        if use_kind == 2
            if haskey(ctx.undef_fld, fld)
                has_preserve && return false
                has_setfield_undef = true
            end
            fld_typ = widenconst(exprtype(expr.args[4], ctx.sv.src, ctx.sv.mod))
            ctx.setfield_typ[fld] = Union{fld_typ, get(ctx.setfield_typ, fld, Union{})}
        end
        ctx.all_fld[fld] = nothing
    end
    if ndef == 1
        split_struct_alloc_single!(ctx, info, key, min_nf, has_preserve, has_setfield_undef)
    else
        split_struct_alloc_multi!(ctx, info, key)
    end
    delete_valueinfo!(ctx, key)
    return true
end

function split_struct_alloc_multi!(ctx::AllocOptContext, info, key)
    # If there are multiple assignments, we have to create slots for each values
    # We currently only allow ccall root with a single assignment so we only need to handle
    # isdefined, setfield! and getfield here.

    # First, assign a slot to each variable.
    # The slot types at this point is determined only by the setfield that are applied.
    # We'll include the initialization type as we go through the defs
    vars = IdDict()
    create_struct_field_slots!(ctx, key, vars)

    # Now, for each def. Assign all the slots.
    # Also `Union` the RHS of the assignments into the slot type as we scan through the defs.
    replace_struct_defs!(ctx, info, vars)

    # Finally replace all uses
    replace_struct_uses!(ctx, info, vars)
end

function replace_struct_uses!(ctx, info, vars)
    for use in info.uses
        if !isdefined(use, :expr)
            # Top level dummy use, remove
            use.stmts[use.stmtidx] = nothing
            continue
        end
        local fld
        local use_kind
        expr = use.expr
        head = expr.head
        if head === :isdefined
            replace_use_expr_with!(ctx, use, quoted(true))
            continue
        elseif head === :call
            fld = expr.args[3]
            if is_known_call(expr, isdefined, ctx.sv.src, ctx.sv.mod)
                use_kind = 0
            elseif is_known_call(expr, getfield, ctx.sv.src, ctx.sv.mod)
                use_kind = 1
            elseif is_known_call(expr, setfield!, ctx.sv.src, ctx.sv.mod)
                use_kind = 2
            end
        end
        if isa(fld, QuoteNode)
            fld = fld.value
        end
        slot_id = get(vars, fld, 0)
        if slot_id == 0
            @assert use_kind == 0
            replace_use_expr_with!(ctx, use, quoted(false))
            continue
        end

        if use_kind == 0
            # isdefined
            if haskey(ctx.undef_fld, fld)
                replace_use_expr_with!(ctx, use, SlotNumber(slot_id + 1), true, true)
            else
                replace_use_expr_with!(ctx, use, quoted(true))
            end
        elseif use_kind == 1
            # getfield
            if !haskey(ctx.undef_fld, fld)
                replace_use_expr_with!(ctx, use, SlotNumber(slot_id), true, true)
            else
                replace_var = SlotNumber(slot_id)
                flag_slot = SlotNumber(slot_id + 1)
                check_expr = Expr(:call, throw_undefreferror_ifnot, flag_slot)
                stmts = Any[check_expr]
                add_use(ctx.infomap, flag_slot, ValueUse(stmts, 1, check_expr, 2))
                stmt = use.stmts[use.stmtidx]::Expr
                if stmt !== expr
                    @assert stmt.head === :(=) && stmt.args[2] === expr
                    stmt.args[2] = replace_var
                    push!(stmts, stmt)
                    scan_expr_use!(ctx.infomap, stmts, length(stmts), stmt, ctx.sv.src)
                end
                ctx.changes[use.stmts=>use.stmtidx] = nothing
                use.stmts[use.stmtidx] = stmts
            end
        else
            @assert use_kind == 2
            # setfield!
            replace_var = SlotNumber(slot_id)
            val = expr.args[4]
            stmts = Any[]
            if haskey(ctx.undef_fld, fld)
                flag_slot = SlotNumber(slot_id + 1)
                assign_flag_ex = :($flag_slot = true)
                push!(stmts, assign_flag_ex)
                add_def(ctx.infomap, flag_slot, ValueDef(assign_flag_ex, stmts, length(stmts)))
            end
            assign_ex = :($replace_var = $val)
            push!(stmts, assign_ex)
            scan_expr_use!(ctx.infomap, stmts, length(stmts), assign_ex, ctx.sv.src)
            stmt = use.stmts[use.stmtidx]::Expr
            if stmt !== expr
                @assert stmt.head === :(=) && stmt.args[2] === expr
                extra_assign_ex = :($(stmt.args[1]) = $val)
                push!(stmts, extra_assign_ex)
                scan_expr_use!(ctx.infomap, stmts, length(stmts), extra_assign_ex, ctx.sv.src)
            end
            use.stmts[use.stmtidx] = stmts
            ctx.changes[use.stmts=>use.stmtidx] = nothing
        end
    end
end

function check_new_field_type(@nospecialize(val), @nospecialize(typ))
    if !isa(val, typ)
        ccall(:jl_type_error_new_expr, Union{}, (Any, Any), typ, val)
    end
end

function replace_struct_defs!(ctx, info, vars)
    for i in 1:length(info.defs)
        si = ctx.structinfos[i]
        has_name = !isempty(si.names)
        exprs = []
        for fld_idx in 1:length(si.defs)
            local slot_id
            local fld_def
            need_flag = false
            if isassigned(si.defs, fld_idx)
                fld_def = si.defs[fld_idx]
            end
            has_def = @isdefined(fld_def)
            # First check field name
            if has_name
                fname = si.names[fld_idx]
                if haskey(vars, fname)
                    slot_id = vars[fname]
                    need_flag = haskey(ctx.undef_fld, fname)
                end
            end
            if !@isdefined(slot_id)
                # Then check field index
                if haskey(vars, fld_idx)
                    slot_id = vars[fld_idx]
                    need_flag = haskey(ctx.undef_fld, fld_idx)
                else
                    # The field is not used and we don't need to deal with any side effects
                    if has_def
                        if si.isnew && !(exprtype(fld_def, ctx.sv.src,
                                                  ctx.sv.mod) ⊑ fieldtype(si.typ, fld_idx))
                            check_ex = Expr(:call, check_new_field_type, fld_def,
                                            quoted(fieldtype(si.typ, fld_idx)))
                            push!(exprs, check_ex)
                            scan_expr_use!(ctx.infomap, exprs, length(exprs), check_ex, ctx.sv.src)
                        else
                            maybe_add_allocopt_todo(ctx, fld_def)
                        end
                    end
                    continue
                end
            end

            if need_flag
                flag_slot = SlotNumber(slot_id + 1)
                flag_ex = :($flag_slot = $has_def)
                push!(exprs, flag_ex)
                add_def(ctx.infomap, flag_slot, ValueDef(flag_ex, exprs, length(exprs)))
            end
            if has_def
                fld_ty = widenconst(exprtype(fld_def, ctx.sv.src, ctx.sv.mod))
                if si.isnew && !(fld_ty ⊑ fieldtype(si.typ, fld_idx))
                    struct_fld_ty = fieldtype(si.typ, fld_idx)
                    check_ex = Expr(:call, check_new_field_type, fld_def,
                                    quoted(struct_fld_ty))
                    push!(exprs, check_ex)
                    scan_expr_use!(ctx.infomap, exprs, length(exprs), check_ex, ctx.sv.src)
                    fld_ty = typeintersect(struct_fld_ty, fld_ty)
                else
                    maybe_add_allocopt_todo(ctx, fld_def)
                end
                assign_ex = :($(SlotNumber(slot_id)) = $fld_def)
                push!(exprs, assign_ex)
                scan_expr_use!(ctx.infomap, exprs, length(exprs), assign_ex, ctx.sv.src)
                slot_type = ctx.sv.src.slottypes[slot_id]
                ctx.sv.src.slottypes[slot_id] = Union{slot_type,fld_ty}
            end
        end
        def = info.defs[i]
        ctx.changes[def.stmts=>def.stmtidx] = nothing
        def.stmts[def.stmtidx] = exprs
    end
end

function create_struct_field_slots!(ctx, key, vars)
    slot_flag = var_has_static_undef(ctx.sv.src, key.first, key.second) * SLOT_STATICUNDEF
    for fld in keys(ctx.all_fld)
        haskey(vars, fld) && continue
        local fldidx
        local slot_id
        slot_type = get(ctx.setfield_typ, fld, Union{})
        # We shouldn't need to check both field index and field name for undef
        # since the two should agree for unambiguous fields.
        if isa(fld, Symbol)
            slot_name = fld
            v = get(ctx.sym_count, fld, 0=>0)
            if isa(v, Pair{Int,Int}) && first(v) != 0
                # single field index
                fldidx = last(v)
                slot_type = Union{slot_type,get(ctx.setfield_typ, fldidx, Union{})}
                if haskey(vars, fldidx)
                    slot_id = first(vars[fldidx]::Int)
                    ctx.sv.src.slottypes[slot_id] = slot_type
                    ctx.sv.src.slotnames[slot_id] = slot_name
                end
            end
        else
            slot_name = :field_slot
        end

        if !@isdefined(slot_id)
            slot_id = add_slot!(ctx.sv.src, slot_type, false, slot_name).id
            add_allocopt_todo(ctx, slot_id, false)
            if haskey(ctx.undef_fld, fld)
                ctx.sv.src.slotflags[end] = SLOT_STATICUNDEF
                undef_slot = add_slot!(ctx.sv.src, Bool, false, :field_flag)
                ctx.sv.src.slotflags[end] = slot_flag
                add_allocopt_todo(ctx, undef_slot)
                @assert undef_slot.id == slot_id + 1
            else
                ctx.sv.src.slotflags[end] = slot_flag
            end
            if @isdefined(fldidx)
                vars[fldidx] = slot_id
            end
        end
        vars[fld] = slot_id
    end
end

function throw_undefreferror_ifnot(cond::Bool)
    if !cond
        throw(UndefRefError())
    end
    return
end

function split_struct_alloc_single!(ctx::AllocOptContext, info, key, nf, has_preserve,
                                    has_setfield_undef)
    # If there's only a single def, there's a lot more tricks we can do that's impossible
    # when there are multiple ones.
    #
    # 1. Using SSAValue instead of Slot if the variable itself is SSA and there isn't
    #    any setfield on the value
    # 2. Forward SSAValue or constant field value to use directly when there isn't setfield
    # 3. (For now) Splitting preserved objects
    def = info.defs[1]
    si = ctx.structinfos[1]
    if !isempty(ctx.undef_fld) && has_setfield_undef
        flag_vars = Vector{SlotNumber}(uninitialized, nf)
    end
    vars = Vector{Any}(uninitialized, nf)
    is_ssa = !var_has_static_undef(ctx.sv.src, key.first, key.second)
    def_exprs = Any[]
    if has_preserve
        preserved_vars = Any[]
    end
    # First go through each field and decide the variable name and create assignments
    # expressions.
    for i in 1:nf
        local fld_name, orig_def
        if !isempty(si.names)
            fld_name = si.names[i]
        end
        has_fld_use = haskey(ctx.all_fld, i) || (@isdefined(fld_name) &&
                                                 haskey(ctx.all_fld, fld_name))
        has_def = isassigned(si.defs, i)
        has_setfld_use = false
        if has_def
            orig_def = si.defs[i]
            field_typ = widenconst(exprtype(orig_def, ctx.sv.src, ctx.sv.mod))
            if si.isnew && !(field_typ ⊑ fieldtype(si.typ, i))
                struct_fld_ty = fieldtype(si.typ, i)
                check_ex = Expr(:call, check_new_field_type, orig_def,
                                quoted(struct_fld_ty))
                push!(def_exprs, check_ex)
                scan_expr_use!(ctx.infomap, def_exprs, length(def_exprs), check_ex, ctx.sv.src)
                field_typ = typeintersect(struct_fld_ty, field_typ)
            else
                maybe_add_allocopt_todo(ctx, orig_def)
            end
        else
            field_typ = Union{}
        end
        if has_fld_use && !isempty(ctx.setfield_typ)
            if haskey(ctx.setfield_typ, i)
                has_setfld_use = true
                field_typ = Union{field_typ,ctx.setfield_typ[i]}
            end
            if @isdefined(fld_name) && haskey(ctx.setfield_typ, fld_name)
                has_setfld_use = true
                field_typ = Union{field_typ,ctx.setfield_typ[fld_name]}
            end
        end
        if !@isdefined(fld_name)
            fld_name = :struct_field
        end
        need_preserved_root = has_preserve && !isbits(field_typ)
        local var_slot
        if !has_def
            # If there's no direct use of the field
            # (and we know that there's no use in preserved root) we can ignore this field
            # Similarly, unless someone set the field, it is always #undef and we can
            # replace all uses with that
            has_setfld_use || continue
            # OK, so someone calls setfield! on this =(
            # We need to allocate the variable **AND** the undef tag variable
            flag_slot = add_slot!(ctx.sv.src, Bool, false, fld_name)
            ctx.sv.src.slotflags[end] = is_ssa ? 0 : SLOT_STATICUNDEF
            flag_vars[i] = flag_slot
            add_allocopt_todo(ctx, flag_slot)

            var_slot = add_slot!(ctx.sv.src, field_typ, false, fld_name)
            ctx.sv.src.slotflags[end] = SLOT_STATICUNDEF
            vars[i] = var_slot
            add_allocopt_todo(ctx, var_slot)

            newvar_flag = :($flag_slot = false)
            push!(def_exprs, newvar_flag)
            add_def(ctx.infomap, flag_slot, ValueDef(newvar_flag, def_exprs, length(def_exprs)))
            continue
        elseif has_setfld_use
            var_slot = add_slot!(ctx.sv.src, field_typ, false, fld_name)
            ctx.sv.src.slotflags[end] = is_ssa ? 0 : SLOT_STATICUNDEF
            def_ex = :($var_slot = $orig_def)
            push!(def_exprs, def_ex)
            scan_expr_use!(ctx.infomap, def_exprs, length(def_exprs), def_ex, ctx.sv.src)
            vars[i] = var_slot
            add_allocopt_todo(ctx, var_slot)
        else
            # OK so no setfield
            # First check if the field was a constant
            cv = exprtype(orig_def, ctx.sv.src, ctx.sv.mod)
            if isa(cv, Const) && is_inlineable_constant(cv.val)
                vars[i] = quoted(cv.val)
                # Constants don't need to be preserved
                continue
            end
            if has_fld_use || need_preserved_root
                need_assign = true
                if is_ssa
                    if isa(orig_def, SSAValue)
                        var_slot = orig_def
                        need_assign = false
                    else
                        var_slot = newvar!(ctx.sv, field_typ)
                    end
                else
                    var_slot = add_slot!(ctx.sv.src, field_typ, false, fld_name)
                    ctx.sv.src.slotflags[end] = SLOT_STATICUNDEF
                end
                if need_assign
                    def_ex = :($var_slot = $orig_def)
                    push!(def_exprs, def_ex)
                    scan_expr_use!(ctx.infomap, def_exprs, length(def_exprs), def_ex, ctx.sv.src)
                end
                vars[i] = var_slot
                add_allocopt_todo(ctx, var_slot)
            end
            # No side effect allowed in `Expr` arguments at this point
        end
        if need_preserved_root
            push!(preserved_vars, var_slot)
        end
    end
    # OK, so now we've created all the variables and assignments needed, replace the def.
    ctx.changes[def.stmts=>def.stmtidx] = nothing
    def.stmts[def.stmtidx] = def_exprs
    # Now fix up uses =)
    for use in info.uses
        local fld
        local use_kind
        if !isdefined(use, :expr)
            # Top level dummy use, remove
            use.stmts[use.stmtidx] = nothing
            continue
        end
        expr = use.expr
        head = expr.head
        if head === :isdefined
            replace_use_expr_with!(ctx, use, quoted(true))
            continue
        elseif head === :foreigncall || head === :gc_preserve_begin
            # Splicing in the new ccall roots without moving existing other roots
            npreserved_vars = length(preserved_vars)
            if npreserved_vars == 0
                use.expr.args[use.exidx] = nothing
            else
                cvar = preserved_vars[1]
                expr.args[use.exidx] = cvar
                add_use(ctx.infomap, cvar, use)
                if npreserved_vars > 1
                    old_nargs = length(expr.args)
                    resize!(expr.args, old_nargs + npreserved_vars - 1)
                    for i in 2:npreserved_vars
                        cvar = preserved_vars[i]
                        expr.args[old_nargs + i - 1] = cvar
                        add_use(ctx.infomap, cvar, ValueUse(use.stmts, use.stmtidx,
                                                            expr, old_nargs + i - 1))
                    end
                end
            end
            continue
        elseif head === :call
            fld = expr.args[3]
            if is_known_call(expr, isdefined, ctx.sv.src, ctx.sv.mod)
                use_kind = 0
            elseif is_known_call(expr, getfield, ctx.sv.src, ctx.sv.mod)
                use_kind = 1
            elseif is_known_call(expr, setfield!, ctx.sv.src, ctx.sv.mod)
                use_kind = 2
            end
        end
        if isa(fld, QuoteNode)
            fld = fld.value
        end
        if !isa(fld, Int)
            v = get(ctx.sym_count, fld, 0=>0)::Pair{Int,Int}
            if first(v) == 0
                @assert use_kind == 0
                replace_use_expr_with!(ctx, use, quoted(false))
                continue
            end
            fld = last(v)
        end
        if fld > nf || fld <= 0
            @assert use_kind == 0
            replace_use_expr_with!(ctx, use, quoted(false))
            continue
        end
        if use_kind == 0
            # isdefined
            if isassigned(si.defs, fld)
                replace_use_expr_with!(ctx, use, quoted(true))
                continue
            end
            flag_slot = flag_vars[fld]
            replace_use_expr_with!(ctx, use, flag_slot, true, true)
        elseif use_kind == 1
            # getfield
            if isassigned(si.defs, fld)
                replace_var = vars[fld]
                replace_use_expr_with!(ctx, use, replace_var, true, true)
            elseif haskey(ctx.setfield_typ, fld) || (!isempty(si.names) &&
                                                     haskey(ctx.setfield_typ, si.names[fld]))
                replace_var = vars[fld]
                flag_slot = flag_vars[fld]
                check_expr = Expr(:call, throw_undefreferror_ifnot, flag_slot)
                stmts = Any[check_expr]
                add_use(ctx.infomap, flag_slot, ValueUse(stmts, 1, check_expr, 2))
                stmt = use.stmts[use.stmtidx]::Expr
                if stmt !== expr
                    @assert stmt.head === :(=) && stmt.args[2] === expr
                    stmt.args[2] = replace_var
                    push!(stmts, stmt)
                    scan_expr_use!(ctx.infomap, stmts, length(stmts), stmt, ctx.sv.src)
                end
                ctx.changes[use.stmts=>use.stmtidx] = nothing
                use.stmts[use.stmtidx] = stmts
            else
                throw_err = Expr(:call, GlobalRef(Core, :throw), UndefRefError())
                throw_err.typ = Union{}
                # We can't simply delete the assignment since it can break assumptions downstream
                # If we are assigning to anything, just assign the throw instead.
                stmt = use.stmts[use.stmtidx]::Expr
                if stmt === expr
                    use.stmts[use.stmtidx] = Any[throw_err]
                else
                    @assert stmt.head === :(=) && stmt.args[2] === expr
                    stmt.args[2] = throw_err
                    stmts = Any[stmt]
                    use.stmts[use.stmtidx] = stmts
                    lhs = stmt.args[1]
                    if isa(lhs, SSAValue) || isa(lhs, Slot)
                        add_def(ctx.infomap, lhs, ValueDef(stmt, stmts, 1))
                    end
                end
                ctx.changes[use.stmts=>use.stmtidx] = nothing
            end
        else
            @assert use_kind == 2
            # setfield!
            replace_var = vars[fld]
            val = expr.args[4]
            stmts = Any[]
            if !isassigned(si.defs, fld)
                flag_slot = flag_vars[fld]
                assign_flag_ex = :($flag_slot = true)
                push!(stmts, assign_flag_ex)
                add_def(ctx.infomap, flag_slot, ValueDef(assign_flag_ex, stmts, length(stmts)))
            end
            assign_ex = :($replace_var = $val)
            push!(stmts, assign_ex)
            scan_expr_use!(ctx.infomap, stmts, length(stmts), assign_ex, ctx.sv.src)
            stmt = use.stmts[use.stmtidx]::Expr
            if stmt !== expr
                @assert stmt.head === :(=) && stmt.args[2] === expr
                extra_assign_ex = :($(stmt.args[1]) = $val)
                push!(stmts, extra_assign_ex)
                scan_expr_use!(ctx.infomap, stmts, length(stmts), extra_assign_ex, ctx.sv.src)
            end
            use.stmts[use.stmtidx] = stmts
            ctx.changes[use.stmts=>use.stmtidx] = nothing
        end
    end
end

macro check_ast(ctx, ex)
    eex = esc(ex)
    ectx = esc(ctx)
    qex = QuoteNode(ex)
    quote
        ctx = $ectx
        if !$eex
            println("Check failed: ", $qex)
            println("Code:")
            println(ctx.sv.src)
            println("Value Info Map:")
            show_info(stdout, ctx.infomap, ctx)
            ccall(:abort, Union{}, ())
        end
    end
end

function verify_value_infomap(ctx::AllocOptContext)
    seen = IdDict()
    in_methods = IdDict()
    infomap = ctx.infomap
    all_stmts = IdDict()
    for i in 1:length(infomap.ssas)
        isassigned(infomap.ssas, i) || continue
        info = infomap.ssas[i]
        ssav = SSAValue(i - 1)
        @check_ast(ctx, !info.has_method)
        ndef = 0
        for def in info.defs
            check_valid(def, ctx.changes) || continue
            ndef += 1
            @check_ast(ctx, !haskey(seen, def))
            seen[def] = nothing
            all_stmts[def.stmts] = nothing

            assign = def.assign
            @check_ast(ctx, isa(assign, Expr))
            @check_ast(ctx, assign.head === :(=))
            @check_ast(ctx, def.stmts[def.stmtidx] === assign)
            @check_ast(ctx, assign.args[1] === ssav)
        end
        @check_ast(ctx, ndef <= 1)
        nuse = 0
        for use in info.uses
            check_valid(use, ctx.changes) || continue
            nuse += 1
            @check_ast(ctx, !haskey(seen, use))
            seen[use] = nothing
            all_stmts[use.stmts] = nothing
            stmt = use.stmts[use.stmtidx]
            if !isdefined(use, :expr)
                @check_ast(ctx, stmt === ssav)
            else
                expr = use.expr
                @check_ast(ctx, expr.args[use.exidx] === ssav)
                if stmt === expr
                    if expr.head === :(=)
                        @check_ast(ctx, use.exidx != 1)
                    end
                elseif expr.head === :&
                    @check_ast(ctx, use.exidx === 1)
                    ccall_expr = stmt::Expr
                    if ccall_expr.head === :(=)
                        ccall_expr = ccall_expr.args[2]::Expr
                    end
                    @check_ast(ctx, ccall_expr.head === :foreigncall)
                    nccallargs = ccall_expr.args[5]::Int
                    found_ccallarg = false
                    for argi in 6:(5 + nccallargs)
                        if ccall_expr.args[argi] === expr
                            found_ccallarg = true
                            break
                        end
                    end
                    @check_ast(ctx, found_ccallarg)
                else
                    @check_ast(ctx, stmt.head === :(=) && stmt.args[2] === expr)
                end
            end
        end
        if ndef == 0
            @check_ast(ctx, nuse == 0)
        end
    end
    for i in 1:length(infomap.slots)
        isassigned(infomap.slots, i) || continue
        info = infomap.slots[i]
        slotv = SlotNumber(i)
        if info.has_method
            in_methods[slotv] = nothing
        end
        ndef = 0
        for def in info.defs
            check_valid(def, ctx.changes) || continue
            @check_ast(ctx, !haskey(seen, def))
            seen[def] = nothing
            all_stmts[def.stmts] = nothing

            assign = def.assign
            @check_ast(ctx, def.stmts[def.stmtidx] === assign)
            if isa(assign, NewvarNode)
                @check_ast(ctx, var_has_undef(ctx.sv.src, i))
                @check_ast(ctx, assign.slot === slotv)
            else
                ndef += 1
                @check_ast(ctx, assign.head === :(=))
                @check_ast(ctx, assign.args[1] === slotv)
            end
        end
        if ctx.sv.src.slotflags[i] & SLOT_ASSIGNEDONCE != 0
            @check_ast(ctx, ndef <= 1)
        end
        for use in info.uses
            check_valid(use, ctx.changes) || continue
            @check_ast(ctx, !haskey(seen, use))
            seen[use] = nothing
            all_stmts[use.stmts] = nothing
            stmt = use.stmts[use.stmtidx]

            if !isdefined(use, :expr)
                @check_ast(ctx, symequal(stmt, slotv))
            else
                expr = use.expr
                @check_ast(ctx, symequal(expr.args[use.exidx], slotv))
                if stmt === expr
                    if expr.head === :(=)
                        @check_ast(ctx, use.exidx != 1)
                    end
                elseif expr.head === :&
                    @check_ast(ctx, use.exidx === 1)
                    ccall_expr = stmt::Expr
                    if ccall_expr.head === :(=)
                        ccall_expr = ccall_expr.args[2]::Expr
                    end
                    @check_ast(ctx, ccall_expr.head === :foreigncall)
                    nccallargs = ccall_expr.args[5]::Int
                    found_ccallarg = false
                    for argi in 6:(5 + nccallargs)
                        if ccall_expr.args[argi] === expr
                            found_ccallarg = true
                            break
                        end
                    end
                    @check_ast(ctx, found_ccallarg)
                else
                    @check_ast(ctx, stmt.head === :(=) && stmt.args[2] === expr)
                end
            end
        end
    end
    verify_value_infomap_rescan(ctx, ctx.sv.src.code, seen, in_methods, all_stmts)
    @check_ast(ctx, isempty(all_stmts))
    @check_ast(ctx, isempty(seen))
end

function verify_seen_info(ctx::AllocOptContext, seen, def_or_use)
    @check_ast(ctx, haskey(seen, def_or_use))
    delete!(seen, def_or_use)
end

function verify_value_infomap_rescan(ctx::AllocOptContext, stmts, seen, in_methods, all_stmts)
    if haskey(all_stmts, stmts)
        delete!(all_stmts, stmts)
    end
    for i in 1:length(stmts)
        ex = stmts[i]
        if isa(ex, Vector{Any})
            verify_value_infomap_rescan(ctx, ex, seen, in_methods, all_stmts)
            continue
        elseif isa(ex, SSAValue)
            verify_seen_info(ctx, seen, ValueUse(stmts, i))
            continue
        elseif isa(ex, Slot)
            verify_seen_info(ctx, seen, ValueUse(stmts, i))
            continue
        elseif isa(ex, NewvarNode)
            verify_seen_info(ctx, seen, ValueDef(ex, stmts, i))
            continue
        elseif !isa(ex, Expr)
            continue
        end
        head = ex.head
        is_meta_expr_head(head) && continue
        if head === :(=)
            rhs = ex.args[2]
            lhs = ex.args[1]
            lhs_slot = isa(lhs, Slot)
            rhs_slot = isa(rhs, Slot)
            if lhs_slot && rhs_slot
                @check_ast(ctx, !symequal(lhs, rhs))
            end
            if lhs_slot || isa(lhs, SSAValue)
                verify_seen_info(ctx, seen, ValueDef(ex, stmts, i))
            end
            if isa(rhs, Expr)
                ex = rhs
                head = ex.head
            else
                if rhs_slot || isa(rhs, SSAValue)
                    verify_seen_info(ctx, seen, ValueUse(stmts, i, ex, 2))
                end
                continue
            end
        end
        eargs = ex.args
        isccall = head === :foreigncall
        for j in 1:length(eargs)
            ea = eargs[j]
            if j == 1 && head === :method
                if isa(ea, Slot)
                    @check_ast(ctx, haskey(in_methods, SlotNumber(slot_id(ea))))
                    continue
                end
            end
            if isccall && isa(ea, Expr)
                @check_ast(ctx, ea.head === :& || ea.head === :boundscheck)
                ea2 = ea.args[1]
                if isa(ea2, Slot) || isa(ea2, SSAValue)
                    verify_seen_info(ctx, seen, ValueUse(stmts, i, ea, 1))
                end
            elseif isa(ea, Slot) || isa(ea, SSAValue)
                verify_seen_info(ctx, seen, ValueUse(stmts, i, ex, j))
            end
        end
    end
end

function optimize_value!(ctx::AllocOptContext, key)
    info = ctx.infomap[key]
    if info.has_method
        return
    end
    remove_invalid!(info, ctx.changes)
    if isempty(info.uses)
        delete_value!(ctx, info, key)
        return
    elseif isempty(info.defs)
        # Undefined but used variable these can only be variables whose uses are guarded by
        # conditions that will never be true. Ignore them for now.
        # TODO: slots introduced by inlining multiple-`return` functions might have their
        # defs removed by DCE but not be marked StaticUndef.
        #@assert ((!key.second && key.first <= ctx.sv.nargs) ||
        #         var_has_static_undef(ctx.sv.src, key.first, key.second))
        return
    elseif var_has_undef(ctx.sv.src, key.first, key.second)
        return
    end
    # Split assignments of leaftypes that do no have overlaping uses with each other
    split_disjoint_assign!(ctx, info, key) && return
    # If we've found SSAValue or Slot as def, no need to try other optimizations
    merge_value_ssa!(ctx, info, key) && return
    # Check if it's just a constant
    propagate_const_def!(ctx, info, key) && return
    # Split/eliminate allocation
    split_struct_alloc!(ctx, info, key) && return
    return
end



# Simplify the AST and eliminate unnecessary allocations
# This does the following optimizations iteratively until there's no changes to be made
# 1. Remove def of variables that is never used
# 2. Merge variables that have identical SSA definitions
# 3. Replace variables that always take the same constant definition with the constant
# 4. Deconstruct variables with defs that are only constant or inlined allocations
#    and with only known non-escape uses into its fields.
# This pass requires the IR to be linear, in particular, the only allowed nested `Expr` are
# RHS of a `Expr(:(=))` and the `Expr(:(&))` argument of a `ccall`.
function alloc_elim_pass!(sv::OptimizationState)
    body = sv.src.code
    infomap = collect_value_infos(body, sv.src, sv.nargs)
    ctx = AllocOptContext(infomap, sv)
    ENABLE_VERIFY_VALUEINFO[] && verify_value_infomap(ctx)
    while !isempty(ctx.todo)
        k, v = first(ctx.todo)
        k = k::Pair{Int,Bool}
        delete!(ctx.todo, k)
        optimize_value!(ctx, k)
        ENABLE_VERIFY_VALUEINFO[] && verify_value_infomap(ctx)
    end
    len = length(body)
    next_i = 1
    while next_i <= len
        i = next_i
        next_i += 1
        ex = body[i]
        if isa(ex, Vector{Any})
            len += length(ex) - 1
            next_i = i
            splice!(body, i, ex)
        elseif (isa(ex, Expr) && ex.head === :call && length(ex.args) == 2 &&
                ex.args[1] === throw_undefreferror_ifnot)
            len += 3
            next_i = i + 4
            flag_slot = ex.args[2]
            _growat!(body, i, 3)

            not_flag = newvar!(ctx.sv, Bool)
            not_flag_val = :($(GlobalRef(Core.Intrinsics, :not_int))($flag_slot))
            not_flag_val.typ = Bool
            body[i] = :($not_flag = $not_flag_val)

            after_lbl = genlabel(ctx.sv)
            body[i + 1] = Expr(:gotoifnot, not_flag, after_lbl.label)

            throw_err = Expr(:call, GlobalRef(Core, :throw), UndefRefError())
            throw_err.typ = Union{}
            body[i + 2] = throw_err
            body[i + 3] = after_lbl
        elseif (isa(ex, Expr) && ex.head === :call && length(ex.args) == 3 &&
                ex.args[1] === check_new_field_type)
            val_ex = ex.args[2]
            typ_ex = ex.args[3]

            len += 4
            next_i = i + 5
            _growat!(body, i, 4)

            flag = newvar!(ctx.sv, Bool)
            flag_val = :($(GlobalRef(Core, :isa))($val_ex, $typ_ex))
            flag_val.typ = Bool
            body[i] = :($flag = $flag_val)

            not_flag = newvar!(ctx.sv, Bool)
            not_flag_val = :($(GlobalRef(Core.Intrinsics, :not_int))($flag))
            not_flag_val.typ = Bool
            body[i + 1] = :($not_flag = $not_flag_val)

            after_lbl = genlabel(ctx.sv)
            body[i + 2] = Expr(:gotoifnot, not_flag, after_lbl.label)

            throw_err = Expr(:foreigncall, QuoteNode(:jl_type_error_new_expr),
                             Union{}, Core.svec(Any, Any), QuoteNode(:ccall), 2, typ_ex, val_ex)
            throw_err.typ = Union{}
            body[i + 3] = throw_err
            body[i + 4] = after_lbl
        end
    end
end


function copy_expr_in_array!(ary, seen)
    for i in 1:length(ary)
        ex = ary[i]
        isa(ex, Expr) || continue
        ex = ex::Expr
        if ex.head === :meta
            # Try to save some memory by using the same object for all `:pop_loc` meta node
            if ex !== META_POP_LOC && length(ex.args) == 1 && ex.args[1] === :pop_loc
                ary[i] = META_POP_LOC
            end
            continue # No need to copy meta expressions
        end
        if haskey(seen, ex)
            newex = Expr(ex.head)
            append!(newex.args, ex.args)
            newex.typ = ex.typ
            ary[i] = ex = newex
            # No need to add to `seen`, there's no way there can be another one of the copied
            # version in the AST....
        else
            seen[ex] = nothing
            if haskey(seen, ex.args)
                # Haven't actually seen this happen but it's pretty easy to check
                ex.args = copy(ex.args)
            else
                seen[ex.args] = nothing
            end
        end
        copy_expr_in_array!(ex.args, seen)
    end
end

# Clone expressions that appears multiple times in the code
function copy_duplicated_expr_pass!(sv::OptimizationState)
    copy_expr_in_array!(sv.src.code, IdDict())
end

# fix label numbers to always equal the statement index of the label
function reindex_labels!(sv::OptimizationState)
    body = sv.src.code
    mapping = get_label_map(body)
    for i = 1:length(body)
        el = body[i]
        # For goto and enter, the statement and the target has to be
        # both reachable or both not.
        if isa(el, LabelNode)
            labelnum = mapping[el.label]
            @assert labelnum !== 0
            body[i] = LabelNode(labelnum)
        elseif isa(el, GotoNode)
            labelnum = mapping[el.label]
            @assert labelnum !== 0
            body[i] = GotoNode(labelnum)
        elseif isa(el, Expr)
            if el.head === :gotoifnot
                labelnum = mapping[el.args[2]::Int]
                @assert labelnum !== 0
                el.args[2] = labelnum
            elseif el.head === :enter
                labelnum = mapping[el.args[1]::Int]
                @assert labelnum !== 0
                el.args[1] = labelnum
            end
        end
    end
end

function return_type(@nospecialize(f), @nospecialize(t))
    params = Params(ccall(:jl_get_tls_world_age, UInt, ()))
    rt = Union{}
    if isa(f, Builtin)
        rt = builtin_tfunction(f, Any[t.parameters...], nothing, params)
        if isa(rt, TypeVar)
            rt = rt.ub
        else
            rt = widenconst(rt)
        end
    else
        for m in _methods(f, t, -1, params.world)
            ty = typeinf_type(m[3], m[1], m[2], true, params)
            ty === nothing && return Any
            rt = tmerge(rt, ty)
            rt === Any && break
        end
    end
    return rt
end
Browse the archive

https://github.com/JuliaLang/julia