Revision 530496eee918e69b99da6319b3dfd4fcb5a0f870 authored by Fredrik Ekre on 26 April 2017, 13:23:48 UTC, committed by Fredrik Ekre on 26 April 2017, 21:42:35 UTC
1 parent 98414b9
Raw File
llvm-gcroot.cpp
// This file is a part of Julia. License is MIT: https://julialang.org/license

#define DEBUG_TYPE "lower_gcroot"
#undef DEBUG

#include "llvm-version.h"
#include <llvm/ADT/SmallBitVector.h>
#include <llvm/IR/Value.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Intrinsics.h>
#if JL_LLVM_VERSION >= 30700
#include <llvm/IR/LegacyPassManager.h>
#else
#include <llvm/PassManager.h>
#endif
#include <llvm/Support/raw_ostream.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#if JL_LLVM_VERSION >= 30600
#include <llvm/IR/DIBuilder.h>
#include <llvm/IR/DebugInfoMetadata.h>
#endif
#include <llvm/IR/MDBuilder.h>

#include "fix_llvm_assert.h"

#include <vector>
#include <queue>
#include <set>
#include <functional>
#include <utility>

#include "julia.h"

#if JL_LLVM_VERSION >= 30700
#define LLVM37_param(x) (x),
#else
#define LLVM37_param(x)
#endif

using namespace llvm;

extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false);

namespace {

#ifndef JL_NDEBUG
static struct {
    unsigned count;
    unsigned locals;
    unsigned temp;
} jl_gc_frame_stats = {0};
#endif

typedef std::pair<CallInst*, unsigned> frame_register;
struct liveness {
    typedef unsigned id;
    enum {
        // an assignment to a gcroot exists in the basic-block
        // (potentially no live-in from the predecessor basic-blocks)
        assign = 1<<0,
        // a use of a gcroot exists in the basic-block
        // (potentially a "kill" and no live-out to the successor basic-blocks)
        kill   = 1<<1,
        // the gcroot is live over the entire basic-block
        // (the assign/kill are not dominating of entry/exit)
        live   = 1<<2

        // live | kill | assign:
        //     a usage and assignment exist, but it is also live on exit,
        //     the entry liveness depends on whether a store or use is
        //     encountered first
        // live | kill:
        //     a usage exists,
        //     but the value must be live for the entire basic-block
        //     since it is not the terminal usage in the domination tree
        // kill | assign:
        //     a usage and definition exist in domination order,
        //     so the actual lifetime is only a subset of the basic-block
        // live | assign:
        //     impossible (this would be strange)
    };
};

static void tbaa_decorate_gcframe(Instruction *inst,
                                  std::set<Instruction*> &visited,
                                  MDNode *tbaa_gcframe)
{
    if (!visited.insert(inst).second)
        return;
#if JL_LLVM_VERSION >= 30500
    Value::user_iterator I = inst->user_begin(), E = inst->user_end();
#else
    Value::use_iterator I = inst->use_begin(), E = inst->use_end();
#endif
    for (;I != E;++I) {
        Instruction *user = dyn_cast<Instruction>(*I);
        if (!user) {
            continue;
        }
        else if (isa<GetElementPtrInst>(user)) {
            if (__likely(user->getOperand(0) == inst)) {
                tbaa_decorate_gcframe(user, visited, tbaa_gcframe);
            }
        }
        else if (isa<StoreInst>(user)) {
            if (user->getOperand(1) == inst) {
                user->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe);
            }
        }
        else if (isa<LoadInst>(user)) {
            user->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe);
        }
        else if (isa<BitCastInst>(user)) {
            tbaa_decorate_gcframe(user, visited, tbaa_gcframe);
        }
    }
}

static void tbaa_decorate_gcframe(Instruction *inst, MDNode *tbaa_gcframe)
{
    std::set<Instruction*> visited;
    tbaa_decorate_gcframe(inst, visited, tbaa_gcframe);
}

class JuliaGCAllocator {
public:
    JuliaGCAllocator(Function &F, CallInst *ptlsStates,
                     Type *T_pjlvalue, MDNode *tbaa) :
        F(F),
        M(*F.getParent()),
        T_int1(Type::getInt1Ty(F.getContext())),
        T_int8(Type::getInt8Ty(F.getContext())),
        T_int32(Type::getInt32Ty(F.getContext())),
        T_int64(Type::getInt64Ty(F.getContext())),
        V_null(T_pjlvalue ? Constant::getNullValue(T_pjlvalue) : nullptr),
        ptlsStates(ptlsStates),
#if JL_LLVM_VERSION >= 50000
        gcframe(ptlsStates ? new AllocaInst(T_pjlvalue, 0, ConstantInt::get(T_int32, 0)) : nullptr),
#else
        gcframe(ptlsStates ? new AllocaInst(T_pjlvalue, ConstantInt::get(T_int32, 0)) : nullptr),
#endif
        gcroot_func(M.getFunction("julia.gc_root_decl")),
        gckill_func(M.getFunction("julia.gc_root_kill")),
        jlcall_frame_func(M.getFunction("julia.jlcall_frame_decl")),
        gcroot_flush_func(M.getFunction("julia.gcroot_flush")),
        except_enter_func(M.getFunction("julia.except_enter")),
        jlleave_func(M.getFunction("jl_pop_handler")),
        tbaa_gcframe(tbaa)
    {
/* Algorithm sketch:
 *  Compute liveness for each basic block
 *    liveness computed at the basic-block level for <inst, arg-offset> pairs
 *  Propagate liveness from each basic block to its predecessors
 *  Allocate argument slot for each jlcall frame
 */
        if (gcframe) {
#ifdef JL_DEBUG_BUILD
            gcframe->setName("gcrootframe");
#endif
            gcframe->insertAfter(ptlsStates);
        }
    }

private:
    Function &F;
    Module &M;
    Type *const T_int1;
    Type *const T_int8;
    Type *const T_int32;
    Type *const T_int64;
    Value *const V_null;
    CallInst *const ptlsStates;
    AllocaInst *const gcframe;
    Function *const gcroot_func;
    Function *const gckill_func;
    Function *const jlcall_frame_func;
    Function *const gcroot_flush_func;
    Function *const except_enter_func;
    Function *const jlleave_func;
    MDNode *const tbaa_gcframe;

    Instruction *get_pgcstack(Instruction *ptlsStates);
    frame_register get_gcroot(Value *ptr);
    void collapseRedundantRoots();
    bool record_usage(CallInst *callInst,
        std::map<BasicBlock*, std::map<frame_register, liveness::id> > &bb_uses,
        std::map<BasicBlock*, SmallBitVector> &regs_used,
        unsigned &offset, bool commit=true);
    unsigned find_space_for(CallInst *callInst,
        std::map<BasicBlock*, std::map<frame_register, liveness::id> > &bb_uses,
        std::map<BasicBlock*, SmallBitVector> &regs_used);
    void rearrangeRoots();
    void lowerHandlers();
public:
    void allocate_frame();
};

struct HandlerData {
    // Pairs of <jl_pop_handle call inst>, number of pops left after popping
    // this frame.
    std::vector<std::pair<CallInst*,uint64_t>> leaves;
    // enters that are directly nested in this frame
    std::set<CallInst*> nested;
    std::unique_ptr<std::vector<CallInst*>> parent_vec;
    CallInst *parent{nullptr};
    bool processed{false};
#ifdef _COMPILER_MICROSOFT_
    // MSVC 2013 seems to call the copy version instead of the move verion
    // without this, which triggers compilation error since `std::unique_ptr`
    // is not copyable.
    HandlerData() = default;
    HandlerData(HandlerData &&other)
        : HandlerData()
    {
        operator=(std::move(other));
    }
    HandlerData(const HandlerData&) = delete;
    HandlerData &operator=(HandlerData &&other)
    {
        std::swap(leaves, other.leaves);
        std::swap(nested, other.nested);
        std::swap(parent_vec, other.parent_vec);
        std::swap(parent, other.parent);
        std::swap(processed, other.processed);
        return *this;
    }
    HandlerData &operator=(const HandlerData&) = delete;
#endif
};

void JuliaGCAllocator::lowerHandlers()
{
    if (!except_enter_func)
        return;
    auto jlenter_func = M.getNamedValue("jl_enter_handler");
    auto setjmp_func = M.getNamedValue(jl_setjmp_name);
    // Collect all exception enters
    std::map<CallInst*,HandlerData> handlers;
    for (auto &BB: F) {
        for (auto &I: BB) {
            auto call = dyn_cast<CallInst>(&I);
            if (call && call->getCalledValue() == except_enter_func) {
                handlers[call] = HandlerData();
            }
        }
    }

    if (handlers.empty())
        return;

    typedef std::map<CallInst*,HandlerData>::iterator hdlr_iter_t;
    // For each exception enter, compute the life time of the enter, find
    // the corresponding leaves and collect a list of nested exception frames.
    // This assumes the exception frames have simple structure. E.g.
    // there's no recursion and different frames do not share the same leave.
    std::function<void(hdlr_iter_t)> process_handler = [&] (hdlr_iter_t hdlr) {
        auto enter = hdlr->first;
        auto &data = hdlr->second;
        if (data.processed)
            return;
        data.processed = true;
        std::vector<BasicBlock::iterator> frontier = {
            ++BasicBlock::iterator(enter)
        };
        // Start from the enter, the enter is live until it hits a leave
        // or the control flow terminates.
        std::set<Instruction*> visited;
        auto add_inst = [&] (BasicBlock::iterator it) {
            if (visited.insert(&*it).second) {
                frontier.push_back(it);
            }
        };
        while (!frontier.empty()) {
            BasicBlock::iterator IT = frontier.back();
            Instruction *I = &*IT;
            ++IT;
            frontier.pop_back();
            if (I->isTerminator()) {
                BasicBlock *bb = I->getParent();
                for (auto S = succ_begin(bb), E = succ_end(bb); S != E; S++)
                    add_inst(S->getFirstInsertionPt());
                continue;
            }
            auto call = dyn_cast<CallInst>(I);
            if (!call) {
                add_inst(IT);
                continue;
            }
            auto callee = call->getCalledValue();
            if (callee == except_enter_func) {
                // Nested frame, make sure the frame is processed and skip to
                // its leaves.
                auto other_it = handlers.find(call);
                assert(other_it != handlers.end());
                process_handler(other_it);
                auto &other_data = other_it->second;
                data.nested.insert(call);
                // Record a reference to parent frame, there should be only
                // one parent in most cases although more than one parent is
                // also possible if LLVM split the enter call.
                if (__unlikely(other_data.parent_vec)) {
                    other_data.parent_vec->push_back(enter);
                }
                else if (__unlikely(other_data.parent)) {
                    other_data.parent_vec.reset(
                        new std::vector<CallInst*>{other_data.parent, enter});
                }
                else {
                    other_data.parent = enter;
                }
                for (auto leave: other_it->second.leaves) {
                    if (leave.second > 0) {
                        data.leaves.push_back(std::make_pair(leave.first,
                                                             leave.second - 1));
                    }
                    else {
                        add_inst(++BasicBlock::iterator(leave.first));
                    }
                }
            }
            else if (callee == jlleave_func) {
                // Leave, record how many pops are left on this leave.
                assert(call->getNumArgOperands() == 1);
                auto arg = cast<ConstantInt>(call->getArgOperand(0));
                uint64_t pop_count = arg->getLimitedValue();
                assert(pop_count >= 1);
                data.leaves.push_back(std::make_pair(call, pop_count - 1));
            }
            else {
                // Otherwise, go to the next instruction.
                add_inst(IT);
            }
        }
    };

    // Make sure all frames are processed.
    for (auto IT = handlers.begin(), E = handlers.end(); IT != E; ++IT)
        process_handler(IT);

    std::set<CallInst*> processing;
    Value *handler_sz = ConstantInt::get(T_int32, sizeof(jl_handler_t));
    // Now allocate the stack slots.
    // At each iteration, we allocate a new handler and assign all the remaining
    // frames that don't have a non-processed child to this handler.
    Instruction *firstInst = &F.getEntryBlock().front();
    while (!handlers.empty()) {
        processing.clear();
#if JL_LLVM_VERSION >= 50000
        auto buff = new AllocaInst(T_int8, 0, handler_sz, "", firstInst);
#else
        auto buff = new AllocaInst(T_int8, handler_sz, "", firstInst);
#endif
        buff->setAlignment(16);
        // Collect the list of frames to process.
        for (auto &hdlr: handlers) {
            auto enter = hdlr.first;
            auto &nested = hdlr.second.nested;
            if (nested.empty()) {
                processing.insert(enter);
            }
        }
        // There shouldn't be loops so there has to be leafs
        assert(!processing.empty());
        for (auto enter: processing) {
            // Lower the enter
            auto new_enter = CallInst::Create(jlenter_func, buff, "", enter);
#ifndef _OS_WINDOWS_
            // For LLVM 3.3 compatibility
            Value *args[] = {buff, ConstantInt::get(T_int32,0)};
            auto sj = CallInst::Create(setjmp_func, args, "", enter);
#else
            auto sj = CallInst::Create(setjmp_func, buff, "", enter);
#endif
            // We need to mark this on the call site as well. See issue #6757
            sj->setCanReturnTwice();
#if JL_LLVM_VERSION >= 30600
            if (auto dbg = enter->getMetadata(LLVMContext::MD_dbg)) {
                new_enter->setMetadata(LLVMContext::MD_dbg, dbg);
                sj->setMetadata(LLVMContext::MD_dbg, dbg);
            }
#else
            (void)new_enter;
#endif
            // Remove it from all the parents.
            auto it = handlers.find(enter);
            if (__unlikely(it->second.parent_vec)) {
                for (auto parent: *it->second.parent_vec) {
                    handlers.find(parent)->second.nested.erase(enter);
                }
            }
            else if (it->second.parent) {
                handlers.find(it->second.parent)->second.nested.erase(enter);
            }
            // Delete it.
            handlers.erase(it);
            enter->replaceAllUsesWith(sj);
            enter->eraseFromParent();
        }
    }
}

Instruction *JuliaGCAllocator::get_pgcstack(Instruction *ptlsStates)
{
    Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, pgcstack) / sizeof(void*));
    return GetElementPtrInst::Create(LLVM37_param(NULL)
                                     ptlsStates,
                                     ArrayRef<Value*>(offset),
                                     "jl_pgcstack");
}

frame_register JuliaGCAllocator::get_gcroot(Value *ptr)
{
    frame_register frame;
    frame.first = dyn_cast<CallInst>(ptr);
    frame.second = 0;
    if (frame.first == NULL) {
        // also try to look through GEP for jlcall_frame_func
        if (GetElementPtrInst *gepInst = dyn_cast<GetElementPtrInst>(ptr)) {
            if (gepInst->getNumIndices() == 1) {
                frame.first = dyn_cast<CallInst>(gepInst->getPointerOperand());
                if (frame.first && frame.first->getCalledValue() == jlcall_frame_func)
                    frame.second = cast<ConstantInt>(gepInst->idx_begin()->get())->getZExtValue();
                else
                    frame.first = NULL;
            }
        }
    }
    return frame;
}

void JuliaGCAllocator::collapseRedundantRoots()
{
    for (BasicBlock::iterator I = gcframe->getParent()->begin(), E(gcframe); I != E; ) {
        CallInst* callInst = dyn_cast<CallInst>(&*I);
        ++I;
        if (callInst && callInst->getCalledValue() == gcroot_func) {
            // see if a root is only used briefly for `store -> load -> store other` pattern or `store, store other`
            // such that the first store can be trivially replaced with just "other" and delete the chain
            // or if is used for store, but the value is never needed
            StoreInst *theStore = NULL;
            unsigned n_stores = 0;
            bool variable_slot = true; // whether this gc-root is only used as a variable-slot; e.g. whether theLoad is theValue
            LoadInst *theLoad = NULL;
            for (User::use_iterator use = callInst->use_begin(), usee = callInst->use_end(); use != usee; ) {
#if JL_LLVM_VERSION >= 30500
                User *user = use->getUser();
#else
                User *user = use.getUse().getUser();
#endif
                ++use;
                if (StoreInst *storeInst = dyn_cast<StoreInst>(user)) {
                    if (n_stores == 0)
                        theStore = storeInst;
                    else
                        theStore = NULL;
                    Value *theValue = storeInst->getValueOperand();
                    if (!theValue->hasOneUse()) { // not just the store
                        variable_slot = false; // this gc-root is used as more than just a variable-slot (aka phi node)
                    }
                    n_stores++;
                }
                else if (LoadInst *loadInst = dyn_cast<LoadInst>(user)) {
                    if (loadInst->use_empty()) {
                        // dead load?
                        loadInst->eraseFromParent();
                    }
                    else {
                        if (theLoad) {
                            // multiple live loads, this is hard to optimize, so skip it
                            n_stores = 0;
                            break;
                        }
                        theLoad = loadInst;
                    }
                }
                else {
                    // what is this? oh, well. skip trying to optimize this gc-root
                    n_stores = 0;
                    break;
                }
            }

            if (n_stores == 0)
                continue;

            if (theLoad == NULL) {
                // this gc-root is never loaded from, so we don't need it as a variable location
                // delete any stores to this gc-root that would be keeping an otherwise-unused value alive
                for (User::use_iterator use = callInst->use_begin(), usee = callInst->use_end(); use != usee; ) {
#if JL_LLVM_VERSION >= 30500
                    User *user = use->getUser();
#else
                    User *user = use.getUse().getUser();
#endif
                    StoreInst *theStore = cast<StoreInst>(user);
                    ++use;
                    Value *theValue = theStore->getValueOperand();
                    if (theValue->hasOneUse()) { // just the store
                        if (&*I == theStore) ++I;
                        theStore->eraseFromParent();
                    }
                }
                if (callInst->use_empty()) {
                    callInst->eraseFromParent();
                    continue;
                }
                else if (callInst->hasOneUse()) {
                    User::use_iterator use = callInst->use_begin();
#if JL_LLVM_VERSION >= 30500
                    theStore = cast<StoreInst>(use->getUser());
#else
                    theStore = cast<StoreInst>(use.getUse().getUser());
#endif
                }
            }

            if ((theLoad != NULL && variable_slot) ||
                (theLoad == NULL && theStore != NULL)) {
                Value *theValue = theLoad ? theLoad : theStore->getValueOperand();
                if (theValue->hasNUses(theLoad ? 1 : 2)) { // only uses are theStore and theLoad and theOther
                    // check if this value is only used for a store to another gcroot
                    User::use_iterator value_use = theValue->use_begin();
                    if (theLoad && *value_use == theStore)
                        ++value_use;
#if JL_LLVM_VERSION >= 30500
                    StoreInst *theOther = dyn_cast<StoreInst>(value_use->getUser());
                    unsigned OperandNo = value_use->getOperandNo();
#else
                    StoreInst *theOther = dyn_cast<StoreInst>(value_use.getUse().getUser());
                    unsigned OperandNo = value_use.getOperandNo();
#endif
                    if (theOther && OperandNo != StoreInst::getPointerOperandIndex()) {
                        // test whether this store is valid as a gc-root
                        bool patternMatchSuccess = false;
                        frame_register gcroot_other_gep = get_gcroot(theOther->getPointerOperand());
                        CallInst *gcroot_other = gcroot_other_gep.first;
                        // it could be a gcroot...
                        if (gcroot_other && gcroot_other->getCalledValue() == gcroot_func && theStore != NULL) {
                            // need to make sure there aren't any other uses of gcroot_other (including gckill)
                            // between the initial store and the replacement store
                            // TODO: do this better once we have liveness information for locals?
                            BasicBlock *current = theStore->getParent();
                            BasicBlock::iterator bbi(theStore);
                            BasicBlock::iterator bbi_end = current->end();
                            patternMatchSuccess = true;
                            ++bbi;
                            while (patternMatchSuccess) {
                                Instruction *inst = &*bbi;
                                if (inst == theOther) {
                                    break; // success
                                }
                                for (Instruction::op_iterator op = inst->op_begin(), op_e = inst->op_end(); op != op_e; ++op) {
                                    if (op->get() == gcroot_other) {
                                        patternMatchSuccess = false;
                                        break; // fail: gcroot_other had another reference, can't make this replacement
                                    }
                                }
                                if (++bbi == bbi_end) {
                                    // iterate the basicblock forward, if it's a simple branch
#if JL_LLVM_VERSION >= 30500
                                    BasicBlock *next = current->getUniqueSuccessor();
#else
                                    succ_iterator SI = succ_begin(current), E = succ_end(current);
                                    BasicBlock *next = NULL;
                                    if (SI != E) {
                                        next = *SI;
                                        for (++SI; SI != E; ++SI) {
                                            if (*SI != next) {
                                                next = NULL;
                                                break;
                                            }
                                        }
                                    }
#endif
                                    if (next) {
                                        bbi = next->begin();
                                        bbi_end = next->end();
                                        current = next;
                                    }
                                    else {
                                        patternMatchSuccess = false;
                                    }
                                }
                            }
                        }
                        // ...or it could be a jlcall frame
                        else if (gcroot_other && gcroot_other->getCalledValue() == jlcall_frame_func) {
                            // jlcall_frame_func slots are effectively SSA,
                            // so it's always safe to merge an earlier store into it
                            // but do need to update liveness information for this slot
                            // TODO: do this better once we have liveness information for locals?
                            if (theStore != NULL && theOther->getParent() == theStore->getParent()) {
                                //unsigned arg_offset = gcroot_other_gep->second;
                                //frame_register def(gcroot_other, arg_offset);
                                //std::map<frame_register, liveness::id> &inuse_list = bb_uses[theOther->getParent()];
                                //std::map<frame_register, liveness::id>::iterator inuse_reg = inuse_list.find(def);
                                patternMatchSuccess = true;
                            }
                        }
                        if (patternMatchSuccess) {
                            // do the gcroot merge -- replace gcroot with gcroot_other in all the store operations for this gcroot
                            // so that theOther, theLoad, and this gcroot are no longer needed
                            Value *gcroot_other = theOther->getPointerOperand();
                            if (&*I == theOther) ++I;
                            theOther->eraseFromParent();
                            if (theLoad) {
                                if (&*I == theLoad) ++I;
                                theLoad->eraseFromParent();
                            }
                            if (theStore) {
                                theStore->setOperand(StoreInst::getPointerOperandIndex(), gcroot_other);
                            }
                            else {
                                for (User::use_iterator use = callInst->use_begin(), usee = callInst->use_end(); use != usee; ) {
#if JL_LLVM_VERSION >= 30500
                                    User *user = use->getUser();
#else
                                    User *user = use.getUse().getUser();
#endif
                                    ++use;
                                    StoreInst *theStore = cast<StoreInst>(user);
                                    theStore->setOperand(StoreInst::getPointerOperandIndex(), gcroot_other);
                                }
                            }
                            callInst->eraseFromParent();
                        }
                    }
                }
            }
        }
    }
}

bool JuliaGCAllocator::record_usage(CallInst *callInst,
        std::map<BasicBlock*, std::map<frame_register, liveness::id> > &bb_uses,
        std::map<BasicBlock*, SmallBitVector> &regs_used,
        unsigned &offset, bool commit)
{
/* record-usage(inst, bb-uses, regs-used, offset, commit=true)
 *     for (arg-offset, operand) in enumerate(arguments(inst))
 *         reg = <inst, arg-offset>
 *         for (bb, liveness) in bb-uses
 *             if not reg in liveness
 *                 continue
 *             # TODO: optimize better if liveness[reg] doesn't contain live
 *             conflict = regs-used[bb][offset + arg-offset]
 *             if commit
 *                 assert !conflict
 *                 regs-used[bb][offset + arg-offset] = true
 *             else if conflict
 *                 return false
 *     return true
 */
    unsigned arg_n = cast<ConstantInt>(callInst->getArgOperand(0))->getZExtValue();
#if 0 // suboptimal allocator that ignores computed liveness data
    {
        SmallBitVector &regs = regs_used[&callInst->getParent()->getParent()->getEntryBlock()];
        if (offset + arg_n > regs.size())
            regs.resize(offset + arg_n);
        for (unsigned arg_offset = 0; arg_offset < arg_n; ++arg_offset) {
            frame_register def(callInst, arg_offset);
#else // }} better allocator that uses per-basicblock liveness
    for (std::map<BasicBlock*, std::map<frame_register, liveness::id> >::iterator
            live_reg = bb_uses.begin(), e = bb_uses.end(); live_reg != e; ++live_reg) {
        BasicBlock *bb = live_reg->first;
        SmallBitVector &regs = regs_used[bb];
        if (offset + arg_n > regs.size())
            regs.resize(offset + arg_n);
        for (unsigned arg_offset = 0; arg_offset < arg_n; ++arg_offset) {
            frame_register def(callInst, arg_offset);
            std::map<frame_register, liveness::id>::iterator inuse_reg = live_reg->second.find(def);
            if (inuse_reg == live_reg->second.end())
                continue;
            // TODO: optimize here better when not live in inuse_reg->second, by ascertaining liveness at the instruction level for this bb
#endif
            unsigned index = offset + arg_offset;
            bool conflict = regs.test(index);
            if (commit) {
                assert(!conflict);
                regs.set(index);
            }
            else if (conflict) {
                // update the offset argument to point to the next open register beyond index
                // to help avoid unnecessary work and accelerate the search
                ++offset;
                while (offset + arg_offset < regs.size() && regs.test(offset + arg_offset))
                    ++offset;
                return false;
            }
        }
    }
    return true;
}

unsigned JuliaGCAllocator::find_space_for(CallInst *callInst,
        std::map<BasicBlock*, std::map<frame_register, liveness::id> > &bb_uses,
        std::map<BasicBlock*, SmallBitVector> &regs_used)
{
/* find-space-for(inst, bb-uses, regs-used)
 *     n = 0
 *     while !record-usage(inst, bb-uses, regs-used, n, false)
 *         n++
 *     return n
 *
 */
    unsigned n = 0;
    while (!record_usage(callInst, bb_uses, regs_used, n, false)) { }
    return n;
}

void JuliaGCAllocator::rearrangeRoots()
{
    for (auto BB = F.begin(), E(F.end()); BB != E; BB++) {
        auto terminst = BB->getTerminator();
        if (!isa<ReturnInst>(terminst) && !isa<UnreachableInst>(terminst))
            continue;
        SmallVector<StoreInst*, 16> toRemove;
        for (auto I = BB->rbegin(), E(BB->rend()); I != E; ++I) {
            // Only handle the simplest case for now, give up if there's a call
            // or load from the GC frame.
            // (Assume we don't have loads that can alias GC frame
            // unless the source address is a `julia.gc_root_decl`)
            Instruction *inst = &*I;
            if (isa<CallInst>(inst))
                break;
            if (LoadInst *loadInst = dyn_cast<LoadInst>(inst)) {
                CallInst *loadAddr =
                    dyn_cast<CallInst>(loadInst->getPointerOperand());
                if (loadAddr && loadAddr->getCalledValue() == gcroot_func)
                    break;
                continue;
            }
            if (StoreInst *storeInst = dyn_cast<StoreInst>(inst)) {
                CallInst *storeAddr =
                    dyn_cast<CallInst>(storeInst->getPointerOperand());
                if (storeAddr && storeAddr->getCalledValue() == gcroot_func)
                    toRemove.push_back(storeInst);
                continue;
            }
        }
        for (auto inst: toRemove) {
            CallInst *decl = cast<CallInst>(inst->getPointerOperand());
            inst->eraseFromParent();
            // TODO removing unused slot should probably be handled later
            // when we allocate the frame
            if (decl->use_empty()) {
                decl->eraseFromParent();
            }
        }
    }
}

void JuliaGCAllocator::allocate_frame()
{
    lowerHandlers();
    if (!ptlsStates)
        return;
    Instruction *last_gcframe_inst = gcframe;
    collapseRedundantRoots();
    rearrangeRoots();

/* # initialize the kill BasicBlock of all jlcall-frames
 * bb-uses : map<BB, map< pair<inst, arg-offset>, assign|live|kill > >
 * for inst in gc-frame(f)
 *     if inst match "a call to make-jlcall-frame"
 *         kill-use = get-unique-use<isa-CallInst>(inst)
 *         bb-uses[bb][<def, 1:nargs>] = kill
 */
    std::map<BasicBlock*, std::map<frame_register, liveness::id> > bb_uses;
    std::priority_queue< std::pair<unsigned, CallInst*> > frames;
    for (BasicBlock::iterator I = gcframe->getParent()->begin(), E(gcframe); I != E; ) {
        CallInst* callInst = dyn_cast<CallInst>(&*I);
        ++I;
        if (callInst && callInst->getCalledValue() == jlcall_frame_func) {
            BasicBlock *bb = NULL;
            unsigned arg_n = cast<ConstantInt>(callInst->getArgOperand(0))->getZExtValue();
            frames.push(std::make_pair(arg_n, callInst));
            // the jlcall frame should have been passed to exactly one call (the jlcall) -- find its basic-block
            for (User::use_iterator use = callInst->use_begin(), usee = callInst->use_end(); use != usee; ++use) {
#if JL_LLVM_VERSION >= 30500
                User *user = use->getUser();
#else
                User *user = use.getUse().getUser();
#endif
                if (CallInst *callInst = dyn_cast<CallInst>(user)) {
                    assert(bb == NULL);
                    bb = callInst->getParent();
#ifdef JL_NDEBUG
                    break;
#endif
                }
            }
            assert(bb != NULL);
            std::map<frame_register, liveness::id> &inuse_list = bb_uses[bb];
            for (unsigned arg_offset = 0; arg_offset < arg_n; ++arg_offset) {
                inuse_list[frame_register(callInst, arg_offset)] = liveness::kill;
            }
        }
    }

/* # initialize the dataflow queue for tracking liveness
 * bb-queue : queue<BB>
 * for bb in iterator(f)
 *     inuse-list = &bb-uses[bb]
 *     for inst in reverse-iterator(f)
 *         if inst matches store-inst # todo: or inst matches "gc-store-inst" (for stores to non-stack-slots)
 *             if inst->operand(0) matches "a call to make-jlcall-frame" (or gep thereof)
 *                 def = <inst, arg-offset>
 *                 if inuse-list[def] is kill
 *                     inuse-list[def] = assign|kill
 *      if not has-live-out(bb)
 *          continue
 *      for pred in predecessors(bb)
 *          if not pred in bb-queue
 *              push-back(bb-queue, pred)
 */
    std::vector<BasicBlock*> bb_queue;
    for (std::map<BasicBlock*, std::map<frame_register, liveness::id> >::iterator
            live_reg = bb_uses.begin(), e = bb_uses.end(); live_reg != e; ++live_reg) {
        BasicBlock *bb = live_reg->first;
        std::map<frame_register, liveness::id> &inuse_list = live_reg->second;
        unsigned live_out = inuse_list.size();

        for (BasicBlock::iterator ri = bb->end(); ri != bb->begin(); ) {
            Instruction *i = &*--ri;
            if (StoreInst *storeInst = dyn_cast<StoreInst>(i)) {
                frame_register def = get_gcroot(storeInst->getPointerOperand());
                if (CallInst *callInst = def.first) {
                    if (callInst->getCalledValue() == jlcall_frame_func) {
                        std::map<frame_register, liveness::id>::iterator inuse_reg = inuse_list.find(def);
                        if (inuse_reg != inuse_list.end() && inuse_reg->second == liveness::kill) {
                            inuse_reg->second |= liveness::assign;
                            --live_out;
                        }
                    }
                }
            }
        }
        if (live_out == 0)
            continue;
        assert(&*bb != &F.getEntryBlock()); // only undef variables should live-out from the entry bb
        for (pred_iterator PI = pred_begin(bb), PE = pred_end(bb); PI != PE; ++PI) {
            if (std::find(bb_queue.begin(), bb_queue.end(), *PI) == bb_queue.end())
                bb_queue.push_back(*PI);
        }
    }


/* # follow liveness information flow until termination
 * while not empty(bb-queue)
 *     bb = pop(bb-queue)
 *     inuse-list = &bb-uses[bb]
 *     changes = 0
 *     for succ in successors(bb)
 *         for <def, op> in bb-uses[succ]
 *             if (not assign in op) and not (inuse-list[def] contains live or assign)
 *                 # need to add live value from successor to current block, unless it was already marked
 *                 inuse-list[def] |= live
 *                 changes += 1
 *     for inst in iterator(bb)
 *         if inst matches store-inst # todo: or inst matches "gc-store-inst" (for stores to non-stack-slots)
 *             if inst->operand(0) matches "a call to make-jlcall-frame" (or gep thereof)
 *                 def = <inst, arg-offset>
 *                 if live in inuse-list[def]
 *                     inuse-list[def] |= assign
 *                     if not kill in inuse-list[def]
 *                          # found the assignment, def is no longer live
 *                          inuse-list[def] &= ~live
 *                     else
 *                          # not a true kill due to recursion -- the kill happened before this assign in this BB, so it is still live
 *                     changes -= 1
 *     # if the live list changed, make sure all predecessors are in the queue to be reanalyzed
 *     if changes == 0
 *         continue
 *     for pred in predecessors(bb)
 *         if not pred in bb-queue
 *             push-back(bb-queue, pred)
 */

    while (!bb_queue.empty()) {
        BasicBlock *bb = bb_queue.back();
        bb_queue.pop_back();
        std::map<frame_register, liveness::id> &inuse_list = bb_uses[bb];
        unsigned changes = 0;
        for (succ_iterator SI = succ_begin(bb), SE = succ_end(bb); SI != SE; ++SI) {
            std::map<frame_register, liveness::id> &succ_uses = bb_uses[*SI];
            for (std::map<frame_register, liveness::id>::iterator reg = succ_uses.begin(), rege = succ_uses.end(); reg != rege; ++reg) {
                if (!(reg->second & liveness::assign)) {
                    liveness::id &live = inuse_list[reg->first];
                    if (!(live & (liveness::live | liveness::assign))) {
                        live |= liveness::live;
                        ++changes;
                    }
                }
            }
        }
        if (!changes) // short-circuit
            continue;
        for (BasicBlock::iterator i = bb->begin(), ie = bb->end(); i != ie; ++i) {
            if (StoreInst *storeInst = dyn_cast<StoreInst>(&*i)) {
                frame_register def = get_gcroot(storeInst->getPointerOperand());
                if (CallInst *callInst = def.first) {
                    if (callInst->getCalledValue() == jlcall_frame_func) {
                        std::map<frame_register, liveness::id>::iterator inuse_reg = inuse_list.find(def);
                        if (inuse_reg != inuse_list.end() && (inuse_reg->second & liveness::live)) {
                            inuse_reg->second |= liveness::assign;
                            if (!(inuse_reg->second & liveness::kill))
                                inuse_reg->second &= ~liveness::live;
                            --changes;
                        }
                    }
                }
            }
        }
        if (!changes)
            continue;
        assert(bb != &F.getEntryBlock()); // only undef variables should live-out from the entry bb
        for (pred_iterator PI = pred_begin(bb), PE = pred_end(bb); PI != PE; ++PI) {
            if (std::find(bb_queue.begin(), bb_queue.end(), *PI) == bb_queue.end())
                bb_queue.push_back(*PI);
        }
    }

/* # allocate space in locals for the variables
 * TBD
 */

/* # allocate space in temp-args for each jlcall frame
 * regs-used = zip(get-basic-blocks(), falses)
 * for <frame-size, inst> in frames
 *     frame-offset = find-space-for(inst, bb-uses, regs-used)
 *     record-usage(inst, bb-uses, regs-used, frame-offset)
 * # frame iterator allocates space in reverse size order
 * # so that the large frames get allocated first
 * # and the smaller frames just fill in the gaps
 * # I believe this is likely to give good results (compact gc-frames)
 */
    std::map<BasicBlock*, SmallBitVector> regs_used;
    std::map<CallInst*, unsigned> frame_offsets;
    unsigned maxDepth = 0;
    for (; !frames.empty(); frames.pop()) {
        std::pair<unsigned, CallInst*> frame = frames.top();
        unsigned arg_n = frame.first;
        if (arg_n == 0) continue;
        CallInst *callInst = frame.second;
        unsigned frame_offset = find_space_for(callInst, bb_uses, regs_used);
        record_usage(callInst, bb_uses, regs_used, frame_offset);
        frame_offsets[callInst] = frame_offset;
        if (frame_offset + arg_n > maxDepth)
            maxDepth = frame_offset + arg_n;
    }

/* # cleanup and finalize the IR */
    for (Function::iterator bb = F.begin(), be = F.end(); bb != be; ++bb) {
        for (BasicBlock::iterator i = bb->begin(), ie = bb->end(); i != ie; ) {
            Instruction *inst = &*i;
            ++i;
            // delete the now unused gckill information
            if (CallInst* callInst = dyn_cast<CallInst>(inst)) {
                Value *callee = callInst->getCalledValue();
                if (callee == gckill_func || callee == gcroot_flush_func) {
                    callInst->eraseFromParent();
                }
            }
            // delete any StoreInst to a gcframe slot that isn't live
            else if (StoreInst *storeInst = dyn_cast<StoreInst>(inst)) {
                frame_register def = get_gcroot(storeInst->getPointerOperand());
                if (CallInst *gcroot = def.first) {
                    if (gcroot->getCalledValue() == jlcall_frame_func) {
                        std::map<frame_register, liveness::id> &inuse_list = bb_uses[storeInst->getParent()];
                        std::map<frame_register, liveness::id>::iterator inuse_reg = inuse_list.find(def);
                        if (inuse_reg == inuse_list.end())
                            storeInst->eraseFromParent();
                    }
                }
            }
        }
    }

    Instruction *tempSlot;
    if (frame_offsets.empty()) {
        tempSlot = NULL;
    }
    else {
        tempSlot = GetElementPtrInst::Create(LLVM37_param(NULL) gcframe, ArrayRef<Value*>(ConstantInt::get(T_int32, 2)));
#ifdef JL_DEBUG_BUILD
        tempSlot->setName("temproots");
#endif
        tempSlot->insertAfter(gcframe);
        if (last_gcframe_inst == gcframe)
            last_gcframe_inst = tempSlot;

        // finalize all of the jlcall frames by replacing all of the frames with the appropriate gep(tempslot)
        for (std::map<CallInst*, unsigned>::iterator frame = frame_offsets.begin(), framee = frame_offsets.end(); frame != framee; ++frame) {
            CallInst *gcroot = frame->first;
            tbaa_decorate_gcframe(gcroot, tbaa_gcframe);
            Value* offset[1] = {ConstantInt::get(T_int32, frame->second)};
            GetElementPtrInst *gep = GetElementPtrInst::Create(LLVM37_param(NULL) tempSlot, makeArrayRef(offset));
            gep->insertAfter(last_gcframe_inst);
            gcroot->replaceAllUsesWith(gep);
            gep->takeName(gcroot);
            gcroot->eraseFromParent();
            last_gcframe_inst = gep;
        }
    }

/* # replace all intermediate roots defs with the appropriate gep(gcroot)
 * for inst in entry-basic-block(function)
 *     if inst matches "gc-root"
 *         slot = get-argument(inst)
 *         newslot = CreateGEP(gc-frame) -> at InsertPoint(gc-frame)
 *         Replace(slot, newslot) -> at InsertPoint(gc-frame)
 *         CreateStore(NULL, newslot) -> at InsertPoint(gc-frame)
 */
#if JL_LLVM_VERSION >= 30600
    DIBuilder dbuilder(M, false);
#endif
    unsigned argSpaceSize = 0;
    for (BasicBlock::iterator I = gcframe->getParent()->begin(), E(gcframe); I != E; ) {
        Instruction* inst = &*I;
        ++I;
        if (CallInst* callInst = dyn_cast<CallInst>(inst)) {
            if (callInst->getCalledValue() == gcroot_func) {
                unsigned offset = 2 + argSpaceSize++;
                Instruction *argTempi = GetElementPtrInst::Create(LLVM37_param(NULL) gcframe, ArrayRef<Value*>(ConstantInt::get(T_int32, offset)));
                argTempi->insertAfter(last_gcframe_inst);
#if JL_LLVM_VERSION >= 30600
                Metadata *md = ValueAsMetadata::getIfExists(callInst);
                if (md) {
                    Value *mdValue = MetadataAsValue::get(M.getContext(), md);
                    for (User::use_iterator use = mdValue->use_begin(), usee = mdValue->use_end(); use != usee; ) {
                        // need to recreate the dbg_declare accordingly -- sadly llvm can't handle this in RAUW
                        User *user = use->getUser();
                        ++use;
                        if (CallInst* dbg = dyn_cast<CallInst>(user)) {
                            Function *called = dbg->getCalledFunction();
                            if (called && called->getIntrinsicID() == Intrinsic::dbg_declare) {
                                DILocalVariable *dinfo = cast<DILocalVariable>(cast<MetadataAsValue>(dbg->getOperand(1))->getMetadata());
                                DIExpression *expr = cast<DIExpression>(cast<MetadataAsValue>(dbg->getOperand(2))->getMetadata());
                                SmallVector<uint64_t, 8> addr;
                                addr.push_back(llvm::dwarf::DW_OP_plus);
                                addr.push_back(offset * sizeof(void*));
                                addr.append(expr->elements_begin(), expr->elements_end());
                                expr = dbuilder.createExpression(addr);
                                dbuilder.insertDeclare(gcframe, dinfo, expr,
#if JL_LLVM_VERSION >= 30700
                                                dbg->getDebugLoc(),
#endif
                                                dbg->getParent());
                                dbg->eraseFromParent();
                            }
                        }
                    }
                }
#endif
                tbaa_decorate_gcframe(callInst, tbaa_gcframe);
                callInst->replaceAllUsesWith(argTempi);
                argTempi->takeName(callInst);
                callInst->eraseFromParent();
                // Initialize the slots for function variables to NULL
                StoreInst *store = new StoreInst(V_null, argTempi);
                store->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe);
                store->insertAfter(argTempi);
                last_gcframe_inst = store;
            }
        }
        else if (AllocaInst *allocaInst = dyn_cast<AllocaInst>(inst)) {
            if (allocaInst->getAllocatedType() == V_null->getType()) {
                // TODO: this is overly aggressive at zeroing allocas that may not actually need to be zeroed
                StoreInst *store = new StoreInst(V_null, allocaInst);
                store->insertAfter(allocaInst);
            }
        }
    }
#if JL_LLVM_VERSION >= 30600
    dbuilder.finalize();
#endif

    if (argSpaceSize + maxDepth == 0) {
        // 0 roots; remove gc frame entirely
        gcframe->eraseFromParent();
    }
    else {
        // Initialize the slots for temporary variables to NULL
        if (maxDepth > 0) {
            BitCastInst *tempSlot_i8 = new BitCastInst(tempSlot, PointerType::get(T_int8, 0), "", last_gcframe_inst);
            Type *argsT[2] = {tempSlot_i8->getType(), T_int32};
            Function *memset = Intrinsic::getDeclaration(&M, Intrinsic::memset, makeArrayRef(argsT));
            Value *args[5] = {
                tempSlot_i8, // dest
                ConstantInt::get(T_int8, 0), // val
                ConstantInt::get(T_int32, sizeof(jl_value_t*)*maxDepth), // len
                ConstantInt::get(T_int32, 0), // align
                ConstantInt::get(T_int1, 0)}; // volatile
            CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args));
            zeroing->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe);
            zeroing->insertAfter(tempSlot_i8);
            last_gcframe_inst = zeroing;
        }

        gcframe->setOperand(0, ConstantInt::get(T_int32, 2 + argSpaceSize + maxDepth)); // fix up the size of the gc frame
        if (tempSlot)
            tempSlot->setOperand(1, ConstantInt::get(T_int32, 2 + argSpaceSize)); // fix up the offset to the temp slot space

        IRBuilder<> builder(F.getContext());
        Type *T_ppjlvalue = V_null->getType()->getPointerTo();
#ifdef _P64
        Type *T_size = T_int64;
#else
        Type *T_size = T_int32;
#endif
        builder.SetInsertPoint(&*(++BasicBlock::iterator(last_gcframe_inst))); // set insert *before* point, e.g. after the gcframe
        DebugLoc noDbg;
        builder.SetCurrentDebugLocation(noDbg);

        Instruction *inst =
            builder.CreateStore(ConstantInt::get(T_size, (argSpaceSize + maxDepth) << 1),
                                builder.CreateBitCast(builder.CreateConstGEP1_32(gcframe, 0), T_size->getPointerTo()));
        inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe);
        inst = builder.CreateStore(builder.CreateLoad(builder.Insert(get_pgcstack(ptlsStates))),
                                   builder.CreatePointerCast(builder.CreateConstGEP1_32(gcframe, 1), PointerType::get(T_ppjlvalue,0)));
        inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe);
        builder.CreateStore(gcframe, builder.Insert(get_pgcstack(ptlsStates)));

        // Finish by emitting the gc pops before any return
        for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
            if (isa<ReturnInst>(I->getTerminator())) {
                builder.SetInsertPoint(I->getTerminator()); // set insert *before* Ret
                Instruction *gcpop =
                    (Instruction*)builder.CreateConstGEP1_32(gcframe, 1);
                inst = builder.CreateLoad(gcpop);
                inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe);
                inst = builder.CreateStore(builder.CreatePointerCast(inst, T_ppjlvalue),
                                           builder.Insert(get_pgcstack(ptlsStates)));
                inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe);
            }
        }
    }

#ifndef JL_NDEBUG
    jl_gc_frame_stats.count++;
    jl_gc_frame_stats.locals += argSpaceSize;
    jl_gc_frame_stats.temp += maxDepth;
#endif
}

struct LowerGCFrame: public ModulePass {
    static char ID;
    LowerGCFrame() : ModulePass(ID)
    {}

private:
    void runOnFunction(Module *M, Function &F, Function *ptls_getter,
                       Type *T_pjlvalue, MDNode *tbaa_gcframe);
    bool runOnModule(Module &M) override;
};

static void eraseFunction(Module &M, const char *name)
{
    if (Function *f = M.getFunction(name)) {
        f->eraseFromParent();
    }
}

static void ensure_enter_function(Module &M)
{
    auto T_int8  = Type::getInt8Ty(M.getContext());
    auto T_pint8 = PointerType::get(T_int8, 0);
    auto T_void = Type::getVoidTy(M.getContext());
    auto T_int32 = Type::getInt32Ty(M.getContext());
    if (!M.getNamedValue("jl_enter_handler")) {
        std::vector<Type*> ehargs(0);
        ehargs.push_back(T_pint8);
        Function::Create(FunctionType::get(T_void, ehargs, false),
                         Function::ExternalLinkage, "jl_enter_handler", &M);
    }
    if (!M.getNamedValue(jl_setjmp_name)) {
        std::vector<Type*> args2(0);
        args2.push_back(T_pint8);
#ifndef _OS_WINDOWS_
        args2.push_back(T_int32);
#endif
        Function::Create(FunctionType::get(T_int32, args2, false),
                         Function::ExternalLinkage, jl_setjmp_name, &M)
            ->addFnAttr(Attribute::ReturnsTwice);
    }
}

bool LowerGCFrame::runOnModule(Module &M)
{
    MDNode *tbaa_gcframe = tbaa_make_child("jtbaa_gcframe").first;

    Function *ptls_getter = M.getFunction("jl_get_ptls_states");
    ensure_enter_function(M);
    FunctionType *functype = nullptr;
    Type *T_pjlvalue = nullptr;
    if (ptls_getter) {
        functype = ptls_getter->getFunctionType();
        auto T_ppjlvalue =
            cast<PointerType>(functype->getReturnType())->getElementType();
        T_pjlvalue = cast<PointerType>(T_ppjlvalue)->getElementType();
    }
    for (auto F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration())
            continue;
        runOnFunction(&M, *F, ptls_getter, T_pjlvalue, tbaa_gcframe);
    }

    // Cleanup for GC frame lowering.
    eraseFunction(M, "julia.gc_root_decl");
    eraseFunction(M, "julia.gc_root_kill");
    eraseFunction(M, "julia.jlcall_frame_decl");
    eraseFunction(M, "julia.gcroot_flush");
    eraseFunction(M, "julia.except_enter");
    return true;
}

void LowerGCFrame::runOnFunction(Module *M, Function &F, Function *ptls_getter,
                                 Type *T_pjlvalue, MDNode *tbaa_gcframe)
{
    CallInst *ptlsStates = nullptr;
    for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end();
         ptls_getter && I != E; ++I) {
        if (CallInst *callInst = dyn_cast<CallInst>(&*I)) {
            if (callInst->getCalledValue() == ptls_getter) {
                ptlsStates = callInst;
                break;
            }
        }
    }
    JuliaGCAllocator allocator(F, ptlsStates, T_pjlvalue, tbaa_gcframe);
    allocator.allocate_frame();
}

char LowerGCFrame::ID = 0;

static RegisterPass<LowerGCFrame> X("LowerGCFrame", "Lower GCFrame Pass",
                                    false /* Only looks at CFG */,
                                    false /* Analysis Pass */);
}

#ifndef JL_NDEBUG // llvm assertions build
// gdb debugging code for inspecting the bb_uses map
void jl_dump_bb_uses(std::map<BasicBlock*, std::map<frame_register, liveness::id> > &bb_uses)
{
    for (std::map<BasicBlock*, std::map<frame_register, liveness::id> >::iterator
            live_reg = bb_uses.begin(), e = bb_uses.end(); live_reg != e; ++live_reg) {
        BasicBlock *bb = live_reg->first;
        errs() << '\n' << bb << '\n';
        for (std::map<frame_register, liveness::id>::iterator
                regs = live_reg->second.begin(), regse = live_reg->second.end(); regs != regse; ++regs) {
            errs() << regs->second << " #" << regs->first.second << ' ' << regs->first.first << '\n';
        }
    }
}
#endif

Pass *createLowerGCFramePass()
{
    return new LowerGCFrame();
}

extern "C" JL_DLLEXPORT
void LLVMAddLowerGCFramePass(LLVMPassManagerRef PM) {
    unwrap(PM)->add(createLowerGCFramePass());
}
back to top