Revision 6113e1c338b23c16424b8f077b8f3b31f828067b authored by TEC on 29 April 2023, 09:53:44 UTC, committed by TEC on 20 October 2023, 18:14:11 UTC
These new types allow for arbitrary properties to be attached to regions of an AbstractString or AbstractChar. The most common expected use of this is for styled content, where the styling is attached as special properties. This has the major benefit of separating styling from content, allowing both to be treated better — functions that operate on the content won't need variants that work around styling, and operations that interact with the styling will have many less edge cases (e.g. printing a substring and having to work around unterminated ANSI styling codes). Other use cases are also enabled by this, such as text links and the preserving of line information in string processing.
1 parent 7bf226b
llvm-remove-addrspaces.cpp
// This file is a part of Julia. License is MIT: https://julialang.org/license
#include "llvm-version.h"
#include <llvm/IR/Module.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Support/Debug.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include <llvm/Transforms/Utils/ValueMapper.h>
#include "passes.h"
#include "llvm-codegen-shared.h"
#define DEBUG_TYPE "remove_addrspaces"
using namespace llvm;
using AddrspaceRemapFunction = std::function<unsigned(unsigned)>;
//
// Helpers
//
class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
AddrspaceRemapFunction ASRemapper;
public:
AddrspaceRemoveTypeRemapper(AddrspaceRemapFunction ASRemapper)
: ASRemapper(ASRemapper)
{
}
Type *remapType(Type *SrcTy)
{
// If we already have an entry for this type, return it.
Type *DstTy = MappedTypes[SrcTy];
if (DstTy)
return DstTy;
DstTy = SrcTy;
if (auto Ty = dyn_cast<PointerType>(SrcTy)) {
if (Ty->isOpaque()) {
DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace()));
}
else {
//Remove once opaque pointer transition is complete
DstTy = PointerType::get(
remapType(Ty->getNonOpaquePointerElementType()),
ASRemapper(Ty->getAddressSpace()));
}
}
else if (auto Ty = dyn_cast<FunctionType>(SrcTy)) {
SmallVector<Type *, 4> Params;
for (unsigned Index = 0; Index < Ty->getNumParams(); ++Index)
Params.push_back(remapType(Ty->getParamType(Index)));
DstTy = FunctionType::get(
remapType(Ty->getReturnType()), Params, Ty->isVarArg());
}
else if (auto Ty = dyn_cast<StructType>(SrcTy)) {
if (Ty->isLiteral()) {
// Since a literal type has to have the body when it is created,
// we need to remap the element types first. This is safe only
// for literal types (i.e., no self-reference) and thus treated
// separately.
assert(!Ty->hasName()); // literal type has no name.
SmallVector<Type *, 4> NewElTys;
NewElTys.reserve(Ty->getNumElements());
for (auto E: Ty->elements())
NewElTys.push_back(remapType(E));
DstTy = StructType::get(Ty->getContext(), NewElTys, Ty->isPacked());
} else if (!Ty->isOpaque()) {
// If the struct type is not literal and not opaque, it can have
// self-referential fields (i.e., pointer type of itself as a
// field).
StructType *DstTy_ = StructType::create(Ty->getContext());
if (Ty->hasName()) {
auto Name = std::string(Ty->getName());
Ty->setName(Name + ".bad");
DstTy_->setName(Name);
}
// To avoid infinite recursion, shove the placeholder of the DstTy before
// recursing into the element types:
MappedTypes[SrcTy] = DstTy_;
auto Els = Ty->getNumElements();
SmallVector<Type *, 4> NewElTys(Els);
for (unsigned i = 0; i < Els; ++i)
NewElTys[i] = remapType(Ty->getElementType(i));
DstTy_->setBody(NewElTys, Ty->isPacked());
DstTy = DstTy_;
}
}
else if (auto Ty = dyn_cast<ArrayType>(SrcTy))
DstTy = ArrayType::get(
remapType(Ty->getElementType()), Ty->getNumElements());
else if (auto Ty = dyn_cast<VectorType>(SrcTy))
DstTy = VectorType::get(remapType(Ty->getElementType()), Ty);
if (DstTy != SrcTy)
LLVM_DEBUG(
dbgs() << "Remapping type:\n"
<< " from " << *SrcTy << "\n"
<< " to " << *DstTy << "\n");
MappedTypes[SrcTy] = DstTy;
return DstTy;
}
private:
DenseMap<Type *, Type *> MappedTypes;
};
class AddrspaceRemoveValueMaterializer : public ValueMaterializer {
ValueToValueMapTy &VM;
RemapFlags Flags;
ValueMapTypeRemapper *TypeMapper = nullptr;
public:
AddrspaceRemoveValueMaterializer(
ValueToValueMapTy &VM,
RemapFlags Flags = RF_None,
ValueMapTypeRemapper *TypeMapper = nullptr)
: VM(VM), Flags(Flags), TypeMapper(TypeMapper)
{
}
Value *materialize(Value *SrcV)
{
Value *DstV = nullptr;
if (auto CE = dyn_cast<ConstantExpr>(SrcV)) {
Type *Ty = remapType(CE->getType());
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
// peek through addrspacecasts if their address spaces match
// (like RemoveNoopAddrSpaceCasts, but for const exprs)
Constant *Src = mapConstant(CE->getOperand(0));
if (Src->getType()->getPointerAddressSpace() ==
Ty->getPointerAddressSpace())
DstV = Src;
}
else {
// recreate other const exprs with their operands remapped
SmallVector<Constant *, 4> Ops;
for (unsigned Index = 0; Index < CE->getNumOperands();
++Index) {
Constant *Op = CE->getOperand(Index);
Constant *NewOp = mapConstant(Op);
Ops.push_back(NewOp ? cast<Constant>(NewOp) : Op);
}
if (CE->getOpcode() == Instruction::GetElementPtr) {
// GEP const exprs need to know the type of the source.
// asserts remapType(typeof arg0) == typeof mapValue(arg0).
Constant *Src = CE->getOperand(0);
auto ptrty = cast<PointerType>(Src->getType()->getScalarType());
//Remove once opaque pointer transition is complete
if (!ptrty->isOpaque()) {
Type *SrcTy = remapType(ptrty->getNonOpaquePointerElementType());
DstV = CE->getWithOperands(Ops, Ty, false, SrcTy);
}
}
else
DstV = CE->getWithOperands(Ops, Ty);
}
}
if (DstV)
LLVM_DEBUG(
dbgs() << "Materializing value:\n"
<< " from " << *SrcV << "\n"
<< " to " << *DstV << "\n");
return DstV;
}
private:
Type *remapType(Type *SrcTy)
{
if (TypeMapper)
return TypeMapper->remapType(SrcTy);
else
return SrcTy;
}
Value *mapValue(Value *V)
{
return MapValue(V, VM, Flags, TypeMapper, this);
}
Constant *mapConstant(Constant *V)
{
return MapValue(V, VM, Flags, TypeMapper, this);
}
};
bool RemoveNoopAddrSpaceCasts(Function *F)
{
bool Changed = false;
SmallVector<AddrSpaceCastInst *, 4> NoopCasts;
for (Instruction &I : instructions(F)) {
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
if (ASC->getSrcAddressSpace() == ASC->getDestAddressSpace()) {
LLVM_DEBUG(
dbgs() << "Removing noop address space cast:\n"
<< I << "\n");
ASC->replaceAllUsesWith(ASC->getOperand(0));
NoopCasts.push_back(ASC);
}
}
}
for (auto &I : NoopCasts)
I->eraseFromParent();
return Changed;
}
static void copyComdat(GlobalObject *Dst, const GlobalObject *Src)
{
const Comdat *SC = Src->getComdat();
if (!SC)
return;
Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName());
DC->setSelectionKind(SC->getSelectionKind());
Dst->setComdat(DC);
}
//
// Actual pass
//
unsigned removeAllAddrspaces(unsigned AS)
{
return AddressSpace::Generic;
}
bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
{
ValueToValueMapTy VMap;
AddrspaceRemoveTypeRemapper TypeRemapper(ASRemapper);
AddrspaceRemoveValueMaterializer Materializer(
VMap, RF_None, &TypeRemapper);
// Loop over all of the global variables, creating versions without address
// spaces. We only add the new globals to the VMap, attributes and
// initializers come later.
SmallVector<GlobalVariable *, 4> Globals;
for (auto &GV : M.globals())
Globals.push_back(&GV);
for (auto &GV : Globals) {
std::string Name;
if (GV->hasName()) {
Name = std::string(GV->getName());
GV->setName(Name + ".bad");
}
else
Name = "";
GlobalVariable *NGV = new GlobalVariable(
M,
TypeRemapper.remapType(GV->getValueType()),
GV->isConstant(),
GV->getLinkage(),
(Constant *)nullptr,
Name,
(GlobalVariable *)nullptr,
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
NGV->copyAttributesFrom(GV);
VMap[GV] = NGV;
}
// Loop over the aliases in the module.
SmallVector<GlobalAlias *, 4> Aliases;
for (auto &GA : M.aliases())
Aliases.push_back(&GA);
for (auto &GA : Aliases) {
std::string Name;
if (GA->hasName()) {
Name = std::string(GA->getName());
GA->setName(Name + ".bad");
}
else
Name = "";
auto *NGA = GlobalAlias::create(
TypeRemapper.remapType(GA->getValueType()),
GA->getType()->getPointerAddressSpace(),
GA->getLinkage(),
Name,
&M);
NGA->copyAttributesFrom(GA);
VMap[GA] = NGA;
}
// Loop over the functions in the module, creating new ones as before.
SmallVector<Function *, 4> Functions;
for (Function &F : M)
Functions.push_back(&F);
for (Function *F : Functions) {
std::string Name;
if (F->hasName()) {
Name = std::string(F->getName());
F->setName(Name + ".bad");
}
else
Name = "";
FunctionType *FTy = cast<FunctionType>(F->getValueType());
SmallVector<Type *, 3> Tys;
for (Type *Ty : FTy->params())
Tys.push_back(TypeRemapper.remapType(Ty));
FunctionType *NFTy = FunctionType::get(
TypeRemapper.remapType(FTy->getReturnType()),
Tys,
FTy->isVarArg());
Function *NF = Function::Create(
NFTy, F->getLinkage(), F->getAddressSpace(), Name, &M);
// no need to copy attributes here, that's done by CloneFunctionInto
VMap[F] = NF;
}
// Now that all of the things that global variable initializer can refer to
// have been created, loop through and copy the global variable referrers
// over... We also set the attributes on the globals now.
for (GlobalVariable *GV : Globals) {
if (GV->isDeclaration())
continue;
GlobalVariable *NGV = cast<GlobalVariable>(VMap[GV]);
if (GV->hasInitializer())
NGV->setInitializer(MapValue(GV->getInitializer(), VMap, RF_None, &TypeRemapper, &Materializer));
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
GV->getAllMetadata(MDs);
for (auto MD : MDs)
NGV->addMetadata(
MD.first,
*MapMetadata(MD.second, VMap));
copyComdat(NGV, GV);
GV->setInitializer(nullptr);
}
// Similarly, copy over and rewrite function bodies
for (Function *F : Functions) {
Function *NF = cast<Function>(VMap[F]);
LLVM_DEBUG(dbgs() << "Processing function " << NF->getName() << "\n");
// we also need this to run for declarations, or attributes won't be copied
Function::arg_iterator DestI = NF->arg_begin();
for (Function::const_arg_iterator I = F->arg_begin(); I != F->arg_end();
++I) {
DestI->setName(I->getName());
VMap[&*I] = &*DestI++;
}
SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
CloneFunctionInto(
NF,
F,
VMap,
CloneFunctionChangeType::GlobalChanges,
Returns,
"",
nullptr,
&TypeRemapper,
&Materializer);
// Update function attributes that contain types
AttributeList Attrs = F->getAttributes();
LLVMContext &C = F->getContext();
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
for (Attribute::AttrKind TypedAttr :
{Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
auto Attr = Attrs.getAttributeAtIndex(i, TypedAttr);
if (Type *Ty = Attr.getValueAsType()) {
Attrs = Attrs.replaceAttributeTypeAtIndex(
C, i, TypedAttr, TypeRemapper.remapType(Ty));
break;
}
}
}
NF->setAttributes(Attrs);
copyComdat(NF, F);
RemoveNoopAddrSpaceCasts(NF);
F->deleteBody();
}
// And aliases
for (GlobalAlias *GA : Aliases) {
GlobalAlias *NGA = cast<GlobalAlias>(VMap[GA]);
if (const Constant *C = GA->getAliasee())
NGA->setAliasee(MapValue(C, VMap, RF_None, &TypeRemapper, &Materializer));
GA->setAliasee(nullptr);
}
// And named metadata
for (auto &NMD : M.named_metadata()) {
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
NMD.setOperand(i, MapMetadata(NMD.getOperand(i), VMap));
}
// Now that we've duplicated everything, remove the old references
for (GlobalVariable *GV : Globals)
GV->eraseFromParent();
for (GlobalAlias *GA : Aliases)
GA->eraseFromParent();
for (Function *F : Functions)
F->eraseFromParent();
// Finally, remangle calls to intrinsic
for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE;) {
Function *F = &*FI++;
if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) {
F->replaceAllUsesWith(Remangled.getValue());
F->eraseFromParent();
}
}
return true;
}
RemoveAddrspacesPass::RemoveAddrspacesPass() : RemoveAddrspacesPass(removeAllAddrspaces) {}
PreservedAnalyses RemoveAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) {
bool modified = removeAddrspaces(M, ASRemapper);
#ifdef JL_VERIFY_PASSES
assert(!verifyLLVMIR(M));
#endif
if (modified) {
return PreservedAnalyses::allInSet<CFGAnalyses>();
} else {
return PreservedAnalyses::all();
}
}
//
// Julia-specific pass
//
unsigned removeJuliaAddrspaces(unsigned AS)
{
if (AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial)
return AddressSpace::Generic;
else
return AS;
}
PreservedAnalyses RemoveJuliaAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) {
return RemoveAddrspacesPass(removeJuliaAddrspaces).run(M, AM);
}
Computing file changes ...