llvm-D28786-callclearance_4.0.patch
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 247d694f2e4..e455549cdc6 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -1421,6 +1421,17 @@ public:
virtual void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const {}
+ /// May return true if the instruction in question is a dependency breaking
+ /// instruction. If so, the register number for which it is dependency
+ /// breaking should be returned in `OutReg`. It is preferable to return
+ /// false if the result cannot be determined. This would at worst result
+ /// in the insertion of an unnecessary instruction, while the other
+ /// alternative could result in significant false-dependency penalties.
+ virtual bool isDependencyBreak(MachineInstr &MI,
+ unsigned *OutReg = nullptr) const {
+ return false;
+ }
+
/// Create machine specific model for scheduling.
virtual DFAPacketizer *
CreateTargetScheduleState(const TargetSubtargetInfo &) const {
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 64aed533a9d..9f3bd634622 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -214,13 +214,18 @@ private:
bool isBlockDone(MachineBasicBlock *);
void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass, bool Done);
void updateSuccessors(MachineBasicBlock *MBB, bool Primary, bool Done);
- bool visitInstr(MachineInstr *);
+ bool visitInstr(MachineInstr *, bool PrimaryPass);
void processDefs(MachineInstr *, bool BlockDone, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
- void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref);
+ void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, unsigned Pref,
+ bool &TrueDependency);
bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
+
+ // Undef Reads
+ void collapseUndefReads(unsigned from, unsigned to, unsigned Reg);
+ unsigned updateChooseableRegs(SparseSet<unsigned> &,
+ const TargetRegisterClass *, bool);
void processUndefReads(MachineBasicBlock*);
};
}
@@ -394,11 +399,19 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// This is the entry block.
if (MBB->pred_empty()) {
+ // Treat all registers as being defined just before the first instruction.
+ // However, we want the logic later to prefer non live-ins over live-ins,
+ // so pretend the live-ins were defined slightly later.
+ // We used to only do this for live-ins, but that's a bit of a gamble.
+ // If our caller does arithmetic with these registers it is quite likely
+ // that it will have used registers beyond the ones that are live here.
+ // Given the immense penalty for getting this wrong, being conservative
+ // here seems worth it.
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ LiveRegs[rx].Def = -2;
+ }
for (const auto &LI : MBB->liveins()) {
for (int rx : regIndices(LI.PhysReg)) {
- // Treat function live-ins as if they were defined just before the first
- // instruction. Usually, function arguments are set up immediately
- // before the call.
LiveRegs[rx].Def = -1;
}
}
@@ -470,24 +483,36 @@ void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
LiveRegs = nullptr;
}
-bool ExeDepsFix::visitInstr(MachineInstr *MI) {
- // Update instructions with explicit execution domains.
- std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
- if (DomP.first) {
- if (DomP.second)
- visitSoftInstr(MI, DomP.second);
- else
- visitHardInstr(MI, DomP.first);
+bool ExeDepsFix::visitInstr(MachineInstr *MI, bool PrimaryPass) {
+ bool Kill = false;
+
+ if (PrimaryPass) {
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+ Kill = !DomP.first;
+ }
+
+ // If this is a call, pretend all registers we are considering are def'd here.
+ // We have no idea which registers the callee may use.
+ if (MI->isCall()) {
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def = CurInstr;
}
- return !DomP.first;
+ return Kill;
}
/// \brief Helps avoid false dependencies on undef registers by updating the
/// machine instructions' undef operand to use a register that the instruction
/// is truly dependent on, or use a register with clearance higher than Pref.
void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+ unsigned Pref, bool &TrueDependency) {
MachineOperand &MO = MI->getOperand(OpIdx);
assert(MO.isUndef() && "Expected undef machine operand");
@@ -510,6 +535,7 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// We found a true dependency - replace the undef register with the true
// dependency.
MO.setReg(CurrMO.getReg());
+ TrueDependency = true;
return;
}
@@ -571,9 +597,14 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool BlockDone, bool Kill) {
if (BlockDone) {
unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
if (Pref) {
- pickBestRegisterForUndef(MI, OpNum, Pref);
- if (shouldBreakDependence(MI, OpNum, Pref))
+ bool TrueDependency = false;
+ pickBestRegisterForUndef(MI, OpNum, Pref, TrueDependency);
+ // Don't bother adding true dependencies to UndefReads. All we'd find out
+ // is that the register is live (since this very instruction depends on
+ // it), so we can't do anything.
+ if (!TrueDependency && shouldBreakDependence(MI, OpNum, Pref)) {
UndefReads.push_back(std::make_pair(MI, OpNum));
+ }
}
}
const MCInstrDesc &MCID = MI->getDesc();
@@ -606,9 +637,52 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool BlockDone, bool Kill) {
kill(rx);
}
}
+ unsigned DepReg = 0;
+ if (TII->isDependencyBreak(*MI, &DepReg)) {
+ for (int rx : regIndices(DepReg)) {
+ // This instruction is a dependency break, so there are no clearance
+ // issues, reset the counter.
+ LiveRegs[rx].Def = -(1 << 20);
+ }
+ }
++CurInstr;
}
+// Set the undef read register to `Reg` for all UndefReads in the range
+// [from,to).
+void ExeDepsFix::collapseUndefReads(unsigned from, unsigned to, unsigned Reg) {
+ if (from >= to)
+ return;
+ for (unsigned i = from; i < to; ++i) {
+ MachineInstr *MI = std::get<0>(UndefReads[i]);
+ unsigned OpIdx = std::get<1>(UndefReads[i]);
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ MO.setReg(Reg);
+ }
+ TII->breakPartialRegDependency(*std::get<0>(UndefReads[from]),
+ std::get<1>(UndefReads[from]), TRI);
+}
+
+unsigned ExeDepsFix::updateChooseableRegs(SparseSet<unsigned> &ChoosableRegs,
+ const TargetRegisterClass *OpRC,
+ bool add) {
+ unsigned LowestValid = (unsigned)-1;
+
+ for (auto Reg : OpRC->getRegisters()) {
+ if (LiveRegSet.contains(Reg))
+ ChoosableRegs.erase(Reg);
+ else if (add) {
+ ChoosableRegs.insert(Reg);
+ if (LowestValid == (unsigned)-1)
+ LowestValid = Reg;
+ } else if (ChoosableRegs.count(Reg) == 1) {
+ if (LowestValid == (unsigned)-1)
+ LowestValid = Reg;
+ }
+ }
+ return LowestValid;
+}
+
/// \break Break false dependencies on undefined register reads.
///
/// Walk the block backward computing precise liveness. This is expensive, so we
@@ -619,31 +693,87 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
if (UndefReads.empty())
return;
+ // We want to be slightly clever here, to avoid the following common pattern:
+ // Suppose we have some instruction `vrandom %in, %out` and the following code
+ // vrandom %xmm0<undef>, %xmm0<def>
+ // vrandom %xmm1<undef>, %xmm1<def>
+ // vrandom %xmm2<undef>, %xmm2<def>
+ // vrandom %xmm3<undef>, %xmm3<def>
+ // The earlier logic likes to produce these, because it picks the first
+ // register
+ // to break ties in clearance. However, most register allocators pick the dest
+ // register the same way. Naively, we'd have to insert a dependency break
+ // before every instruction above. However, what we really want is
+ // vxorps %xmm3, %xmm3, %xmm3
+ // vrandom %xmm3<undef>, %xmm0<def>
+ // vrandom %xmm3<undef>, %xmm1<def>
+ // vrandom %xmm3<undef>, %xmm2<def>
+ // vrandom %xmm3<undef>, %xmm3<def>
+ // To do so, we walk backwards and cumulatively keep track of which registers
+ // we can use to break the dependency. Then, once the set has collapsed, we
+ // reset the undef read register for all following instructions.
+
// Collect this block's live out register units.
LiveRegSet.init(*TRI);
// We do not need to care about pristine registers as they are just preserved
// but not actually used in the function.
LiveRegSet.addLiveOutsNoPristines(*MBB);
- MachineInstr *UndefMI = UndefReads.back().first;
- unsigned OpIdx = UndefReads.back().second;
+ SparseSet<unsigned> ChoosableRegs;
+ ChoosableRegs.setUniverse(TRI->getNumRegs());
+
+ unsigned LastValid = (unsigned)-1;
+ const TargetRegisterClass *LastOpRC = nullptr;
+ size_t i, LastInit;
+ i = LastInit = UndefReads.size() - 1;
+ MachineInstr *UndefMI = std::get<0>(UndefReads[i]);
for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
// Update liveness, including the current instruction's defs.
LiveRegSet.stepBackward(I);
+ // This ensures that we don't accidentally pick a register whose live region
+ // lies entirely between two undef reads (since that would defeat the
+ // purpose of breaking the dependency).
+ for (auto LiveReg : LiveRegSet)
+ ChoosableRegs.erase(LiveReg);
+
if (UndefMI == &I) {
- if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
- TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
+ unsigned OpIdx = std::get<1>(UndefReads[i]);
+ // Get the undef operand's register class
+ const TargetRegisterClass *OpRC =
+ TII->getRegClass(UndefMI->getDesc(), OpIdx, TRI, *MF);
+ if (OpRC != LastOpRC || ChoosableRegs.size() == 0) {
+ if (LastInit != i) {
+ if (LastValid != (unsigned)-1)
+ collapseUndefReads(i + 1, LastInit + 1, LastValid);
+ ChoosableRegs.clear();
+ LastInit = i;
+ }
+ }
+
+ unsigned LowestValid =
+ updateChooseableRegs(ChoosableRegs, OpRC, LastInit == i);
+
+ if (ChoosableRegs.size() == 0) {
+ if (LastInit != i) {
+ if (LastValid != (unsigned)-1)
+ collapseUndefReads(i + 1, LastInit + 1, LastValid);
+ LowestValid = updateChooseableRegs(ChoosableRegs, OpRC, true);
+ LastInit = i;
+ }
+ }
+ LastValid = LowestValid;
+ LastOpRC = OpRC;
- UndefReads.pop_back();
- if (UndefReads.empty())
- return;
+ if (i == 0)
+ break;
- UndefMI = UndefReads.back().first;
- OpIdx = UndefReads.back().second;
+ UndefMI = std::get<0>(UndefReads[--i]);
}
}
+ if (LastValid != (unsigned)-1)
+ collapseUndefReads(0, LastInit + 1, LastValid);
}
// A hard instruction only works in one domain. All input registers will be
@@ -787,9 +917,7 @@ void ExeDepsFix::processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass,
enterBasicBlock(MBB);
for (MachineInstr &MI : *MBB) {
if (!MI.isDebugValue()) {
- bool Kill = false;
- if (PrimaryPass)
- Kill = visitInstr(&MI);
+ bool Kill = visitInstr(&MI, PrimaryPass);
processDefs(&MI, isBlockDone(MBB), Kill);
}
}
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 627b6120b04..26665b2a15d 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -7432,6 +7432,23 @@ void X86InstrInfo::breakPartialRegDependency(
}
}
+bool X86InstrInfo::isDependencyBreak(MachineInstr &MI, unsigned *OutReg) const {
+ unsigned Opc = MI.getOpcode();
+ if (!(Opc == X86::VXORPSrr || Opc == X86::VXORPDrr || Opc == X86::XORPSrr ||
+ Opc == X86::XORPDrr))
+ return false;
+ unsigned Reg = 0;
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || (Reg != 0 && MO.getReg() != Reg))
+ return false;
+ Reg = MO.getReg();
+ }
+ if (OutReg)
+ *OutReg = Reg;
+ return true;
+}
+
MachineInstr *
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index acfdef4da7a..4ea9ddfc863 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -477,6 +477,7 @@ public:
const TargetRegisterInfo *TRI) const override;
void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const override;
+ bool isDependencyBreak(MachineInstr &MI, unsigned *OutReg) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
unsigned OpNum,