Raw File
llvm-D28786-callclearance_4.0.patch
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 247d694f2e4..e455549cdc6 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -1421,6 +1421,17 @@ public:
   virtual void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum,
                                          const TargetRegisterInfo *TRI) const {}
 
+  /// May return true if the instruction in question is a dependency breaking
+  /// instruction. If so, the register number for which it is dependency
+  /// breaking should be returned in `OutReg`. It is prefereable to return
+  /// false if the result cannot be determined. This would at worst result
+  /// in the insertion of an unnecessary instruction, while the other
+  /// alternative could result in significant false-dependency penalties.
+  virtual bool isDependencyBreak(MachineInstr &MI,
+                                 unsigned *OutReg = nullptr) const {
+    return false;
+  }
+
   /// Create machine specific model for scheduling.
   virtual DFAPacketizer *
   CreateTargetScheduleState(const TargetSubtargetInfo &) const {
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 64aed533a9d..9f3bd634622 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -214,13 +214,18 @@ private:
   bool isBlockDone(MachineBasicBlock *);
   void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass, bool Done);
   void updateSuccessors(MachineBasicBlock *MBB, bool Primary, bool Done);
-  bool visitInstr(MachineInstr *);
+  bool visitInstr(MachineInstr *, bool PrimaryPass);
   void processDefs(MachineInstr *, bool BlockDone, bool Kill);
   void visitSoftInstr(MachineInstr*, unsigned mask);
   void visitHardInstr(MachineInstr*, unsigned domain);
-  void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
-                                unsigned Pref);
+  void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, unsigned Pref,
+                                bool &TrueDependency);
   bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
+
+  // Undef Reads
+  void collapseUndefReads(unsigned from, unsigned to, unsigned Reg);
+  unsigned updateChooseableRegs(SparseSet<unsigned> &,
+                                const TargetRegisterClass *, bool);
   void processUndefReads(MachineBasicBlock*);
 };
 }
@@ -394,11 +399,19 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
 
   // This is the entry block.
   if (MBB->pred_empty()) {
+    // Treat all registers as being defined just before the first instruction.
+    // Howver, we want the logic later to prefer non live-ins over live-ins,
+    // so pretend the live-ins were defined slightly later.
+    // We used to only do this for live-ins, but that's a bit of a gamble.
+    // If our caller does arithmetic with these registers is is quite likely
+    // that it will have used registers beyond the ones that are live here.
+    // Given the immense penalty for getting this wrong, being conservative
+    // here seems worth it.
+    for (unsigned rx = 0; rx != NumRegs; ++rx) {
+      LiveRegs[rx].Def = -2;
+    }
     for (const auto &LI : MBB->liveins()) {
       for (int rx : regIndices(LI.PhysReg)) {
-        // Treat function live-ins as if they were defined just before the first
-        // instruction.  Usually, function arguments are set up immediately
-        // before the call.
         LiveRegs[rx].Def = -1;
       }
     }
@@ -470,24 +483,36 @@ void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
   LiveRegs = nullptr;
 }
 
-bool ExeDepsFix::visitInstr(MachineInstr *MI) {
-  // Update instructions with explicit execution domains.
-  std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
-  if (DomP.first) {
-    if (DomP.second)
-      visitSoftInstr(MI, DomP.second);
-    else
-      visitHardInstr(MI, DomP.first);
+bool ExeDepsFix::visitInstr(MachineInstr *MI, bool PrimaryPass) {
+  bool Kill = false;
+
+  if (PrimaryPass) {
+    // Update instructions with explicit execution domains.
+    std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
+    if (DomP.first) {
+      if (DomP.second)
+        visitSoftInstr(MI, DomP.second);
+      else
+        visitHardInstr(MI, DomP.first);
+    }
+    Kill = !DomP.first;
+  }
+
+  // If this is a call, pretend all registers we are considering are def'd here.
+  // We have no idea which registers the callee may use.
+  if (MI->isCall()) {
+    for (unsigned i = 0, e = NumRegs; i != e; ++i)
+      LiveRegs[i].Def = CurInstr;
   }
 
-  return !DomP.first;
+  return Kill;
 }
 
 /// \brief Helps avoid false dependencies on undef registers by updating the
 /// machine instructions' undef operand to use a register that the instruction
 /// is truly dependent on, or use a register with clearance higher than Pref.
 void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
-                                          unsigned Pref) {
+                                          unsigned Pref, bool &TrueDependency) {
   MachineOperand &MO = MI->getOperand(OpIdx);
   assert(MO.isUndef() && "Expected undef machine operand");
 
@@ -510,6 +535,7 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
     // We found a true dependency - replace the undef register with the true
     // dependency.
     MO.setReg(CurrMO.getReg());
+    TrueDependency = true;
     return;
   }
 
@@ -571,9 +597,14 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool BlockDone, bool Kill) {
   if (BlockDone) {
     unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
     if (Pref) {
-      pickBestRegisterForUndef(MI, OpNum, Pref);
-      if (shouldBreakDependence(MI, OpNum, Pref))
+      bool TrueDependency = false;
+      pickBestRegisterForUndef(MI, OpNum, Pref, TrueDependency);
+      // Don't bother adding true dependencies to UndefReads. All we'd find out
+      // is that the register is live (since this very instruction depends on
+      // it), so we can't do anything.
+      if (!TrueDependency && shouldBreakDependence(MI, OpNum, Pref)) {
         UndefReads.push_back(std::make_pair(MI, OpNum));
+      }
     }
   }
   const MCInstrDesc &MCID = MI->getDesc();
@@ -606,9 +637,52 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool BlockDone, bool Kill) {
         kill(rx);
     }
   }
+  unsigned DepReg = 0;
+  if (TII->isDependencyBreak(*MI, &DepReg)) {
+    for (int rx : regIndices(DepReg)) {
+      // This instruction is a dependency break, so there are no clearance
+      // issues, reset the counter.
+      LiveRegs[rx].Def = -(1 << 20);
+    }
+  }
   ++CurInstr;
 }
 
+// Set the undef read register to `Reg` for all UndefReads in the range
+// [from,to).
+void ExeDepsFix::collapseUndefReads(unsigned from, unsigned to, unsigned Reg) {
+  if (from >= to)
+    return;
+  for (unsigned i = from; i < to; ++i) {
+    MachineInstr *MI = std::get<0>(UndefReads[i]);
+    unsigned OpIdx = std::get<1>(UndefReads[i]);
+    MachineOperand &MO = MI->getOperand(OpIdx);
+    MO.setReg(Reg);
+  }
+  TII->breakPartialRegDependency(*std::get<0>(UndefReads[from]),
+                                 std::get<1>(UndefReads[from]), TRI);
+}
+
+unsigned ExeDepsFix::updateChooseableRegs(SparseSet<unsigned> &ChoosableRegs,
+                                          const TargetRegisterClass *OpRC,
+                                          bool add) {
+  unsigned LowestValid = (unsigned)-1;
+
+  for (auto Reg : OpRC->getRegisters()) {
+    if (LiveRegSet.contains(Reg))
+      ChoosableRegs.erase(Reg);
+    else if (add) {
+      ChoosableRegs.insert(Reg);
+      if (LowestValid == (unsigned)-1)
+        LowestValid = Reg;
+    } else if (ChoosableRegs.count(Reg) == 1) {
+      if (LowestValid == (unsigned)-1)
+        LowestValid = Reg;
+    }
+  }
+  return LowestValid;
+}
+
 /// \break Break false dependencies on undefined register reads.
 ///
 /// Walk the block backward computing precise liveness. This is expensive, so we
@@ -619,31 +693,87 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
   if (UndefReads.empty())
     return;
 
+  // We want to be slightly clever here, to avoid the following common pattern:
+  // Suppose we have some instruction `vrandom %in, %out` and the following code
+  //    vrandom %xmm0<undef>, %xmm0<def>
+  //    vrandom %xmm1<undef>, %xmm1<def>
+  //    vrandom %xmm2<undef>, %xmm2<def>
+  //    vrandom %xmm3<undef>, %xmm3<def>
+  // The earlier logic likes to produce these, because it picks the first
+  // register
+  // to break ties in clearance. However, most register allocators pick the dest
+  // register the same way. Naively, we'd have to insert a dependency break,
+  // before every instruction above. However, what we really want is
+  //    vxorps %xmm3, %xmm3, %xmm3
+  //    vrandom %xmm3<undef>, %xmm0<def>
+  //    vrandom %xmm3<undef>, %xmm1<def>
+  //    vrandom %xmm3<undef>, %xmm2<def>
+  //    vrandom %xmm3<undef>, %xmm3<def>
+  // To do so, we walk backwards and cumulatively keep track of which registers
+  // we can use to break the dependency. Then, once the set has collapsed, we
+  // reset the undef read register for all following instructions.
+
   // Collect this block's live out register units.
   LiveRegSet.init(*TRI);
   // We do not need to care about pristine registers as they are just preserved
   // but not actually used in the function.
   LiveRegSet.addLiveOutsNoPristines(*MBB);
 
-  MachineInstr *UndefMI = UndefReads.back().first;
-  unsigned OpIdx = UndefReads.back().second;
+  SparseSet<unsigned> ChoosableRegs;
+  ChoosableRegs.setUniverse(TRI->getNumRegs());
+
+  unsigned LastValid = (unsigned)-1;
+  const TargetRegisterClass *LastOpRC = nullptr;
+  size_t i, LastInit;
+  i = LastInit = UndefReads.size() - 1;
+  MachineInstr *UndefMI = std::get<0>(UndefReads[i]);
 
   for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
     // Update liveness, including the current instruction's defs.
     LiveRegSet.stepBackward(I);
 
+    // This ensures that we don't accidentally pick a register whose live region
+    // lies entirely between two undef reads (since that would defeat the
+    // purpose of breaking the dependency).
+    for (auto LiveReg : LiveRegSet)
+      ChoosableRegs.erase(LiveReg);
+
     if (UndefMI == &I) {
-      if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
-        TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
+      unsigned OpIdx = std::get<1>(UndefReads[i]);
+      // Get the undef operand's register class
+      const TargetRegisterClass *OpRC =
+          TII->getRegClass(UndefMI->getDesc(), OpIdx, TRI, *MF);
+      if (OpRC != LastOpRC || ChoosableRegs.size() == 0) {
+        if (LastInit != i) {
+          if (LastValid != (unsigned)-1)
+            collapseUndefReads(i + 1, LastInit + 1, LastValid);
+          ChoosableRegs.clear();
+          LastInit = i;
+        }
+      }
+
+      unsigned LowestValid =
+          updateChooseableRegs(ChoosableRegs, OpRC, LastInit == i);
+
+      if (ChoosableRegs.size() == 0) {
+        if (LastInit != i) {
+          if (LastValid != (unsigned)-1)
+            collapseUndefReads(i + 1, LastInit + 1, LastValid);
+          LowestValid = updateChooseableRegs(ChoosableRegs, OpRC, true);
+          LastInit = i;
+        }
+      }
+      LastValid = LowestValid;
+      LastOpRC = OpRC;
 
-      UndefReads.pop_back();
-      if (UndefReads.empty())
-        return;
+      if (i == 0)
+        break;
 
-      UndefMI = UndefReads.back().first;
-      OpIdx = UndefReads.back().second;
+      UndefMI = std::get<0>(UndefReads[--i]);
     }
   }
+  if (LastValid != (unsigned)-1)
+    collapseUndefReads(0, LastInit + 1, LastValid);
 }
 
 // A hard instruction only works in one domain. All input registers will be
@@ -787,9 +917,7 @@ void ExeDepsFix::processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass,
   enterBasicBlock(MBB);
   for (MachineInstr &MI : *MBB) {
     if (!MI.isDebugValue()) {
-      bool Kill = false;
-      if (PrimaryPass)
-        Kill = visitInstr(&MI);
+      bool Kill = visitInstr(&MI, PrimaryPass);
       processDefs(&MI, isBlockDone(MBB), Kill);
     }
   }
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 627b6120b04..26665b2a15d 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -7432,6 +7432,23 @@ void X86InstrInfo::breakPartialRegDependency(
   }
 }
 
+bool X86InstrInfo::isDependencyBreak(MachineInstr &MI, unsigned *OutReg) const {
+  unsigned Opc = MI.getOpcode();
+  if (!(Opc == X86::VXORPSrr || Opc == X86::VXORPDrr || Opc == X86::XORPSrr ||
+        Opc == X86::XORPDrr))
+    return false;
+  unsigned Reg = 0;
+  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || (Reg != 0 && MO.getReg() != Reg))
+      return false;
+    Reg = MO.getReg();
+  }
+  if (OutReg)
+    *OutReg = Reg;
+  return true;
+}
+
 MachineInstr *
 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                     ArrayRef<unsigned> Ops,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index acfdef4da7a..4ea9ddfc863 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -477,6 +477,7 @@ public:
                                 const TargetRegisterInfo *TRI) const override;
   void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum,
                                  const TargetRegisterInfo *TRI) const override;
+  bool isDependencyBreak(MachineInstr &MI, unsigned *OutReg) const override;
 
   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                       unsigned OpNum,
back to top