https://github.com/JuliaLang/julia
Raw File
Tip revision: 0f7c248c2b438fd29d22ed73e9fef56aee3cc5f4 authored by Jameson Nash on 24 November 2015, 05:23:33 UTC
wip: html
Tip revision: 0f7c248
win64-allocas-llvm-3.6.0.patch
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 16aab16..41e717f 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -375,12 +375,25 @@ static bool usesTheStack(const MachineFunction &MF) {
   return false;
 }
 
-void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
-                                             unsigned &CallOp,
-                                             const char *&Symbol) {
-  CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32;
+void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
+                                          MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI,
+                                          DebugLoc DL) {
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+  bool Is64Bit = STI.is64Bit();
+  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
+  const X86RegisterInfo *RegInfo =
+      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+
+  unsigned CallOp;
+  if (Is64Bit)
+    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
+  else
+    CallOp = X86::CALLpcrel32;
 
-  if (STI.is64Bit()) {
+  const char *Symbol;
+  if (Is64Bit) {
     if (STI.isTargetCygMing()) {
       Symbol = "___chkstk_ms";
     } else {
@@ -390,6 +403,37 @@ void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
     Symbol = "_alloca";
   else
     Symbol = "_chkstk";
+
+  MachineInstrBuilder CI;
+
+  // All current stack probes take AX and SP as input, clobber flags, and
+  // preserve all registers. x86_64 probes leave RSP unmodified.
+  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
+    // For the large code model, we have to call through a register. Use R11,
+    // as it is scratch in all supported calling conventions.
+    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
+        .addExternalSymbol(Symbol);
+    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
+  } else {
+    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
+  }
+
+  unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
+  unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
+  CI.addReg(AX, RegState::Implicit)
+      .addReg(SP, RegState::Implicit)
+      .addReg(AX, RegState::Define | RegState::Implicit)
+      .addReg(SP, RegState::Define | RegState::Implicit)
+      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+  if (Is64Bit) {
+    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
+    // themselves. It also does not clobber %rax so we can reuse it when
+    // adjusting %rsp.
+    BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
+        .addReg(X86::RSP)
+        .addReg(X86::RAX);
+  }
 }
 
 /// emitPrologue - Push callee-saved registers onto the stack, which
@@ -722,11 +766,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   // increments is necessary to ensure that the guard pages used by the OS
   // virtual memory manager are allocated in correct sequence.
   if (NumBytes >= StackProbeSize && UseStackProbe) {
-    const char *StackProbeSymbol;
-    unsigned CallOp;
-
-    getStackProbeFunction(STI, CallOp, StackProbeSymbol);
-
     // Check whether EAX is livein for this function.
     bool isEAXAlive = isEAXLiveIn(MF);
 
@@ -755,22 +794,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
         .setMIFlag(MachineInstr::FrameSetup);
     }
 
-    BuildMI(MBB, MBBI, DL,
-            TII.get(CallOp))
-      .addExternalSymbol(StackProbeSymbol)
-      .addReg(StackPtr,    RegState::Define | RegState::Implicit)
-      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
-      .setMIFlag(MachineInstr::FrameSetup);
+    // Save a pointer to the MI where we set AX.
+    MachineBasicBlock::iterator SetRAX = MBBI;
+    --SetRAX;
+
+    // Call __chkstk, __chkstk_ms, or __alloca.
+    emitStackProbeCall(MF, MBB, MBBI, DL);
+
+    // Apply the frame setup flag to all inserted instrs.
+    for (; SetRAX != MBBI; ++SetRAX)
+      SetRAX->setFlag(MachineInstr::FrameSetup);
 
-    if (Is64Bit) {
-      // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
-      // themself. It also does not clobber %rax so we can reuse it when
-      // adjusting %rsp.
-      BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
-        .addReg(StackPtr)
-        .addReg(X86::RAX)
-        .setMIFlag(MachineInstr::FrameSetup);
-    }
     if (isEAXAlive) {
       // Restore EAX
       MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index ee0ee22..dd8fc32 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -27,9 +27,11 @@ public:
   explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
     : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
 
-  static void getStackProbeFunction(const X86Subtarget &STI,
-                                    unsigned &CallOp,
-                                    const char *&Symbol);
+  /// Emit a call to the target's stack probe function. This is required for all
+  /// large stack allocations on Windows. The caller is required to materialize
+  /// the number of bytes to probe in RAX/EAX.
+  static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MBBI, DebugLoc DL);
 
   void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 177299b..cd5c80d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "X86ISelLowering.h"
 #include "Utils/X86ShuffleDecode.h"
 #include "X86CallingConv.h"
+#include "X86FrameLowering.h"
 #include "X86InstrBuilder.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86TargetMachine.h"
@@ -21076,47 +21077,7 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
 
   assert(!Subtarget->isTargetMachO());
 
-  // The lowering is pretty easy: we're just emitting the call to _alloca.  The
-  // non-trivial part is impdef of ESP.
-
-  if (Subtarget->isTargetWin64()) {
-    if (Subtarget->isTargetCygMing()) {
-      // ___chkstk(Mingw64):
-      // Clobbers R10, R11, RAX and EFLAGS.
-      // Updates RSP.
-      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
-        .addExternalSymbol("___chkstk")
-        .addReg(X86::RAX, RegState::Implicit)
-        .addReg(X86::RSP, RegState::Implicit)
-        .addReg(X86::RAX, RegState::Define | RegState::Implicit)
-        .addReg(X86::RSP, RegState::Define | RegState::Implicit)
-        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-    } else {
-      // __chkstk(MSVCRT): does not update stack pointer.
-      // Clobbers R10, R11 and EFLAGS.
-      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
-        .addExternalSymbol("__chkstk")
-        .addReg(X86::RAX, RegState::Implicit)
-        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-      // RAX has the offset to be subtracted from RSP.
-      BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
-        .addReg(X86::RSP)
-        .addReg(X86::RAX);
-    }
-  } else {
-    const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() ||
-                                    Subtarget->isTargetWindowsItanium())
-                                       ? "_chkstk"
-                                       : "_alloca";
-
-    BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
-      .addExternalSymbol(StackProbeSymbol)
-      .addReg(X86::EAX, RegState::Implicit)
-      .addReg(X86::ESP, RegState::Implicit)
-      .addReg(X86::EAX, RegState::Define | RegState::Implicit)
-      .addReg(X86::ESP, RegState::Define | RegState::Implicit)
-      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-  }
+  X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
 
   MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 71415c6..7baff19 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -279,7 +279,8 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
 }
 
 let isCall = 1, isCodeGenOnly = 1 in
-  // __chkstk(MSVC):     clobber R10, R11 and EFLAGS.
+  // __chkstk(MSVC):     clobber R10, R11 and EFLAGS
+  // ___chkstk_ms(Mingw64): clobber R10, R11 and EFLAGS
   // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
   let Defs = [RAX, R10, R11, RSP, EFLAGS],
       Uses = [RSP] in {
diff --git a/test/CodeGen/X86/inalloca-stdcall.ll b/test/CodeGen/X86/inalloca-stdcall.ll
index e5b07e2..65a0f77 100644
--- a/test/CodeGen/X86/inalloca-stdcall.ll
+++ b/test/CodeGen/X86/inalloca-stdcall.ll
@@ -6,6 +6,7 @@ declare x86_stdcallcc void @f(%Foo* inalloca %a)
 declare x86_stdcallcc void @i(i32 %a)
 
 define void @g() {
+; CHECK-LABEL: _g:
   %b = alloca inalloca %Foo
 ; CHECK: movl    $8, %eax
 ; CHECK: calll   __chkstk
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index a6b6536..aab2eea 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 -code-model=large | FileCheck %s -check-prefix=L64
 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
@@ -24,6 +25,13 @@ entry:
 ; W64: callq __chkstk
 ; W64: subq  %rax, %rsp
 
+; Use %r11 for the large model.
+; L64: movq  %rsp, %rbp
+; L64:       $4096, %rax
+; L64: movabsq $__chkstk, %r11
+; L64: callq *%r11
+; L64: subq  %rax, %rsp
+
 ; Freestanding
 ; EFI: movq  %rsp, %rbp
 ; EFI:       $[[B0OFS:4096|4104]], %rsp
@@ -33,8 +41,8 @@ entry:
 
 ; M64: leaq  15(%{{.*}}), %rax
 ; M64: andq  $-16, %rax
-; M64: callq ___chkstk
-; M64-NOT:   %rsp
+; M64: callq ___chkstk_ms
+; M64: subq  %rax, %rsp
 ; M64: movq  %rsp, %rax
 
 ; W64: leaq  15(%{{.*}}), %rax
@@ -43,6 +51,13 @@ entry:
 ; W64: subq  %rax, %rsp
 ; W64: movq  %rsp, %rax
 
+; L64: leaq  15(%{{.*}}), %rax
+; L64: andq  $-16, %rax
+; L64: movabsq $__chkstk, %r11
+; L64: callq *%r11
+; L64: subq  %rax, %rsp
+; L64: movq  %rsp, %rax
+
 ; EFI: leaq  15(%{{.*}}), [[R1:%r.*]]
 ; EFI: andq  $-16, [[R1]]
 ; EFI: movq  %rsp, [[R64:%r.*]]
@@ -84,7 +99,8 @@ entry:
 
 ; M64: leaq  15(%{{.*}}), %rax
 ; M64: andq  $-16, %rax
-; M64: callq ___chkstk
+; M64: callq ___chkstk_ms
+; M64: subq  %rax, %rsp
 ; M64: movq  %rsp, [[R2:%r.*]]
 ; M64: andq  $-128, [[R2]]
 ; M64: movq  [[R2]], %rsp
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
index 0c02c1a..4edc89f 100644
--- a/test/CodeGen/X86/win_chkstk.ll
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
 ; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN_X64
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -code-model=large | FileCheck %s -check-prefix=WIN64_LARGE
 ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
 ; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
@@ -16,6 +17,8 @@ define i32 @main4k() nounwind {
 entry:
 ; WIN_X32:    calll __chkstk
 ; WIN_X64:    callq __chkstk
+; WIN64_LARGE: movabsq $__chkstk, %r11
+; WIN64_LARGE: callq *%r11
 ; MINGW_X32:  calll __alloca
 ; MINGW_X64:  callq ___chkstk_ms
 ; LINUX-NOT:  call __chkstk
@@ -52,6 +55,8 @@ define x86_64_win64cc i32 @main4k_win64() nounwind {
 entry:
 ; WIN_X32:    calll __chkstk
 ; WIN_X64:    callq __chkstk
+; WIN64_LARGE: movabsq $__chkstk, %r11
+; WIN64_LARGE: callq *%r11
 ; MINGW_X32:  calll __alloca
 ; MINGW_X64:  callq ___chkstk_ms
 ; LINUX-NOT:  call __chkstk
back to top