mirror of
https://src.fedoraproject.org/rpms/llvm.git
synced 2024-12-01 11:38:09 +00:00
308 lines
13 KiB
Diff
From 4594a6164d5ae9252825e23a95aa6f2fce304d6e Mon Sep 17 00:00:00 2001
|
|
From: Reid Kleckner <rnk@google.com>
|
|
Date: Wed, 14 Feb 2018 00:34:13 +0000
|
|
Subject: [PATCH 3/4] Merging r325049:
|
|
------------------------------------------------------------------------
|
|
r325049 | rnk | 2018-02-13 12:47:49 -0800 (Tue, 13 Feb 2018) | 17 lines
|
|
|
|
[X86] Use EDI for retpoline when no scratch regs are left
|
|
|
|
Summary:
|
|
Instead of solving the hard problem of how to pass the callee to the indirect
|
|
jump thunk without a register, just use a CSR. At a call boundary, there's
|
|
nothing stopping us from using a CSR to hold the callee as long as we save and
|
|
restore it in the prologue.
|
|
|
|
Also, add tests for this mregparm=3 case. I wrote execution tests for
|
|
__llvm_retpoline_push, but they never got committed as lit tests, either
|
|
because I never rewrote them or because they got lost in merge conflicts.
|
|
|
|
Reviewers: chandlerc, dwmw2
|
|
|
|
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
|
|
|
|
Differential Revision: https://reviews.llvm.org/D43214
|
|
------------------------------------------------------------------------
|
|
|
|
|
|
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325090 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
---
|
|
lib/Target/X86/X86ISelLowering.cpp | 50 +++++++++++++----------------------
|
|
lib/Target/X86/X86RetpolineThunks.cpp | 42 ++++++++---------------------
|
|
test/CodeGen/X86/retpoline-regparm.ll | 42 +++++++++++++++++++++++++++++
|
|
test/CodeGen/X86/retpoline.ll | 14 ++++------
|
|
4 files changed, 76 insertions(+), 72 deletions(-)
|
|
create mode 100644 test/CodeGen/X86/retpoline-regparm.ll
|
|
|
|
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
|
|
index 9aa3023..59a9832 100644
|
|
--- a/lib/Target/X86/X86ISelLowering.cpp
|
|
+++ b/lib/Target/X86/X86ISelLowering.cpp
|
|
@@ -26265,9 +26265,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
|
|
// attempt to help out kernels and other systems where duplicating the
|
|
// thunks is costly.
|
|
switch (Reg) {
|
|
- case 0:
|
|
- assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
|
|
- return "__x86_indirect_thunk";
|
|
case X86::EAX:
|
|
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
|
|
return "__x86_indirect_thunk_eax";
|
|
@@ -26277,6 +26274,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
|
|
case X86::EDX:
|
|
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
|
|
return "__x86_indirect_thunk_edx";
|
|
+ case X86::EDI:
|
|
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
|
|
+ return "__x86_indirect_thunk_edi";
|
|
case X86::R11:
|
|
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
|
|
return "__x86_indirect_thunk_r11";
|
|
@@ -26286,9 +26286,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
|
|
|
|
// When targeting an internal COMDAT thunk use an LLVM-specific name.
|
|
switch (Reg) {
|
|
- case 0:
|
|
- assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
|
|
- return "__llvm_retpoline_push";
|
|
case X86::EAX:
|
|
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
|
|
return "__llvm_retpoline_eax";
|
|
@@ -26298,6 +26295,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
|
|
case X86::EDX:
|
|
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
|
|
return "__llvm_retpoline_edx";
|
|
+ case X86::EDI:
|
|
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
|
|
+ return "__llvm_retpoline_edi";
|
|
case X86::R11:
|
|
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
|
|
return "__llvm_retpoline_r11";
|
|
@@ -26319,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
|
|
// just use R11, but we scan for uses anyway to ensure we don't generate
|
|
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
|
|
// already a register use operand to the call to hold the callee. If none
|
|
- // are available, push the callee instead. This is less efficient, but is
|
|
- // necessary for functions using 3 regparms. Such function calls are
|
|
- // (currently) not eligible for tail call optimization, because there is no
|
|
- // scratch register available to hold the address of the callee.
|
|
+ // are available, use EDI instead. EDI is chosen because EBX is the PIC base
|
|
+ // register and ESI is the base pointer to realigned stack frames with VLAs.
|
|
SmallVector<unsigned, 3> AvailableRegs;
|
|
if (Subtarget.is64Bit())
|
|
AvailableRegs.push_back(X86::R11);
|
|
else
|
|
- AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
|
|
+ AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
|
|
|
|
// Zero out any registers that are already used.
|
|
for (const auto &MO : MI.operands()) {
|
|
@@ -26345,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
|
|
break;
|
|
}
|
|
}
|
|
+ if (!AvailableReg)
|
|
+ report_fatal_error("calling convention incompatible with retpoline, no "
|
|
+ "available registers");
|
|
|
|
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
|
|
|
|
- if (AvailableReg == 0) {
|
|
- // No register available. Use PUSH. This must not be a tailcall, and this
|
|
- // must not be x64.
|
|
- if (Subtarget.is64Bit())
|
|
- report_fatal_error(
|
|
- "Cannot make an indirect call on x86-64 using both retpoline and a "
|
|
- "calling convention that preservers r11");
|
|
- if (Opc != X86::CALLpcrel32)
|
|
- report_fatal_error("Cannot make an indirect tail call on x86 using "
|
|
- "retpoline without a preserved register");
|
|
- BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
|
|
- MI.getOperand(0).ChangeToES(Symbol);
|
|
- MI.setDesc(TII->get(Opc));
|
|
- } else {
|
|
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
|
|
- .addReg(CalleeVReg);
|
|
- MI.getOperand(0).ChangeToES(Symbol);
|
|
- MI.setDesc(TII->get(Opc));
|
|
- MachineInstrBuilder(*BB->getParent(), &MI)
|
|
- .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
|
|
- }
|
|
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
|
|
+ .addReg(CalleeVReg);
|
|
+ MI.getOperand(0).ChangeToES(Symbol);
|
|
+ MI.setDesc(TII->get(Opc));
|
|
+ MachineInstrBuilder(*BB->getParent(), &MI)
|
|
+ .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
|
|
return BB;
|
|
}
|
|
|
|
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
|
|
index 223fa57..59ace3f 100644
|
|
--- a/lib/Target/X86/X86RetpolineThunks.cpp
|
|
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
|
|
@@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
|
|
static const char EAXThunkName[] = "__llvm_retpoline_eax";
|
|
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
|
|
static const char EDXThunkName[] = "__llvm_retpoline_edx";
|
|
-static const char PushThunkName[] = "__llvm_retpoline_push";
|
|
+static const char EDIThunkName[] = "__llvm_retpoline_edi";
|
|
|
|
namespace {
|
|
class X86RetpolineThunks : public MachineFunctionPass {
|
|
@@ -127,7 +127,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
|
|
createThunkFunction(M, R11ThunkName);
|
|
else
|
|
for (StringRef Name :
|
|
- {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
|
|
+ {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
|
|
createThunkFunction(M, Name);
|
|
InsertedThunks = true;
|
|
return true;
|
|
@@ -151,9 +151,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
|
|
populateThunk(MF, X86::R11);
|
|
} else {
|
|
// For 32-bit targets we need to emit a collection of thunks for various
|
|
- // possible scratch registers as well as a fallback that is used when
|
|
- // there are no scratch registers and assumes the retpoline target has
|
|
- // been pushed.
|
|
+ // possible scratch registers as well as a fallback that uses EDI, which is
|
|
+ // normally callee saved.
|
|
// __llvm_retpoline_eax:
|
|
// calll .Leax_call_target
|
|
// .Leax_capture_spec:
|
|
@@ -174,32 +173,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
|
|
// movl %edx, (%esp)
|
|
// retl
|
|
//
|
|
- // This last one is a bit more special and so needs a little extra
|
|
- // handling.
|
|
- // __llvm_retpoline_push:
|
|
- // calll .Lpush_call_target
|
|
- // .Lpush_capture_spec:
|
|
- // pause
|
|
- // lfence
|
|
- // jmp .Lpush_capture_spec
|
|
- // .align 16
|
|
- // .Lpush_call_target:
|
|
- // # Clear pause_loop return address.
|
|
- // addl $4, %esp
|
|
- // # Top of stack words are: Callee, RA. Exchange Callee and RA.
|
|
- // pushl 4(%esp) # Push callee
|
|
- // pushl 4(%esp) # Push RA
|
|
- // popl 8(%esp) # Pop RA to final RA
|
|
- // popl (%esp) # Pop callee to next top of stack
|
|
- // retl # Ret to callee
|
|
+ // __llvm_retpoline_edi:
|
|
+ // ... # Same setup
|
|
+ // movl %edi, (%esp)
|
|
+ // retl
|
|
if (MF.getName() == EAXThunkName)
|
|
populateThunk(MF, X86::EAX);
|
|
else if (MF.getName() == ECXThunkName)
|
|
populateThunk(MF, X86::ECX);
|
|
else if (MF.getName() == EDXThunkName)
|
|
populateThunk(MF, X86::EDX);
|
|
- else if (MF.getName() == PushThunkName)
|
|
- populateThunk(MF);
|
|
+ else if (MF.getName() == EDIThunkName)
|
|
+ populateThunk(MF, X86::EDI);
|
|
else
|
|
llvm_unreachable("Invalid thunk name on x86-32!");
|
|
}
|
|
@@ -301,11 +286,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
|
|
CaptureSpec->addSuccessor(CaptureSpec);
|
|
|
|
CallTarget->setAlignment(4);
|
|
- if (Reg) {
|
|
- insertRegReturnAddrClobber(*CallTarget, *Reg);
|
|
- } else {
|
|
- assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
|
|
- insert32BitPushReturnAddrClobber(*CallTarget);
|
|
- }
|
|
+ insertRegReturnAddrClobber(*CallTarget, *Reg);
|
|
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
|
|
}
|
|
diff --git a/test/CodeGen/X86/retpoline-regparm.ll b/test/CodeGen/X86/retpoline-regparm.ll
|
|
new file mode 100644
|
|
index 0000000..13b3274
|
|
--- /dev/null
|
|
+++ b/test/CodeGen/X86/retpoline-regparm.ll
|
|
@@ -0,0 +1,42 @@
|
|
+; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
|
|
+
|
|
+; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
|
|
+; because there are no available scratch registers. The Linux kernel builds
|
|
+; with -mregparm=3, so we need to support it. TCO should fail because we need
|
|
+; to restore EDI.
|
|
+
|
|
+define void @call_edi(void (i32, i32, i32)* %fp) #0 {
|
|
+entry:
|
|
+ tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; CHECK-LABEL: call_edi:
|
|
+; EDI is used, so it must be saved.
|
|
+; CHECK: pushl %edi
|
|
+; CHECK-DAG: xorl %eax, %eax
|
|
+; CHECK-DAG: xorl %edx, %edx
|
|
+; CHECK-DAG: xorl %ecx, %ecx
|
|
+; CHECK-DAG: movl {{.*}}, %edi
|
|
+; CHECK: calll __llvm_retpoline_edi
|
|
+; CHECK: popl %edi
|
|
+; CHECK: retl
|
|
+
|
|
+define void @edi_external(void (i32, i32, i32)* %fp) #1 {
|
|
+entry:
|
|
+ tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; CHECK-LABEL: edi_external:
|
|
+; CHECK: pushl %edi
|
|
+; CHECK-DAG: xorl %eax, %eax
|
|
+; CHECK-DAG: xorl %edx, %edx
|
|
+; CHECK-DAG: xorl %ecx, %ecx
|
|
+; CHECK-DAG: movl {{.*}}, %edi
|
|
+; CHECK: calll __x86_indirect_thunk_edi
|
|
+; CHECK: popl %edi
|
|
+; CHECK: retl
|
|
+
|
|
+attributes #0 = { "target-features"="+retpoline" }
|
|
+attributes #1 = { "target-features"="+retpoline-external-thunk" }
|
|
diff --git a/test/CodeGen/X86/retpoline.ll b/test/CodeGen/X86/retpoline.ll
|
|
index b0d4c85..562386e 100644
|
|
--- a/test/CodeGen/X86/retpoline.ll
|
|
+++ b/test/CodeGen/X86/retpoline.ll
|
|
@@ -336,10 +336,10 @@ latch:
|
|
; X86-NEXT: movl %edx, (%esp)
|
|
; X86-NEXT: retl
|
|
;
|
|
-; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
|
|
-; X86-NEXT: .hidden __llvm_retpoline_push
|
|
-; X86-NEXT: .weak __llvm_retpoline_push
|
|
-; X86: __llvm_retpoline_push:
|
|
+; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
|
|
+; X86-NEXT: .hidden __llvm_retpoline_edi
|
|
+; X86-NEXT: .weak __llvm_retpoline_edi
|
|
+; X86: __llvm_retpoline_edi:
|
|
; X86-NEXT: # {{.*}} # %entry
|
|
; X86-NEXT: calll [[CALL_TARGET:.*]]
|
|
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
|
|
@@ -351,11 +351,7 @@ latch:
|
|
; X86-NEXT: .p2align 4, 0x90
|
|
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
-; X86-NEXT: addl $4, %esp
|
|
-; X86-NEXT: pushl 4(%esp)
|
|
-; X86-NEXT: pushl 4(%esp)
|
|
-; X86-NEXT: popl 8(%esp)
|
|
-; X86-NEXT: popl (%esp)
|
|
+; X86-NEXT: movl %edi, (%esp)
|
|
; X86-NEXT: retl
|
|
|
|
|
|
--
|
|
1.8.3.1
|
|
|