llvm.org GIT mirror: llvm / commit 3bde6fe
Introduce a pass to insert vzeroupper instructions to avoid the AVX to SSE transition penalty. The pass is enabled through the "x86-use-vzeroupper" llc command line option. This is only the first step (a very naive and conservative one) to sketch out the idea, but a proper DFA is coming next to allow smarter decisions. Comments and ideas, now and in further commits, are very much appreciated.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138317 91177308-0d34-0410-b5e6-96231b3b80d8

Bruno Cardoso Lopes, 9 years ago
5 changed files with 153 additions and 0 deletions.
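To make the scenario in the commit message concrete: the penalty arises when code compiled with AVX (VEX) encodings, which may leave the upper halves of the YMM registers live, calls into code that uses legacy SSE encodings, e.g. a prebuilt library. The following is only an illustrative C++ sketch of that situation; the function names are invented, and the pass itself operates on machine IR, not on source like this.

#include <immintrin.h>

// Assumed to live in a library built without AVX, so its SSE instructions
// use legacy (non-VEX) encodings. Hypothetical function for illustration.
float sse_compiled_sum(const float *p, int n);

// Built with -mavx: the 256-bit arithmetic below dirties the upper halves
// of the YMM registers. Without a vzeroupper before the call, entering the
// legacy-SSE callee pays the AVX-to-SSE transition penalty.
// Assumes a and b point to at least 8 floats.
float avx_then_call(const float *a, const float *b, int n) {
  __m256 va = _mm256_loadu_ps(a);
  __m256 vb = _mm256_loadu_ps(b);
  __m256 vs = _mm256_add_ps(va, vb);
  float tmp[8];
  _mm256_storeu_ps(tmp, vs);
  return tmp[0] + sse_compiled_sum(b, n);
}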
  X86Subtarget.cpp
  X86TargetMachine.cpp
  X86TargetObjectFile.cpp
+ X86VZeroUpper.cpp
  )

if( CMAKE_CL_64 )
/// crossings.
FunctionPass *createSSEDomainFixPass();

+/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
+/// before each call to avoid the transition penalty between functions encoded
+/// with AVX and SSE.
+FunctionPass *createX86IssueVZeroUpperPass();
+
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
}

//===----------------------------------------------------------------------===//
+// Command line options for x86
+//===----------------------------------------------------------------------===//
+bool UseVZeroUpper;
+
+static cl::opt<bool, true>
+VZeroUpper("x86-use-vzeroupper",
+  cl::desc("Minimize AVX to SSE transition penalty"),
+  cl::location(UseVZeroUpper), cl::init(false));
+
+//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//

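For readers unfamiliar with LLVM's CommandLine library, the option above uses the external-storage form of cl::opt: the second template argument (true) tells the option to write its parsed value into the variable named by cl::location() instead of keeping it inside the cl::opt object, which is why the pipeline code below can test the plain bool UseVZeroUpper. A minimal, self-contained sketch of the same pattern, with a made-up flag and option name for illustration:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// External storage: later code can test this plain bool directly,
// analogous to UseVZeroUpper above.
bool EnableExample;  // hypothetical flag

// <bool, true> selects the external-storage variant; cl::location() names
// the variable that receives the parsed value, and cl::init() sets its
// default before command-line parsing runs.
static cl::opt<bool, true>
EnableExampleOpt("enable-example",  // hypothetical option name
                 cl::desc("Illustration of an externally stored flag"),
                 cl::location(EnableExample), cl::init(false));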
    PM.add(createSSEDomainFixPass());
    return true;
  }
+
+  if (Subtarget.hasAVX() && UseVZeroUpper) {
+    PM.add(createX86IssueVZeroUpperPass());
+    return true;
+  }
  return false;
}

//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids the transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");

namespace {
  struct VZeroUpperInserter : public MachineFunctionPass {
    static char ID;
    VZeroUpperInserter() : MachineFunctionPass(ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF);

    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);

    virtual const char *getPassName() const { return "X86 vzeroupper inserter"; }

  private:
    const TargetInstrInfo *TII; // Machine instruction info.
    MachineBasicBlock *MBB;     // Current basic block.
  };
  char VZeroUpperInserter::ID = 0;
}

FunctionPass *llvm::createX86IssueVZeroUpperPass() {
  return new VZeroUpperInserter();
}

/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzeroupper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
  TII = MF.getTarget().getInstrInfo();
  bool Changed = false;

  // Process every basic block in the function.
  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
    Changed |= processBasicBlock(MF, *BB);

  return Changed;
}

/// isCallToModuleFn - Return true if MI is a call to a function defined in
/// the current module (internal or private linkage, or an externally visible
/// definition rather than a mere declaration).
static bool isCallToModuleFn(const MachineInstr *MI) {
  assert(MI->getDesc().isCall() && "Isn't a call instruction");

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);

    if (!MO.isGlobal())
      continue;

    const GlobalValue *GV = MO.getGlobal();
    GlobalValue::LinkageTypes LT = GV->getLinkage();
    if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) ||
        (GV->isExternalLinkage(LT) && !GV->isDeclaration()))
      return true;

    return false;
  }
  return false;
}

/// processBasicBlock - Loop over all of the instructions in the basic block,
/// inserting vzeroupper instructions before function calls.
bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
                                           MachineBasicBlock &BB) {
  bool Changed = false;
  MBB = &BB;

  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
    MachineInstr *MI = I;
    DebugLoc dl = I->getDebugLoc();

    // Insert a vzeroupper instruction before each control transfer
    // to functions outside this module.
    if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) {
      BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
      ++NumVZU;
      Changed = true;
    }
  }

  return Changed;
}
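The commit message notes that a proper DFA is planned to allow smarter decisions than inserting a vzeroupper before every out-of-module call. As a rough, hypothetical sketch of what such tracking could look like, the idea is to remember whether the upper halves of the YMM registers may be dirty and to emit vzeroupper only at call sites reached in the dirty state. None of the types or helpers below exist in this patch; the real pass would query MachineInstr and the target register info instead of a plain summary struct.

#include <vector>

// Hypothetical per-instruction summary for illustration only.
struct InstrSummary {
  bool WritesYmmUpper; // defines a 256-bit register (dirties the upper halves)
  bool IsOutsideCall;  // call that may land in legacy-SSE-encoded code
};

enum class UpperState { Clean, Dirty };

// Walk a block's instructions, counting where vzeroupper would be needed:
// only calls reached while the upper state is dirty require one, and the
// instruction itself returns the registers to the clean state.
unsigned countNeededVZeroUppers(const std::vector<InstrSummary> &Block) {
  unsigned NumVZU = 0;
  UpperState S = UpperState::Clean;
  for (const InstrSummary &I : Block) {
    if (I.IsOutsideCall && S == UpperState::Dirty) {
      ++NumVZU;               // a vzeroupper would be inserted here
      S = UpperState::Clean;  // vzeroupper zeroes the upper halves
    }
    if (I.WritesYmmUpper)
      S = UpperState::Dirty;
  }
  return NumVZU;
}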
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <4 x float> %a, %a
  ret <4 x float> %add.i
}

; CHECK: _test00
define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
entry:
  %add.i = fadd <4 x float> %a, %b
  ; CHECK: vzeroupper
  ; CHECK-NEXT: callq _do_sse
  %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind
  %sub.i = fsub <4 x float> %call3, %add.i
  ; CHECK-NOT: vzeroupper
  ; CHECK: callq _do_sse_local
  %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i)
  ; CHECK: vzeroupper
  ; CHECK-NEXT: jmp _do_sse
  %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind
  ret <4 x float> %call10
}

declare <4 x float> @do_sse(<4 x float>)