llvm.org GIT mirror llvm / df4b35e
Remove X86-dependent stuff from SSEDomainFix. This also enables domain swizzling for AVX code which required a few trivial test changes. The pass will be moved to lib/CodeGen shortly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140659 91177308-0d34-0410-b5e6-96231b3b80d8 Jakob Stoklund Olesen 8 years ago
6 changed file(s) with 44 addition(s) and 25 deletion(s). Raw diff Collapse all Expand all
2323 class MachineFunctionPass;
2424 class PassInfo;
2525 class TargetLowering;
26 class TargetRegisterClass;
2627 class raw_ostream;
2728
2829 /// createUnreachableBlockEliminationPass - The LLVM code generator does not
224225 ///
225226 FunctionPass *createExpandISelPseudosPass();
226227
228 /// createExecutionDependencyFixPass - This pass fixes execution time
229 /// problems with dependent instructions, such as switching execution
230 /// domains to match.
231 ///
232 /// The pass will examine instructions using and defining registers in RC.
233 ///
234 FunctionPass *createExecutionDependencyFixPass(const TargetRegisterClass *RC);
235
227236 } // End llvm namespace
228237
229238 #endif
1717 //
1818 //===----------------------------------------------------------------------===//
1919
20 #define DEBUG_TYPE "sse-domain-fix"
21 #include "X86InstrInfo.h"
20 #define DEBUG_TYPE "execution-fix"
2221 #include "llvm/CodeGen/MachineFunctionPass.h"
2322 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/Passes.h"
24 #include "llvm/Target/TargetInstrInfo.h"
25 #include "llvm/Target/TargetMachine.h"
2426 #include "llvm/ADT/DepthFirstIterator.h"
2527 #include "llvm/Support/Allocator.h"
2628 #include "llvm/Support/Debug.h"
9698 };
9799 }
98100
99 static const unsigned NumRegs = 16;
100
101101 namespace {
102102 class SSEDomainFixPass : public MachineFunctionPass {
103103 static char ID;
104104 SpecificBumpPtrAllocator Allocator;
105105 SmallVector Avail;
106106
107 const TargetRegisterClass *const RC;
107108 MachineFunction *MF;
108 const X86InstrInfo *TII;
109 const TargetInstrInfo *TII;
109110 const TargetRegisterInfo *TRI;
110111 MachineBasicBlock *MBB;
112 std::vector<int> AliasMap;
113 const unsigned NumRegs;
111114 DomainValue **LiveRegs;
112115 typedef DenseMap LiveOutMap;
113116 LiveOutMap LiveOuts;
114117 unsigned Distance;
115118
116119 public:
117 SSEDomainFixPass() : MachineFunctionPass(ID) {}
120 SSEDomainFixPass(const TargetRegisterClass *rc)
121 : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
118122
119123 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
120124 AU.setPreservesAll();
153157
154158 /// Translate TRI register number to an index into our smaller tables of
155159 /// interesting registers. Return -1 for boring registers.
156 int SSEDomainFixPass::RegIndex(unsigned reg) {
157 assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
158 reg -= X86::XMM0;
159 return reg < NumRegs ? (int) reg : -1;
160 int SSEDomainFixPass::RegIndex(unsigned Reg) {
161 assert(Reg < AliasMap.size() && "Invalid register");
162 return AliasMap[Reg];
160163 }
161164
162165 DomainValue *SSEDomainFixPass::Alloc(int domain) {
443446
444447 bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
445448 MF = &mf;
446 TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
449 TII = MF->getTarget().getInstrInfo();
447450 TRI = MF->getTarget().getRegisterInfo();
448451 MBB = 0;
449452 LiveRegs = 0;
450453 Distance = 0;
451 assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
454 assert(NumRegs == RC->getNumRegs() && "Bad regclass");
452455
453456 // If no registers in RC are used in the function, we can skip it completely.
454457 bool anyregs = false;
455 for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
456 E = X86::VR128RegClass.end(); I != E; ++I)
458 for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
459 I != E; ++I)
457460 if (MF->getRegInfo().isPhysRegUsed(*I)) {
458461 anyregs = true;
459462 break;
460463 }
461464 if (!anyregs) return false;
465
466 // Initialize the AliasMap on the first use.
467 if (AliasMap.empty()) {
468 // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
469 // or -1.
470 AliasMap.resize(TRI->getNumRegs(), -1);
471 for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
472 for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
473 AliasMap[*AI] = i;
474 }
462475
463476 MachineBasicBlock *Entry = MF->begin();
464477 SmallPtrSet Visited;
500513 return false;
501514 }
502515
503 FunctionPass *llvm::createSSEDomainFixPass() {
504 return new SSEDomainFixPass();
505 }
516 FunctionPass *
517 llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
518 return new SSEDomainFixPass(RC);
519 }
4343 ///
4444 FunctionPass *createX86FloatingPointStackifierPass();
4545
46 /// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
47 /// crossings.
48 FunctionPass *createSSEDomainFixPass();
49
5046 /// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
5147 /// before each call to avoid transition penalty between functions encoded with
5248 /// AVX and SSE.
132132 bool ShouldPrint = false;
133133 if (OptLevel != CodeGenOpt::None &&
134134 (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
135 PM.add(createSSEDomainFixPass());
135 PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
136136 ShouldPrint = true;
137137 }
138138
22
33 ; CHECK: vmovaps
44 ; CHECK: vmovaps
5 ; CHECK: vmovapd
6 ; CHECK: vmovapd
5 ; CHECK: vmovaps
6 ; CHECK: vmovaps
77 ; CHECK: vmovaps
88 ; CHECK: vmovaps
99 define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp {
4646 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
4747 ; To:
4848 ; shuffle (vload ptr)), undef, <1, 1, 1, 1>
49 ; CHECK: vmovaps
49 ; CHECK: vmovdqa
5050 ; CHECK-NEXT: vinsertf128 $1
5151 ; CHECK-NEXT: vpermilps $-1
5252 define <8 x float> @funcE() nounwind {