Remove the X86 Maximal Stack Alignment Check pass as it is no longer necessary.

This pass was conservative in that it always reserved the FP to enable dynamic stack realignment, which allowed the RA to use aligned spills for vector registers even when spills were not necessary. The RA has since been improved to use unaligned spills when necessary. The new behavior is to realign the stack if the frame pointer was already reserved for some other reason, but not to reserve the frame pointer just because a function contains vector virtual registers.

Part of rdar://12719844

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168627 91177308-0d34-0410-b5e6-96231b3b80d8

Chad Rosier
5 changed files with 19 additions and 75 deletions.
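Before the diff, a minimal C++ sketch of the policy change described above. The names here (FrameInfo and its fields) are illustrative assumptions, not the actual LLVM interfaces; the removed pass implemented the old check, and the new behavior only realigns when the frame pointer is already reserved for some other reason.

// Hypothetical sketch of the policy change; names are illustrative,
// not the actual LLVM interfaces.
struct FrameInfo {
  unsigned StackAlignment;        // guaranteed incoming stack alignment
  unsigned MaxVRegClassAlignment; // largest register-class alignment among vregs
  bool FPReservedForOtherReason;  // e.g. variable-sized objects
};

// Old heuristic (what the removed pass computed): reserve the frame pointer
// whenever some vreg class needs more alignment than the stack provides.
inline bool oldForceFramePointer(const FrameInfo &FI) {
  return FI.MaxVRegClassAlignment > FI.StackAlignment;
}

// New policy: never reserve the FP just for vector vregs; realign only when
// the FP is already reserved, otherwise let the RA emit unaligned spills.
inline bool newRealignStack(const FrameInfo &FI) {
  return FI.FPReservedForOtherReason &&
         FI.MaxVRegClassAlignment > FI.StackAlignment;
}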
@@ -62,12 +62,6 @@
 ///
 FunctionPass *createEmitX86CodeToMemory();
 
-/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
-/// which determines whether the frame pointer register should be
-/// reserved in case dynamic stack alignment is later required.
-///
-FunctionPass *createX86MaxStackAlignmentHeuristicPass();
-
 } // End llvm namespace
 
 #endif
@@ -788,46 +788,3 @@
 }
 }
 }
-
-namespace {
-  struct MSAH : public MachineFunctionPass {
-    static char ID;
-    MSAH() : MachineFunctionPass(ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF) {
-      const X86TargetMachine *TM =
-        static_cast<const X86TargetMachine *>(&MF.getTarget());
-      const TargetFrameLowering *TFI = TM->getFrameLowering();
-      MachineRegisterInfo &RI = MF.getRegInfo();
-      X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-      unsigned StackAlignment = TFI->getStackAlignment();
-
-      // Be over-conservative: scan over all vreg defs and find whether vector
-      // registers are used. If yes, there is a possibility that vector register
-      // will be spilled and thus require dynamic stack realignment.
-      for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
-        unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
-        if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
-          FuncInfo->setForceFramePointer(true);
-          return true;
-        }
-      }
-      // Nothing to do
-      return false;
-    }
-
-    virtual const char *getPassName() const {
-      return "X86 Maximal Stack Alignment Check";
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-  };
-
-  char MSAH::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
@@ -170,7 +170,6 @@
 }
 
 bool X86PassConfig::addPreRegAlloc() {
-  addPass(createX86MaxStackAlignmentHeuristicPass());
   return false; // -print-machineinstr shouldn't print after this.
 }
 
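The test updates below now check for unaligned vmovups spills addressed off %rsp instead of aligned vmovaps spills addressed off the realigned %rbp frame. A minimal sketch of the alignment-driven opcode choice, using a hypothetical helper rather than the real X86 spill-lowering code:

// Hypothetical helper: pick the YMM spill mnemonic from the spill slot's
// known alignment. vmovaps faults on unaligned addresses; vmovups does not.
const char *chooseYmmSpillOpcode(unsigned SlotAlignInBytes) {
  return SlotAlignInBytes >= 32 ? "vmovaps" : "vmovups";
}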
@@ -86,23 +86,23 @@
 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
 
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
 ; X64: call
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
 define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
   %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
   ret <16 x float> %c
@@ -102,7 +102,7 @@
 
 declare void @t4_helper(i32*, i32*, <8 x float>*)
 
-; Dynamic realignment + Spill
+; Spilling an AVX register shouldn't cause dynamic realignment
 define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
 entry:
   %a = alloca i32, align 4
@@ -115,21 +115,15 @@
   ret i32 %add
 
 ; CHECK: _t5
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
 ; CHECK: subq ${{[0-9]+}}, %rsp
 ;
 ; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
-; CHECK: vmovaps [[AVXREG]], (%rsp)
+; CHECK: vmovups [[AVXREG]], (%rsp)
 ; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
 ; CHECK: callq _t5_helper1
-; CHECK: vmovaps (%rsp), %ymm0
+; CHECK: vmovups (%rsp), %ymm0
 ; CHECK: callq _t5_helper2
 ; CHECK: movl {{[0-9]+}}(%rsp), %eax
-;
-; CHECK: movq %rbp, %rsp
-; CHECK: popq %rbp
 }
 
 declare void @t5_helper1(i32*)