llvm.org GIT mirror llvm / 239ffb3
Enhance the fix of PR17631 - The fix for PR17631 covers only part of the cases where 'vzeroupper' should not be issued before a 'call' insn. There are other cases, not limited to the epilog, where helper calls are inserted. These helper calls do not follow the standard calling convention and won't clobber any YMM registers. (So far, all calling conventions clobber all or part of the YMM registers.) This patch enhances the previous fix to cover more cases where 'vzeroupper' should not be inserted, by checking whether the function call clobbers any YMM register and skipping it if it does not. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196261 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Liao 5 years ago
2 changed file(s) with 43 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
147147 return false;
148148 }
149149
150 /// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this
151 /// instruction.
152 static bool clobbersAnyYmmReg(MachineInstr *MI) {
153 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
154 const MachineOperand &MO = MI->getOperand(i);
155 if (!MO.isRegMask())
156 continue;
157 for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) {
158 if (MO.clobbersPhysReg(reg))
159 return true;
160 }
161 for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) {
162 if (MO.clobbersPhysReg(reg))
163 return true;
164 }
165 }
166 return false;
167 }
168
150169 /// runOnMachineFunction - Loop over all of the basic blocks, inserting
151170 /// vzero upper instructions before function calls.
152171 bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
233252 DebugLoc dl = I->getDebugLoc();
234253 MachineInstr *MI = I;
235254
236 // Don't need to check instructions added in prolog.
237 // In prolog, special function calls may be added for specific targets
238 // (e.g. on Windows, a prolog helper '_chkstk' is called when the local
239 // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM
240 // registers.
241 if (MI->getFlag(MachineInstr::FrameSetup))
242 continue;
243
244255 bool isControlFlow = MI->isCall() || MI->isReturn();
245256
246257 // Shortcut: don't need to check regular instructions in dirty state.
257268 // Check for control-flow out of the current function (which might
258269 // indirectly execute SSE instructions).
259270 if (!isControlFlow)
271 continue;
272
273 // If the call won't clobber any YMM register, skip it as well. It usually
274 // happens on helper function calls (such as '_chkstk', '_ftol2') where
275 // standard calling convention is not used (RegMask is not used to mark
276 // register clobbered and register usage (def/imp-def/use) is well-defined
277 // and explicitly specified).
278 if (MI->isCall() && !clobbersAnyYmmReg(MI))
260279 continue;
261280
262281 BBHasCall = true;
0 ; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s
1
1
22 %struct_type = type { [64 x <8 x float>], <8 x float> }
3
3
44 ; Function Attrs: nounwind readnone
55 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
6
6
77 ; Function Attrs: nounwind
88 define i32 @equal(<8 x i32> %A) {
99 allocas:
1010 %first_alloc = alloca [64 x <8 x i32>]
1111 %second_alloc = alloca %struct_type
12
12
1313 %A1 = bitcast <8 x i32> %A to <8 x float>
1414 %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1)
1515 ret i32 %A2
1919 ; CHECK-NOT: vzeroupper
2020 ; CHECK: _chkstk
2121 ; CHECK: ret
22
; @foo - Converts the double %x to an unsigned 64-bit integer, stores that
; value through %p, and returns %y + %y as an <8 x float>.
; NOTE(review): presumably the fptoui below is what gets lowered to the
; '_ftol2' helper call that the assertions following this function look for
; on the i386 win32 triple from the RUN line - confirm.
23 define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {
24 %i = fptoui double %x to i64
25 store i64 %i, i64* %p
; 256-bit vector add; its result is the function's return value.
26 %ret = fadd <8 x float> %y, %y
27 ret <8 x float> %ret
28 }
29
30 ; CHECK: foo
31 ; CHECK-NOT: vzeroupper
32 ; CHECK: _ftol2
33 ; CHECK: ret