llvm.org GIT mirror llvm / 1bfec90
Re-enable "[MachineCopyPropagation] Extend pass to do COPY source forwarding" Re-enable commit r323991 now that r325931 has been committed to make MachineOperand::isRenamable() check more conservative w.r.t. code changes and opt-in on a per-target basis. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326208 91177308-0d34-0410-b5e6-96231b3b80d8 Geoff Berry 1 year, 8 months ago
123 changed file(s) with 852 addition(s) and 581 deletion(s). Raw diff Collapse all Expand all
77 //===----------------------------------------------------------------------===//
88 //
99 // This is an extremely simple MachineInstr-level copy propagation pass.
10 //
11 // This pass forwards the source of COPYs to the users of their destinations
12 // when doing so is legal. For example:
13 //
14 // %reg1 = COPY %reg0
15 // ...
16 // ... = OP %reg1
17 //
18 // If
19 // - %reg0 has not been clobbered by the time of the use of %reg1
20 // - the register class constraints are satisfied
21 // - the COPY def is the only value that reaches OP
22 // then this pass replaces the above with:
23 //
24 // %reg1 = COPY %reg0
25 // ...
26 // ... = OP %reg0
27 //
28 // This pass also removes some redundant COPYs. For example:
29 //
30 // %R1 = COPY %R0
31 // ... // No clobber of %R1
32 // %R0 = COPY %R1 <<< Removed
33 //
34 // or
35 //
36 // %R1 = COPY %R0
37 // ... // No clobber of %R0
38 // %R1 = COPY %R0 <<< Removed
1039 //
1140 //===----------------------------------------------------------------------===//
1241
2251 #include "llvm/CodeGen/MachineInstr.h"
2352 #include "llvm/CodeGen/MachineOperand.h"
2453 #include "llvm/CodeGen/MachineRegisterInfo.h"
54 #include "llvm/CodeGen/TargetInstrInfo.h"
2555 #include "llvm/CodeGen/TargetRegisterInfo.h"
2656 #include "llvm/CodeGen/TargetSubtargetInfo.h"
2757 #include "llvm/MC/MCRegisterInfo.h"
2858 #include "llvm/Pass.h"
2959 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/DebugCounter.h"
3061 #include "llvm/Support/raw_ostream.h"
3162 #include
3263 #include
3667 #define DEBUG_TYPE "machine-cp"
3768
3869 STATISTIC(NumDeletes, "Number of dead copies deleted");
70 STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
71 DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
72 "Controls which register COPYs are forwarded");
3973
4074 namespace {
4175
72106 void ReadRegister(unsigned Reg);
73107 void CopyPropagateBlock(MachineBasicBlock &MBB);
74108 bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
109 void forwardUses(MachineInstr &MI);
110 bool isForwardableRegClassCopy(const MachineInstr &Copy,
111 const MachineInstr &UseI, unsigned UseIdx);
112 bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
75113
76114 /// Candidates for deletion.
77115 SmallSetVector MaybeDeadCopies;
205243 Changed = true;
206244 ++NumDeletes;
207245 return true;
246 }
247
248 /// Decide whether we should forward the source of \p Copy to its use in
249 /// \p UseI based on the physical register class constraints of the opcode
250 /// and avoiding introducing more cross-class COPYs.
///
/// \param Copy   the COPY whose source register (operand 1) would be forwarded.
/// \param UseI   the instruction that uses the destination of \p Copy.
/// \param UseIdx index of the operand in \p UseI that would be rewritten; it is
///               passed to getRegClassConstraint() to query the opcode's
///               constraint for that operand.
/// \returns true if the COPY source register satisfies the operand's register
///          class constraint, or, when there is no such constraint and
///          \p UseI is itself a COPY, if the source register is contained in
///          the minimal physical register class of \p UseI's destination or in
///          one of that class's super-classes; false otherwise.
251 bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
252 const MachineInstr &UseI,
253 unsigned UseIdx) {
254
255 unsigned CopySrcReg = Copy.getOperand(1).getReg();
256
257 // If the new register meets the opcode register constraints, then allow
258 // forwarding. When a constraint exists it alone decides the result.
259 if (const TargetRegisterClass *URC =
260 UseI.getRegClassConstraint(UseIdx, TII, TRI))
261 return URC->contains(CopySrcReg);
262
// No constraint was reported: only COPY users are considered any further.
263 if (!UseI.isCopy())
264 return false;
265
266 // COPYs don't have register class constraints, so if the user instruction
267 // is a COPY, we just try to avoid introducing additional cross-class
268 // COPYs. For example:
269 //
270 // RegClassA = COPY RegClassB // Copy parameter
271 // ...
272 // RegClassB = COPY RegClassA // UseI parameter
273 //
274 // which after forwarding becomes
275 //
276 // RegClassA = COPY RegClassB
277 // ...
278 // RegClassB = COPY RegClassB
279 //
280 // so we have reduced the number of cross-class COPYs and potentially
281 // introduced a nop COPY that can be removed.
282 const TargetRegisterClass *UseDstRC =
283 TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
284
// Test UseDstRC itself first, then each entry produced by getSuperClasses().
// The walk stops at the first null entry (presumably the list is
// null-terminated -- confirm against TargetRegisterClass).
285 const TargetRegisterClass *SuperRC = UseDstRC;
286 for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
287 SuperRC; SuperRC = *SuperRCI++)
288 if (SuperRC->contains(CopySrcReg))
289 return true;
290
291 return false;
292 }
293
294 /// Check that \p MI does not have implicit uses that overlap with its \p Use
295 /// operand (the register being replaced), since these can sometimes be
296 /// implicitly tied to other operands. For example, on AMDGPU:
297 ///
298 /// V_MOVRELS_B32_e32 %VGPR2, %M0, %EXEC, %VGPR2_VGPR3_VGPR4_VGPR5
299 ///
300 /// the %VGPR2 is implicitly tied to the larger reg operand, but we have no
301 /// way of knowing we need to update the latter when updating the former.
///
/// \param MI  the instruction whose use operands are scanned.
/// \param Use the operand that is about to be rewritten. It is excluded from
///            the scan by address comparison, so it should be a reference to
///            one of \p MI's own operands.
/// \returns true if some other implicit register use operand of \p MI
///          overlaps (according to TRI->regsOverlap) with the register of
///          \p Use; false otherwise.
302 bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
303 const MachineOperand &Use) {
304 for (const MachineOperand &MIUse : MI.uses())
305 if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() &&
306 MIUse.isUse() && TRI->regsOverlap(Use.getReg(), MIUse.getReg()))
307 return true;
308
309 return false;
310 }
311
312 /// Look for available copies whose destination register is used by \p MI and
313 /// replace the use in \p MI with the copy's source register.
///
/// Each successful rewrite increments the NumCopyForwards statistic and sets
/// the pass-level Changed flag. Rewrites can be suppressed individually via
/// the FwdCounter debug counter.
///
/// \param MI the instruction whose explicit register uses are candidates for
///           forwarding; its operands are modified in place.
314 void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
// Nothing can be forwarded when no COPY is currently recorded as available.
315 if (AvailCopyMap.empty())
316 return;
317
318 // Walk every operand of MI looking for non-tied, explicit register uses
319 // whose register is the destination of a COPY still recorded in
320 // AvailCopyMap, and rewrite each such use to read the COPY's source instead.
321 for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx < OpEnd;
322 ++OpIdx) {
323 MachineOperand &MOUse = MI.getOperand(OpIdx);
// Only plain explicit register uses are candidates: non-register operands,
// tied operands, defs and implicit operands are all skipped. Undef uses are
// skipped as well, for the following reason.
324 // Don't forward into undef use operands since doing so can cause problems
325 // with the machine verifier, since it doesn't treat undef reads as reads,
326 // so we can end up with a live range that ends on an undef read, leading to
327 // an error that the live range doesn't end on a read of the live range
328 // register.
329 if (!MOUse.isReg() || MOUse.isTied() || MOUse.isUndef() || MOUse.isDef() ||
330 MOUse.isImplicit())
331 continue;
332
// A register number of zero means there is nothing to look up.
333 if (!MOUse.getReg())
334 continue;
335
336 // Check that the register is marked 'renamable' so we know it is safe to
337 // rename it without violating any constraints that aren't expressed in the
338 // IR (e.g. ABI or opcode requirements).
339 if (!MOUse.isRenamable())
340 continue;
341
// Find the available COPY (if any) recorded for the register being read.
342 auto CI = AvailCopyMap.find(MOUse.getReg());
343 if (CI == AvailCopyMap.end())
344 continue;
345
346 MachineInstr &Copy = *CI->second;
347 unsigned CopyDstReg = Copy.getOperand(0).getReg();
348 const MachineOperand &CopySrc = Copy.getOperand(1);
349 unsigned CopySrcReg = CopySrc.getReg();
350
351 // FIXME: Don't handle partial uses of wider COPYs yet. The map lookup can
// succeed for a register that is not exactly the COPY's destination.
352 if (MOUse.getReg() != CopyDstReg) {
353 DEBUG(dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n "
354 << MI);
355 continue;
356 }
357
358 // Don't forward COPYs of reserved regs unless they are constant.
359 if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg))
360 continue;
361
// The source register must be acceptable for this operand of MI.
362 if (!isForwardableRegClassCopy(Copy, MI, OpIdx))
363 continue;
364
// Bail out if another implicit use of MI overlaps the register being
// replaced, since that operand would not be updated along with this one.
365 if (hasImplicitOverlap(MI, MOUse))
366 continue;
367
// Debugging aid: lets individual forwards be switched off from outside.
368 if (!DebugCounter::shouldExecute(FwdCounter)) {
369 DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n "
370 << MI);
371 continue;
372 }
373
374 DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
375 << "\n with " << printReg(CopySrcReg, TRI) << "\n in "
376 << MI << " from " << Copy);
377
// Perform the rewrite. If the COPY's source operand was not renamable, the
// rewritten use must not be marked renamable either.
378 MOUse.setReg(CopySrcReg);
379 if (!CopySrc.isRenamable())
380 MOUse.setIsRenamable(false);
381
382 DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
383
384 // Clear kill markers that may have been invalidated. CopySrcReg is now
// read by MI, so kill flags on it are dropped from every instruction in
// the range starting at Copy and ending at MI (inclusive; std::next()
// yields the exclusive end of the range).
385 for (MachineInstr &KMI :
386 make_range(Copy.getIterator(), std::next(MI.getIterator())))
387 KMI.clearRegisterKills(CopySrcReg, TRI);
388
389 ++NumCopyForwards;
390 Changed = true;
391 }
208392 }
209393
210394 void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
240424 if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
241425 continue;
242426
427 forwardUses(*MI);
428
429 // Src may have been changed by forwardUses()
430 Src = MI->getOperand(1).getReg();
431
243432 // If Src is defined by a previous copy, the previous copy cannot be
244433 // eliminated.
245434 ReadRegister(Src);
291480 continue;
292481 }
293482
483 // Clobber any earlyclobber regs first.
484 for (const MachineOperand &MO : MI->operands())
485 if (MO.isReg() && MO.isEarlyClobber()) {
486 unsigned Reg = MO.getReg();
487 // If we have a tied earlyclobber, that means it is also read by this
488 // instruction, so we need to make sure we don't remove it as dead
489 // later.
490 if (MO.isTied())
491 ReadRegister(Reg);
492 ClobberRegister(Reg);
493 }
494
495 forwardUses(*MI);
496
294497 // Not a copy.
295498 SmallVector Defs;
296499 const MachineOperand *RegMask = nullptr;
306509 assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
307510 "MachineCopyPropagation should be run after register allocation!");
308511
309 if (MO.isDef()) {
512 if (MO.isDef() && !MO.isEarlyClobber()) {
310513 Defs.push_back(Reg);
311514 continue;
312515 } else if (MO.readsReg())
363566 // since we don't want to trust live-in lists.
364567 if (MBB.succ_empty()) {
365568 for (MachineInstr *MaybeDead : MaybeDeadCopies) {
569 DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
570 MaybeDead->dump());
366571 assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
367572 MaybeDead->eraseFromParent();
368573 Changed = true;
10821082 // kill markers.
10831083 addPass(&StackSlotColoringID);
10841084
1085 // Copy propagate to forward register uses and try to eliminate COPYs that
1086 // were not coalesced.
1087 addPass(&MachineCopyPropagationID);
1088
10851089 // Run post-ra machine LICM to hoist reloads / remats.
10861090 //
10871091 // FIXME: can this move into MachineLateOptimization?
88 ; CHECK-LABEL: halfword:
99 ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
1010 ; CHECK: ldrh [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #1]
11 ; CHECK: strh [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #1]
11 ; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
12 ; CHECK: strh [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #1]
1213 %shr81 = lshr i32 %xor72, 9
1314 %conv82 = zext i32 %shr81 to i64
1415 %idxprom83 = and i64 %conv82, 255
2324 ; CHECK-LABEL: word:
2425 ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
2526 ; CHECK: ldr [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #2]
26 ; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #2]
27 ; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
28 ; CHECK: str [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #2]
2729 %shr81 = lshr i32 %xor72, 9
2830 %conv82 = zext i32 %shr81 to i64
2931 %idxprom83 = and i64 %conv82, 255
3840 ; CHECK-LABEL: doubleword:
3941 ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
4042 ; CHECK: ldr [[REG1:x[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #3]
41 ; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #3]
43 ; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
44 ; CHECK: str [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #3]
4245 %shr81 = lshr i32 %xor72, 9
4346 %conv82 = zext i32 %shr81 to i64
4447 %idxprom83 = and i64 %conv82, 255
77 ; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
88 ; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
99 ; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
10 ; Without advanced copy optimization, we end up with cross register
11 ; banks copies that cannot be coalesced.
12 ; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
13 ; With advanced copy optimization, we end up with just one copy
14 ; to insert the computed high part into the V register.
15 ; CHECK-OPT-NOT: fmov
10 ; CHECK-NOT: fmov
1611 ; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
17 ; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
18 ; CHECK-OPT-NOT: fmov
12 ; CHECK-NOT: fmov
1913 ; CHECK: mov.d v0[1], [[COPY_REG2]]
2014 ; CHECK-NEXT: ret
2115 ;
2317 ; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
2418 ; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
2519 ; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
26 ; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
27 ; GENERIC-OPT-NOT: fmov
20 ; GENERIC-NOT: fmov
2821 ; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
29 ; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
30 ; GENERIC-OPT-NOT: fmov
22 ; GENERIC-NOT: fmov
3123 ; GENERIC: mov v0.d[1], [[COPY_REG2]]
3224 ; GENERIC-NEXT: ret
3325 %add = add <2 x i64> %a, %b
33 define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind ssp {
44 entry:
55 ; CHECK-LABEL: t:
6 ; CHECK: mov x0, [[REG1:x[0-9]+]]
7 ; CHECK: mov x1, [[REG2:x[0-9]+]]
6 ; CHECK: mov [[REG2:x[0-9]+]], x3
7 ; CHECK: mov [[REG1:x[0-9]+]], x2
8 ; CHECK: mov x0, x2
9 ; CHECK: mov x1, x3
810 ; CHECK: bl _foo
911 ; CHECK: mov x0, [[REG1]]
1012 ; CHECK: mov x1, [[REG2]]
4444
4545 ; CHECK: [[FAILED]]:
4646 ; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
47 ; CHECK: mov [[TMP:w[0-9]+]], wzr
48 ; CHECK: eor w0, [[TMP]], #0x1
47 ; CHECK: eor w0, wzr, #0x1
4948 ; CHECK: ret
5049
5150 %pair = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic
0 # RUN: llc -mtriple=aarch64-linux-gnu -run-pass machine-cp -o - %s | FileCheck %s
1 # Tests for MachineCopyPropagation copy forwarding.
2 ---
3 # Simple forwarding.
4 # CHECK-LABEL: name: test1
5 # CHECK: $x0 = SUBXri $x0, 1, 0
6 name: test1
7 tracksRegLiveness: true
8 body: |
9 bb.0:
10 liveins: $x0
11 renamable $x1 = COPY $x0
12 $x0 = SUBXri renamable $x1, 1, 0
13 ...
14 ---
15 # Don't forward if not renamable.
16 # CHECK-LABEL: name: test2
17 # CHECK: $x0 = SUBXri $x1, 1, 0
18 name: test2
19 tracksRegLiveness: true
20 body: |
21 bb.0:
22 liveins: $x0
23 $x1 = COPY $x0
24 $x0 = SUBXri $x1, 1, 0
25 ...
26 ---
27 # Don't forward reserved non-constant reg values.
28 # CHECK-LABEL: name: test4
29 # CHECK: $x0 = SUBXri renamable $x1, 1, 0
30 name: test4
31 tracksRegLiveness: true
32 body: |
33 bb.0:
34 liveins: $x0
35 $sp = SUBXri $sp, 16, 0
36 renamable $x1 = COPY $sp
37 $x0 = SUBXri renamable $x1, 1, 0
38 $sp = ADDXri $sp, 16, 0
39 ...
40 ---
41 # Don't violate opcode constraints when forwarding.
42 # CHECK-LABEL: name: test5
43 # CHECK: $x0 = SUBXri renamable $x1, 1, 0
44 name: test5
45 tracksRegLiveness: true
46 body: |
47 bb.0:
48 liveins: $x0
49 renamable $x1 = COPY $xzr
50 $x0 = SUBXri renamable $x1, 1, 0
51 ...
52 ---
53 # Test cross-class COPY forwarding.
54 # CHECK-LABEL: name: test6
55 # CHECK: $x2 = COPY $x0
56 name: test6
57 tracksRegLiveness: true
58 body: |
59 bb.0:
60 liveins: $x0
61 renamable $d1 = COPY $x0
62 $x2 = COPY renamable $d1
63 RET_ReallyLR implicit $x2
64 ...
65 ---
66 # Don't forward if there are overlapping implicit operands.
67 # CHECK-LABEL: name: test7
68 # CHECK: $w0 = SUBWri killed renamable $w1, 1, 0
69 name: test7
70 tracksRegLiveness: true
71 body: |
72 bb.0:
73 liveins: $w0
74 renamable $w1 = COPY $w0
75 $w0 = SUBWri killed renamable $w1, 1, 0, implicit killed $x1
76 ...
77 ---
78 # Check that kill flags are cleared.
79 # CHECK-LABEL: name: test8
80 # CHECK: $x2 = ADDXri $x0, 1, 0
81 # CHECK: $x0 = SUBXri $x0, 1, 0
82 name: test8
83 tracksRegLiveness: true
84 body: |
85 bb.0:
86 liveins: $x0
87 renamable $x1 = COPY $x0
88 $x2 = ADDXri killed $x0, 1, 0
89 $x0 = SUBXri renamable $x1, 1, 0
90 ...
91 ---
92 # Don't forward if value is clobbered.
93 # CHECK-LABEL: name: test9
94 # CHECK: $x2 = SUBXri renamable $x1, 1, 0
95 name: test9
96 tracksRegLiveness: true
97 body: |
98 bb.0:
99 liveins: $x0
100 renamable $x1 = COPY $x0
101 $x0 = ADDXri $x0, 1, 0
102 $x2 = SUBXri renamable $x1, 1, 0
103 ...
488488
489489 ; CHECK-COMMON-LABEL: test_phi:
490490 ; CHECK-COMMON: mov x[[PTR:[0-9]+]], x0
491 ; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x[[PTR]]]
491 ; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x0]
492492 ; CHECK-COMMON: [[LOOP:LBB[0-9_]+]]:
493493 ; CHECK-COMMON: mov.16b v[[R:[0-9]+]], v[[AB]]
494494 ; CHECK-COMMON: ldr h[[AB]], [x[[PTR]]]
1616 %val = zext i1 %test to i32
1717 ; CHECK: cset {{[xw][0-9]+}}, ne
1818
19 ; CHECK: mov [[RHSCOPY:w[0-9]+]], [[RHS]]
20 ; CHECK: mov [[LHSCOPY:w[0-9]+]], [[LHS]]
21
1922 store i32 %val, i32* @var
2023
2124 call void @bar()
2427 ; Currently, the comparison is emitted again. An MSR/MRS pair would also be
2528 ; acceptable, but assuming the call preserves NZCV is not.
2629 br i1 %test, label %iftrue, label %iffalse
27 ; CHECK: cmp [[LHS]], [[RHS]]
30 ; CHECK: cmp [[LHSCOPY]], [[RHSCOPY]]
2831 ; CHECK: b.eq
2932
3033 iftrue:
16701670 ; CHECK-LABEL: bug34674:
16711671 ; CHECK: // %entry
16721672 ; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr
1673 ; CHECK-DAG: stp [[ZREG]], [[ZREG]], [x0]
1673 ; CHECK-DAG: stp xzr, xzr, [x0]
16741674 ; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1
16751675 define i64 @bug34674(<2 x i64>* %p) {
16761676 entry:
1010 ; A53: mov [[DATA:w[0-9]+]], w1
1111 ; A53: str q{{[0-9]+}}, {{.*}}
1212 ; A53: str q{{[0-9]+}}, {{.*}}
13 ; A53: str [[DATA]], {{.*}}
13 ; A53: str w1, {{.*}}
1414
1515 %0 = bitcast %struct1* %fde to i8*
1616 tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 40, i1 false)
66 define void @test(i32 %px) {
77 ; CHECK_LABEL: test:
88 ; CHECK_LABEL: %entry
9 ; CHECK: subs
10 ; CHECK-NEXT: csel
9 ; CHECK: subs [[REG0:w[0-9]+]],
10 ; CHECK: csel {{w[0-9]+}}, wzr, [[REG0]]
1111 entry:
1212 %sub = add nsw i32 %px, -1
1313 %cmp = icmp slt i32 %px, 1
4040 ; CHECK-APPLE: mov x21, xzr
4141 ; CHECK-APPLE: bl {{.*}}foo
4242 ; CHECK-APPLE: mov x0, x21
43 ; CHECK-APPLE: cbnz x0
43 ; CHECK-APPLE: cbnz x21
4444 ; Access part of the error object and save it to error_ref
4545 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
4646 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
263263 ; CHECK-APPLE: mov x21, xzr
264264 ; CHECK-APPLE: bl {{.*}}foo_sret
265265 ; CHECK-APPLE: mov x0, x21
266 ; CHECK-APPLE: cbnz x0
266 ; CHECK-APPLE: cbnz x21
267267 ; Access part of the error object and save it to error_ref
268268 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
269269 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
357357 ; CHECK-APPLE: mov x21, xzr
358358 ; CHECK-APPLE: bl {{.*}}foo_vararg
359359 ; CHECK-APPLE: mov x0, x21
360 ; CHECK-APPLE: cbnz x0
360 ; CHECK-APPLE: cbnz x21
361361 ; Access part of the error object and save it to error_ref
362362 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
363363 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
546546 ; GCN: s_mov_b32 s5, s32
547547 ; GCN: s_add_u32 s32, s32, 0x300
548548
549 ; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-9]+]], s14
550 ; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-9]+]], s15
551 ; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-9]+]], s16
549 ; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
550 ; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
551 ; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
552552 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
553553 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
554554 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]
555555
556 ; GCN-DAG: s_mov_b32 s6, [[SAVE_X]]
557 ; GCN-DAG: s_mov_b32 s7, [[SAVE_Y]]
558 ; GCN-DAG: s_mov_b32 s8, [[SAVE_Z]]
556 ; GCN-DAG: s_mov_b32 s6, s14
557 ; GCN-DAG: s_mov_b32 s7, s15
558 ; GCN-DAG: s_mov_b32 s8, s16
559559 ; GCN: s_swappc_b64
560560
561561 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
None # RUN: llc -march=amdgcn -start-after=greedy -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
0 # RUN: llc -march=amdgcn -start-after=greedy -disable-copyprop -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
11 # Check that we first do all vector instructions and only then change exec
22 # CHECK-DAG: COPY $vgpr10_vgpr11
33 # CHECK-DAG: COPY $vgpr12_vgpr13
7777
7878 ; Uses a copy instead of an or
7979 ; GCN: s_mov_b64 [[COPY:s\[[0-9]+:[0-9]+\]]], [[BREAK_REG]]
80 ; GCN: s_or_b64 [[BREAK_REG]], exec, [[COPY]]
80 ; GCN: s_or_b64 [[BREAK_REG]], exec, [[BREAK_REG]]
8181 define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
8282 bb:
8383 %id = call i32 @llvm.amdgcn.workitem.id.x()
11 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
22
33 ; GCN-LABEL: {{^}}vgpr:
4 ; GCN: v_mov_b32_e32 v1, v0
5 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
6 ; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
7 ; GCN: s_waitcnt expcnt(0)
4 ; GCN-DAG: v_mov_b32_e32 v1, v0
5 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
6 ; GCN: s_waitcnt expcnt(0)
7 ; GCN: v_add_f32_e32 v0, 1.0, v1
88 ; GCN-NOT: s_endpgm
99 define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
1010 bb:
178178
179179 ; GCN-LABEL: {{^}}sgpr:
180180 ; GCN: s_mov_b32 s2, s3
181 ; GCN: s_add_i32 s0, s2, 2
181 ; GCN: s_add_i32 s0, s3, 2
182182 ; GCN-NOT: s_endpgm
183183 define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
184184 bb:
203203 }
204204
205205 ; GCN-LABEL: {{^}}both:
206 ; GCN: v_mov_b32_e32 v1, v0
207 ; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
208 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
206 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
207 ; GCN-DAG: v_mov_b32_e32 v1, v0
208 ; GCN-DAG: s_mov_b32 s1, s2
209 ; GCN: s_waitcnt expcnt(0)
210 ; GCN: v_add_f32_e32 v0, 1.0, v1
209211 ; GCN-DAG: s_add_i32 s0, s3, 2
210 ; GCN-DAG: s_mov_b32 s1, s2
211 ; GCN: s_mov_b32 s2, s3
212 ; GCN: s_waitcnt expcnt(0)
212 ; GCN-DAG: s_mov_b32 s2, s3
213213 ; GCN-NOT: s_endpgm
214214 define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
215215 bb:
286286
287287 %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
288288 %oldval = extractvalue { i32, i1 } %pair, 0
289 ; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
289 ; CHECK-ARMV7: mov r[[ADDR:[0-9]+]], r0
290 ; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r0]
290291 ; CHECK-ARMV7: cmp [[OLDVAL]], r1
291292 ; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
292293 ; CHECK-ARMV7: dmb ish
304305 ; CHECK-ARMV7: dmb ish
305306 ; CHECK-ARMV7: bx lr
306307
307 ; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
308 ; CHECK-T2: mov r[[ADDR:[0-9]+]], r0
309 ; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r0]
308310 ; CHECK-T2: cmp [[OLDVAL]], r1
309311 ; CHECK-T2: bne [[FAIL_BB:\.?LBB.*]]
310312 ; CHECK-T2: dmb ish
3838 ; ARM: mov pc, lr
3939
4040 ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]]
41 ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
41 ; THUMBV6: adds r[[R3:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
4242 ; THUMBV6: movs r[[R0]], #0
4343 ; THUMBV6: movs r[[R1]], #1
4444 ; THUMBV6: cmp r[[R3]], r[[R2]]
3939 ; CHECK-APPLE-DAG: mov r8, #0
4040 ; CHECK-APPLE: bl {{.*}}foo
4141 ; CHECK-APPLE: mov r0, r8
42 ; CHECK-APPLE: cmp r0, #0
42 ; CHECK-APPLE: cmp r8, #0
4343 ; Access part of the error object and save it to error_ref
4444 ; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
4545 ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
180180 ; CHECK-APPLE: beq
181181 ; CHECK-APPLE: mov r0, #16
182182 ; CHECK-APPLE: malloc
183 ; CHECK-APPLE: mov r8, r0
184 ; CHECK-APPLE: strb r{{.*}}, [r8, #8]
183 ; CHECK-APPLE: strb r{{.*}}, [r0, #8]
185184 ; CHECK-APPLE: ble
186185
187186 ; CHECK-O0-LABEL: foo_loop:
265264 ; CHECK-APPLE: mov r8, #0
266265 ; CHECK-APPLE: bl {{.*}}foo_sret
267266 ; CHECK-APPLE: mov r0, r8
268 ; CHECK-APPLE: cmp r0, #0
267 ; CHECK-APPLE: cmp r8, #0
269268 ; Access part of the error object and save it to error_ref
270269 ; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
271270 ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
346345 ; CHECK-APPLE: mov r8, #0
347346 ; CHECK-APPLE: bl {{.*}}foo_vararg
348347 ; CHECK-APPLE: mov r0, r8
349 ; CHECK-APPLE: cmp r0, #0
348 ; CHECK-APPLE: cmp r8, #0
350349 ; Access part of the error object and save it to error_ref
351350 ; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
352351 ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
99 define double @foo(double %a, double %b) nounwind readnone {
1010 ; MIPS32-LABEL: foo:
1111 ; MIPS32: # %bb.0: # %entry
12 ; MIPS32-NEXT: mov.d $f0, $f12
1312 ; MIPS32-NEXT: mtc1 $zero, $f2
1413 ; MIPS32-NEXT: mtc1 $zero, $f3
15 ; MIPS32-NEXT: c.ule.d $f0, $f2
14 ; MIPS32-NEXT: c.ule.d $f12, $f2
1615 ; MIPS32-NEXT: bc1f $BB0_2
17 ; MIPS32-NEXT: nop
16 ; MIPS32-NEXT: mov.d $f0, $f12
1817 ; MIPS32-NEXT: # %bb.1: # %if.else
1918 ; MIPS32-NEXT: mtc1 $zero, $f0
2019 ; MIPS32-NEXT: mtc1 $zero, $f1
3332 ; MIPS32R2-NEXT: mov.d $f0, $f12
3433 ; MIPS32R2-NEXT: mtc1 $zero, $f2
3534 ; MIPS32R2-NEXT: mthc1 $zero, $f2
36 ; MIPS32R2-NEXT: c.ule.d $f0, $f2
35 ; MIPS32R2-NEXT: c.ule.d $f12, $f2
3736 ; MIPS32R2-NEXT: bc1f $BB0_2
3837 ; MIPS32R2-NEXT: nop
3938 ; MIPS32R2-NEXT: # %bb.1: # %if.else
5453 ; MIPS32r6-NEXT: mov.d $f0, $f12
5554 ; MIPS32r6-NEXT: mtc1 $zero, $f1
5655 ; MIPS32r6-NEXT: mthc1 $zero, $f1
57 ; MIPS32r6-NEXT: cmp.lt.d $f1, $f1, $f0
56 ; MIPS32r6-NEXT: cmp.lt.d $f1, $f1, $f12
5857 ; MIPS32r6-NEXT: mfc1 $1, $f1
5958 ; MIPS32r6-NEXT: andi $1, $1, 1
6059 ; MIPS32r6-NEXT: bnezc $1, $BB0_2
7372 ;
7473 ; MIPS4-LABEL: foo:
7574 ; MIPS4: # %bb.0: # %entry
75 ; MIPS4-NEXT: dmtc1 $zero, $f1
76 ; MIPS4-NEXT: c.ule.d $f12, $f1
77 ; MIPS4-NEXT: bc1f .LBB0_2
7678 ; MIPS4-NEXT: mov.d $f0, $f12
77 ; MIPS4-NEXT: dmtc1 $zero, $f1
78 ; MIPS4-NEXT: c.ule.d $f0, $f1
79 ; MIPS4-NEXT: bc1f .LBB0_2
80 ; MIPS4-NEXT: nop
8179 ; MIPS4-NEXT: # %bb.1: # %if.else
8280 ; MIPS4-NEXT: dmtc1 $zero, $f0
8381 ; MIPS4-NEXT: c.ule.d $f13, $f0
9290 ;
9391 ; MIPS64-LABEL: foo:
9492 ; MIPS64: # %bb.0: # %entry
93 ; MIPS64-NEXT: dmtc1 $zero, $f1
94 ; MIPS64-NEXT: c.ule.d $f12, $f1
95 ; MIPS64-NEXT: bc1f .LBB0_2
9596 ; MIPS64-NEXT: mov.d $f0, $f12
96 ; MIPS64-NEXT: dmtc1 $zero, $f1
97 ; MIPS64-NEXT: c.ule.d $f0, $f1
98 ; MIPS64-NEXT: bc1f .LBB0_2
99 ; MIPS64-NEXT: nop
10097 ; MIPS64-NEXT: # %bb.1: # %if.else
10198 ; MIPS64-NEXT: dmtc1 $zero, $f0
10299 ; MIPS64-NEXT: c.ule.d $f13, $f0
111108 ;
112109 ; MIPS64R2-LABEL: foo:
113110 ; MIPS64R2: # %bb.0: # %entry
111 ; MIPS64R2-NEXT: dmtc1 $zero, $f1
112 ; MIPS64R2-NEXT: c.ule.d $f12, $f1
113 ; MIPS64R2-NEXT: bc1f .LBB0_2
114114 ; MIPS64R2-NEXT: mov.d $f0, $f12
115 ; MIPS64R2-NEXT: dmtc1 $zero, $f1
116 ; MIPS64R2-NEXT: c.ule.d $f0, $f1
117 ; MIPS64R2-NEXT: bc1f .LBB0_2
118 ; MIPS64R2-NEXT: nop
119115 ; MIPS64R2-NEXT: # %bb.1: # %if.else
120116 ; MIPS64R2-NEXT: dmtc1 $zero, $f0
121117 ; MIPS64R2-NEXT: c.ule.d $f13, $f0
130126 ;
131127 ; MIPS64R6-LABEL: foo:
132128 ; MIPS64R6: # %bb.0: # %entry
133 ; MIPS64R6-NEXT: mov.d $f0, $f12
134129 ; MIPS64R6-NEXT: dmtc1 $zero, $f1
135 ; MIPS64R6-NEXT: cmp.lt.d $f1, $f1, $f0
130 ; MIPS64R6-NEXT: cmp.lt.d $f1, $f1, $f12
136131 ; MIPS64R6-NEXT: mfc1 $1, $f1
137132 ; MIPS64R6-NEXT: andi $1, $1, 1
138 ; MIPS64R6-NEXT: bnezc $1, .LBB0_2
133 ; MIPS64R6-NEXT: bnez $1, .LBB0_2
134 ; MIPS64R6-NEXT: mov.d $f0, $f12
139135 ; MIPS64R6-NEXT: # %bb.1: # %if.else
140136 ; MIPS64R6-NEXT: dmtc1 $zero, $f0
141137 ; MIPS64R6-NEXT: cmp.ule.d $f1, $f13, $f0
799799 ; MMR3-NEXT: sw $5, 36($sp) # 4-byte Folded Spill
800800 ; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
801801 ; MMR3-NEXT: lw $16, 76($sp)
802 ; MMR3-NEXT: srlv $4, $8, $16
802 ; MMR3-NEXT: srlv $4, $7, $16
803803 ; MMR3-NEXT: not16 $3, $16
804804 ; MMR3-NEXT: sw $3, 24($sp) # 4-byte Folded Spill
805805 ; MMR3-NEXT: sll16 $2, $6, 1
827827 ; MMR3-NEXT: move $17, $5
828828 ; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
829829 ; MMR3-NEXT: lw $16, 76($sp)
830 ; MMR3-NEXT: srlv $7, $8, $16
830 ; MMR3-NEXT: srlv $7, $7, $16
831831 ; MMR3-NEXT: not16 $3, $16
832832 ; MMR3-NEXT: sw $3, 24($sp) # 4-byte Folded Spill
833833 ; MMR3-NEXT: sll16 $2, $6, 1
918918 ; MMR6-NEXT: not16 $5, $3
919919 ; MMR6-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
920920 ; MMR6-NEXT: move $17, $6
921 ; MMR6-NEXT: sw $17, 16($sp) # 4-byte Folded Spill
922 ; MMR6-NEXT: sll16 $6, $17, 1
921 ; MMR6-NEXT: sw $6, 16($sp) # 4-byte Folded Spill
922 ; MMR6-NEXT: sll16 $6, $6, 1
923923 ; MMR6-NEXT: sllv $6, $6, $5
924924 ; MMR6-NEXT: or $8, $6, $2
925925 ; MMR6-NEXT: addiu $5, $3, -64
926926 ; MMR6-NEXT: srlv $9, $7, $5
927927 ; MMR6-NEXT: move $6, $4
928 ; MMR6-NEXT: sll16 $2, $6, 1
928 ; MMR6-NEXT: sll16 $2, $4, 1
929929 ; MMR6-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
930930 ; MMR6-NEXT: not16 $16, $5
931931 ; MMR6-NEXT: sllv $10, $2, $16
947947 ; MMR6-NEXT: selnez $11, $12, $4
948948 ; MMR6-NEXT: sllv $12, $6, $2
949949 ; MMR6-NEXT: move $7, $6
950 ; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
950 ; MMR6-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
951951 ; MMR6-NEXT: not16 $2, $2
952952 ; MMR6-NEXT: srl16 $6, $17, 1
953953 ; MMR6-NEXT: srlv $2, $6, $2
200200 define double @tst_select_fcmp_olt_double(double %x, double %y) {
201201 ; M2-LABEL: tst_select_fcmp_olt_double:
202202 ; M2: # %bb.0: # %entry
203 ; M2-NEXT: c.olt.d $f12, $f14
204 ; M2-NEXT: bc1t $BB2_2
203205 ; M2-NEXT: mov.d $f0, $f12
204 ; M2-NEXT: c.olt.d $f0, $f14
205 ; M2-NEXT: bc1t $BB2_2
206 ; M2-NEXT: nop
207206 ; M2-NEXT: # %bb.1: # %entry
208207 ; M2-NEXT: mov.d $f0, $f14
209208 ; M2-NEXT: $BB2_2: # %entry
213212 ; CMOV32R1-LABEL: tst_select_fcmp_olt_double:
214213 ; CMOV32R1: # %bb.0: # %entry
215214 ; CMOV32R1-NEXT: mov.d $f0, $f14
216 ; CMOV32R1-NEXT: c.olt.d $f12, $f0
215 ; CMOV32R1-NEXT: c.olt.d $f12, $f14
217216 ; CMOV32R1-NEXT: jr $ra
218217 ; CMOV32R1-NEXT: movt.d $f0, $f12, $fcc0
219218 ;
220219 ; CMOV32R2-LABEL: tst_select_fcmp_olt_double:
221220 ; CMOV32R2: # %bb.0: # %entry
222221 ; CMOV32R2-NEXT: mov.d $f0, $f14
223 ; CMOV32R2-NEXT: c.olt.d $f12, $f0
222 ; CMOV32R2-NEXT: c.olt.d $f12, $f14
224223 ; CMOV32R2-NEXT: jr $ra
225224 ; CMOV32R2-NEXT: movt.d $f0, $f12, $fcc0
226225 ;
234233 ;
235234 ; M3-LABEL: tst_select_fcmp_olt_double:
236235 ; M3: # %bb.0: # %entry
236 ; M3-NEXT: c.olt.d $f12, $f13
237 ; M3-NEXT: bc1t .LBB2_2
237238 ; M3-NEXT: mov.d $f0, $f12
238 ; M3-NEXT: c.olt.d $f0, $f13
239 ; M3-NEXT: bc1t .LBB2_2
240 ; M3-NEXT: nop
241239 ; M3-NEXT: # %bb.1: # %entry
242240 ; M3-NEXT: mov.d $f0, $f13
243241 ; M3-NEXT: .LBB2_2: # %entry
247245 ; CMOV64-LABEL: tst_select_fcmp_olt_double:
248246 ; CMOV64: # %bb.0: # %entry
249247 ; CMOV64-NEXT: mov.d $f0, $f13
250 ; CMOV64-NEXT: c.olt.d $f12, $f0
248 ; CMOV64-NEXT: c.olt.d $f12, $f13
251249 ; CMOV64-NEXT: jr $ra
252250 ; CMOV64-NEXT: movt.d $f0, $f12, $fcc0
253251 ;
262260 ; MM32R3-LABEL: tst_select_fcmp_olt_double:
263261 ; MM32R3: # %bb.0: # %entry
264262 ; MM32R3-NEXT: mov.d $f0, $f14
265 ; MM32R3-NEXT: c.olt.d $f12, $f0
263 ; MM32R3-NEXT: c.olt.d $f12, $f14
266264 ; MM32R3-NEXT: jr $ra
267265 ; MM32R3-NEXT: movt.d $f0, $f12, $fcc0
268266 ;
282280 define double @tst_select_fcmp_ole_double(double %x, double %y) {
283281 ; M2-LABEL: tst_select_fcmp_ole_double:
284282 ; M2: # %bb.0: # %entry
283 ; M2-NEXT: c.ole.d $f12, $f14
284 ; M2-NEXT: bc1t $BB3_2
285285 ; M2-NEXT: mov.d $f0, $f12
286 ; M2-NEXT: c.ole.d $f0, $f14
287 ; M2-NEXT: bc1t $BB3_2
288 ; M2-NEXT: nop
289286 ; M2-NEXT: # %bb.1: # %entry
290287 ; M2-NEXT: mov.d $f0, $f14
291288 ; M2-NEXT: $BB3_2: # %entry
295292 ; CMOV32R1-LABEL: tst_select_fcmp_ole_double:
296293 ; CMOV32R1: # %bb.0: # %entry
297294 ; CMOV32R1-NEXT: mov.d $f0, $f14
298 ; CMOV32R1-NEXT: c.ole.d $f12, $f0
295 ; CMOV32R1-NEXT: c.ole.d $f12, $f14
299296 ; CMOV32R1-NEXT: jr $ra
300297 ; CMOV32R1-NEXT: movt.d $f0, $f12, $fcc0
301298 ;
302299 ; CMOV32R2-LABEL: tst_select_fcmp_ole_double:
303300 ; CMOV32R2: # %bb.0: # %entry
304301 ; CMOV32R2-NEXT: mov.d $f0, $f14
305 ; CMOV32R2-NEXT: c.ole.d $f12, $f0
302 ; CMOV32R2-NEXT: c.ole.d $f12, $f14
306303 ; CMOV32R2-NEXT: jr $ra
307304 ; CMOV32R2-NEXT: movt.d $f0, $f12, $fcc0
308305 ;
316313 ;
317314 ; M3-LABEL: tst_select_fcmp_ole_double:
318315 ; M3: # %bb.0: # %entry
316 ; M3-NEXT: c.ole.d $f12, $f13
317 ; M3-NEXT: bc1t .LBB3_2
319318 ; M3-NEXT: mov.d $f0, $f12
320 ; M3-NEXT: c.ole.d $f0, $f13
321 ; M3-NEXT: bc1t .LBB3_2
322 ; M3-NEXT: nop
323319 ; M3-NEXT: # %bb.1: # %entry
324320 ; M3-NEXT: mov.d $f0, $f13
325321 ; M3-NEXT: .LBB3_2: # %entry
329325 ; CMOV64-LABEL: tst_select_fcmp_ole_double:
330326 ; CMOV64: # %bb.0: # %entry
331327 ; CMOV64-NEXT: mov.d $f0, $f13
332 ; CMOV64-NEXT: c.ole.d $f12, $f0
328 ; CMOV64-NEXT: c.ole.d $f12, $f13
333329 ; CMOV64-NEXT: jr $ra
334330 ; CMOV64-NEXT: movt.d $f0, $f12, $fcc0
335331 ;
344340 ; MM32R3-LABEL: tst_select_fcmp_ole_double:
345341 ; MM32R3: # %bb.0: # %entry
346342 ; MM32R3-NEXT: mov.d $f0, $f14
347 ; MM32R3-NEXT: c.ole.d $f12, $f0
343 ; MM32R3-NEXT: c.ole.d $f12, $f14
348344 ; MM32R3-NEXT: jr $ra
349345 ; MM32R3-NEXT: movt.d $f0, $f12, $fcc0
350346 ;
364360 define double @tst_select_fcmp_ogt_double(double %x, double %y) {
365361 ; M2-LABEL: tst_select_fcmp_ogt_double:
366362 ; M2: # %bb.0: # %entry
363 ; M2-NEXT: c.ule.d $f12, $f14
364 ; M2-NEXT: bc1f $BB4_2
367365 ; M2-NEXT: mov.d $f0, $f12
368 ; M2-NEXT: c.ule.d $f0, $f14
369 ; M2-NEXT: bc1f $BB4_2
370 ; M2-NEXT: nop
371366 ; M2-NEXT: # %bb.1: # %entry
372367 ; M2-NEXT: mov.d $f0, $f14
373368 ; M2-NEXT: $BB4_2: # %entry
377372 ; CMOV32R1-LABEL: tst_select_fcmp_ogt_double:
378373 ; CMOV32R1: # %bb.0: # %entry
379374 ; CMOV32R1-NEXT: mov.d $f0, $f14
380 ; CMOV32R1-NEXT: c.ule.d $f12, $f0
375 ; CMOV32R1-NEXT: c.ule.d $f12, $f14
381376 ; CMOV32R1-NEXT: jr $ra
382377 ; CMOV32R1-NEXT: movf.d $f0, $f12, $fcc0
383378 ;
384379 ; CMOV32R2-LABEL: tst_select_fcmp_ogt_double:
385380 ; CMOV32R2: # %bb.0: # %entry
386381 ; CMOV32R2-NEXT: mov.d $f0, $f14
387 ; CMOV32R2-NEXT: c.ule.d $f12, $f0
382 ; CMOV32R2-NEXT: c.ule.d $f12, $f14
388383 ; CMOV32R2-NEXT: jr $ra
389384 ; CMOV32R2-NEXT: movf.d $f0, $f12, $fcc0
390385 ;
398393 ;
399394 ; M3-LABEL: tst_select_fcmp_ogt_double:
400395 ; M3: # %bb.0: # %entry
396 ; M3-NEXT: c.ule.d $f12, $f13
397 ; M3-NEXT: bc1f .LBB4_2
401398 ; M3-NEXT: mov.d $f0, $f12
402 ; M3-NEXT: c.ule.d $f0, $f13
403 ; M3-NEXT: bc1f .LBB4_2
404 ; M3-NEXT: nop
405399 ; M3-NEXT: # %bb.1: # %entry
406400 ; M3-NEXT: mov.d $f0, $f13
407401 ; M3-NEXT: .LBB4_2: # %entry
411405 ; CMOV64-LABEL: tst_select_fcmp_ogt_double:
412406 ; CMOV64: # %bb.0: # %entry
413407 ; CMOV64-NEXT: mov.d $f0, $f13
414 ; CMOV64-NEXT: c.ule.d $f12, $f0
408 ; CMOV64-NEXT: c.ule.d $f12, $f13
415409 ; CMOV64-NEXT: jr $ra
416410 ; CMOV64-NEXT: movf.d $f0, $f12, $fcc0
417411 ;
426420 ; MM32R3-LABEL: tst_select_fcmp_ogt_double:
427421 ; MM32R3: # %bb.0: # %entry
428422 ; MM32R3-NEXT: mov.d $f0, $f14
429 ; MM32R3-NEXT: c.ule.d $f12, $f0
423 ; MM32R3-NEXT: c.ule.d $f12, $f14
430424 ; MM32R3-NEXT: jr $ra
431425 ; MM32R3-NEXT: movf.d $f0, $f12, $fcc0
432426 ;
446440 define double @tst_select_fcmp_oge_double(double %x, double %y) {
447441 ; M2-LABEL: tst_select_fcmp_oge_double:
448442 ; M2: # %bb.0: # %entry
443 ; M2-NEXT: c.ult.d $f12, $f14
444 ; M2-NEXT: bc1f $BB5_2
449445 ; M2-NEXT: mov.d $f0, $f12
450 ; M2-NEXT: c.ult.d $f0, $f14
451 ; M2-NEXT: bc1f $BB5_2
452 ; M2-NEXT: nop
453446 ; M2-NEXT: # %bb.1: # %entry
454447 ; M2-NEXT: mov.d $f0, $f14
455448 ; M2-NEXT: $BB5_2: # %entry
459452 ; CMOV32R1-LABEL: tst_select_fcmp_oge_double:
460453 ; CMOV32R1: # %bb.0: # %entry
461454 ; CMOV32R1-NEXT: mov.d $f0, $f14
462 ; CMOV32R1-NEXT: c.ult.d $f12, $f0
455 ; CMOV32R1-NEXT: c.ult.d $f12, $f14
463456 ; CMOV32R1-NEXT: jr $ra
464457 ; CMOV32R1-NEXT: movf.d $f0, $f12, $fcc0
465458 ;
466459 ; CMOV32R2-LABEL: tst_select_fcmp_oge_double:
467460 ; CMOV32R2: # %bb.0: # %entry
468461 ; CMOV32R2-NEXT: mov.d $f0, $f14
469 ; CMOV32R2-NEXT: c.ult.d $f12, $f0
462 ; CMOV32R2-NEXT: c.ult.d $f12, $f14
470463 ; CMOV32R2-NEXT: jr $ra
471464 ; CMOV32R2-NEXT: movf.d $f0, $f12, $fcc0
472465 ;
480473 ;
481474 ; M3-LABEL: tst_select_fcmp_oge_double:
482475 ; M3: # %bb.0: # %entry
476 ; M3-NEXT: c.ult.d $f12, $f13
477 ; M3-NEXT: bc1f .LBB5_2
483478 ; M3-NEXT: mov.d $f0, $f12
484 ; M3-NEXT: c.ult.d $f0, $f13
485 ; M3-NEXT: bc1f .LBB5_2
486 ; M3-NEXT: nop
487479 ; M3-NEXT: # %bb.1: # %entry
488480 ; M3-NEXT: mov.d $f0, $f13
489481 ; M3-NEXT: .LBB5_2: # %entry
493485 ; CMOV64-LABEL: tst_select_fcmp_oge_double:
494486 ; CMOV64: # %bb.0: # %entry
495487 ; CMOV64-NEXT: mov.d $f0, $f13
496 ; CMOV64-NEXT: c.ult.d $f12, $f0
488 ; CMOV64-NEXT: c.ult.d $f12, $f13
497489 ; CMOV64-NEXT: jr $ra
498490 ; CMOV64-NEXT: movf.d $f0, $f12, $fcc0
499491 ;
508500 ; MM32R3-LABEL: tst_select_fcmp_oge_double:
509501 ; MM32R3: # %bb.0: # %entry
510502 ; MM32R3-NEXT: mov.d $f0, $f14
511 ; MM32R3-NEXT: c.ult.d $f12, $f0
503 ; MM32R3-NEXT: c.ult.d $f12, $f14
512504 ; MM32R3-NEXT: jr $ra
513505 ; MM32R3-NEXT: movf.d $f0, $f12, $fcc0
514506 ;
528520 define double @tst_select_fcmp_oeq_double(double %x, double %y) {
529521 ; M2-LABEL: tst_select_fcmp_oeq_double:
530522 ; M2: # %bb.0: # %entry
523 ; M2-NEXT: c.eq.d $f12, $f14
524 ; M2-NEXT: bc1t $BB6_2
531525 ; M2-NEXT: mov.d $f0, $f12
532 ; M2-NEXT: c.eq.d $f0, $f14
533 ; M2-NEXT: bc1t $BB6_2
534 ; M2-NEXT: nop
535526 ; M2-NEXT: # %bb.1: # %entry
536527 ; M2-NEXT: mov.d $f0, $f14
537528 ; M2-NEXT: $BB6_2: # %entry
541532 ; CMOV32R1-LABEL: tst_select_fcmp_oeq_double:
542533 ; CMOV32R1: # %bb.0: # %entry
543534 ; CMOV32R1-NEXT: mov.d $f0, $f14
544 ; CMOV32R1-NEXT: c.eq.d $f12, $f0
535 ; CMOV32R1-NEXT: c.eq.d $f12, $f14
545536 ; CMOV32R1-NEXT: jr $ra
546537 ; CMOV32R1-NEXT: movt.d $f0, $f12, $fcc0
547538 ;
548539 ; CMOV32R2-LABEL: tst_select_fcmp_oeq_double:
549540 ; CMOV32R2: # %bb.0: # %entry
550541 ; CMOV32R2-NEXT: mov.d $f0, $f14
551 ; CMOV32R2-NEXT: c.eq.d $f12, $f0
542 ; CMOV32R2-NEXT: c.eq.d $f12, $f14
552543 ; CMOV32R2-NEXT: jr $ra
553544 ; CMOV32R2-NEXT: movt.d $f0, $f12, $fcc0
554545 ;
562553 ;
563554 ; M3-LABEL: tst_select_fcmp_oeq_double:
564555 ; M3: # %bb.0: # %entry
556 ; M3-NEXT: c.eq.d $f12, $f13
557 ; M3-NEXT: bc1t .LBB6_2
565558 ; M3-NEXT: mov.d $f0, $f12
566 ; M3-NEXT: c.eq.d $f0, $f13
567 ; M3-NEXT: bc1t .LBB6_2
568 ; M3-NEXT: nop
569559 ; M3-NEXT: # %bb.1: # %entry
570560 ; M3-NEXT: mov.d $f0, $f13
571561 ; M3-NEXT: .LBB6_2: # %entry
575565 ; CMOV64-LABEL: tst_select_fcmp_oeq_double:
576566 ; CMOV64: # %bb.0: # %entry
577567 ; CMOV64-NEXT: mov.d $f0, $f13
578 ; CMOV64-NEXT: c.eq.d $f12, $f0
568 ; CMOV64-NEXT: c.eq.d $f12, $f13
579569 ; CMOV64-NEXT: jr $ra
580570 ; CMOV64-NEXT: movt.d $f0, $f12, $fcc0
581571 ;
590580 ; MM32R3-LABEL: tst_select_fcmp_oeq_double:
591581 ; MM32R3: # %bb.0: # %entry
592582 ; MM32R3-NEXT: mov.d $f0, $f14
593 ; MM32R3-NEXT: c.eq.d $f12, $f0
583 ; MM32R3-NEXT: c.eq.d $f12, $f14
594584 ; MM32R3-NEXT: jr $ra
595585 ; MM32R3-NEXT: movt.d $f0, $f12, $fcc0
596586 ;
610600 define double @tst_select_fcmp_one_double(double %x, double %y) {
611601 ; M2-LABEL: tst_select_fcmp_one_double:
612602 ; M2: # %bb.0: # %entry
603 ; M2-NEXT: c.ueq.d $f12, $f14
604 ; M2-NEXT: bc1f $BB7_2
613605 ; M2-NEXT: mov.d $f0, $f12
614 ; M2-NEXT: c.ueq.d $f0, $f14
615 ; M2-NEXT: bc1f $BB7_2
616 ; M2-NEXT: nop
617606 ; M2-NEXT: # %bb.1: # %entry
618607 ; M2-NEXT: mov.d $f0, $f14
619608 ; M2-NEXT: $BB7_2: # %entry
623612 ; CMOV32R1-LABEL: tst_select_fcmp_one_double:
624613 ; CMOV32R1: # %bb.0: # %entry
625614 ; CMOV32R1-NEXT: mov.d $f0, $f14
626 ; CMOV32R1-NEXT: c.ueq.d $f12, $f0
615 ; CMOV32R1-NEXT: c.ueq.d $f12, $f14
627616 ; CMOV32R1-NEXT: jr $ra
628617 ; CMOV32R1-NEXT: movf.d $f0, $f12, $fcc0
629618 ;
630619 ; CMOV32R2-LABEL: tst_select_fcmp_one_double:
631620 ; CMOV32R2: # %bb.0: # %entry
632621 ; CMOV32R2-NEXT: mov.d $f0, $f14
633 ; CMOV32R2-NEXT: c.ueq.d $f12, $f0
622 ; CMOV32R2-NEXT: c.ueq.d $f12, $f14
634623 ; CMOV32R2-NEXT: jr $ra
635624 ; CMOV32R2-NEXT: movf.d $f0, $f12, $fcc0
636625 ;
645634 ;
646635 ; M3-LABEL: tst_select_fcmp_one_double:
647636 ; M3: # %bb.0: # %entry
637 ; M3-NEXT: c.ueq.d $f12, $f13
638 ; M3-NEXT: bc1f .LBB7_2
648639 ; M3-NEXT: mov.d $f0, $f12
649 ; M3-NEXT: c.ueq.d $f0, $f13
650 ; M3-NEXT: bc1f .LBB7_2
651 ; M3-NEXT: nop
652640 ; M3-NEXT: # %bb.1: # %entry
653641 ; M3-NEXT: mov.d $f0, $f13
654642 ; M3-NEXT: .LBB7_2: # %entry
658646 ; CMOV64-LABEL: tst_select_fcmp_one_double:
659647 ; CMOV64: # %bb.0: # %entry
660648 ; CMOV64-NEXT: mov.d $f0, $f13
661 ; CMOV64-NEXT: c.ueq.d $f12, $f0
649 ; CMOV64-NEXT: c.ueq.d $f12, $f13
662650 ; CMOV64-NEXT: jr $ra
663651 ; CMOV64-NEXT: movf.d $f0, $f12, $fcc0
664652 ;
674662 ; MM32R3-LABEL: tst_select_fcmp_one_double:
675663 ; MM32R3: # %bb.0: # %entry
676664 ; MM32R3-NEXT: mov.d $f0, $f14
677 ; MM32R3-NEXT: c.ueq.d $f12, $f0
665 ; MM32R3-NEXT: c.ueq.d $f12, $f14
678666 ; MM32R3-NEXT: jr $ra
679667 ; MM32R3-NEXT: movf.d $f0, $f12, $fcc0
680668 ;
187187 define float @tst_select_fcmp_olt_float(float %x, float %y) {
188188 ; M2-LABEL: tst_select_fcmp_olt_float:
189189 ; M2: # %bb.0: # %entry
190 ; M2-NEXT: c.olt.s $f12, $f14
191 ; M2-NEXT: bc1t $BB2_2
190192 ; M2-NEXT: mov.s $f0, $f12
191 ; M2-NEXT: c.olt.s $f0, $f14
192 ; M2-NEXT: bc1t $BB2_2
193 ; M2-NEXT: nop
194193 ; M2-NEXT: # %bb.1: # %entry
195194 ; M2-NEXT: mov.s $f0, $f14
196195 ; M2-NEXT: $BB2_2: # %entry
200199 ; CMOV32R1-LABEL: tst_select_fcmp_olt_float:
201200 ; CMOV32R1: # %bb.0: # %entry
202201 ; CMOV32R1-NEXT: mov.s $f0, $f14
203 ; CMOV32R1-NEXT: c.olt.s $f12, $f0
202 ; CMOV32R1-NEXT: c.olt.s $f12, $f14
204203 ; CMOV32R1-NEXT: jr $ra
205204 ; CMOV32R1-NEXT: movt.s $f0, $f12, $fcc0
206205 ;
207206 ; CMOV32R2-LABEL: tst_select_fcmp_olt_float:
208207 ; CMOV32R2: # %bb.0: # %entry
209208 ; CMOV32R2-NEXT: mov.s $f0, $f14
210 ; CMOV32R2-NEXT: c.olt.s $f12, $f0
209 ; CMOV32R2-NEXT: c.olt.s $f12, $f14
211210 ; CMOV32R2-NEXT: jr $ra
212211 ; CMOV32R2-NEXT: movt.s $f0, $f12, $fcc0
213212 ;
219218 ;
220219 ; M3-LABEL: tst_select_fcmp_olt_float:
221220 ; M3: # %bb.0: # %entry
221 ; M3-NEXT: c.olt.s $f12, $f13
222 ; M3-NEXT: bc1t .LBB2_2
222223 ; M3-NEXT: mov.s $f0, $f12
223 ; M3-NEXT: c.olt.s $f0, $f13
224 ; M3-NEXT: bc1t .LBB2_2
225 ; M3-NEXT: nop
226224 ; M3-NEXT: # %bb.1: # %entry
227225 ; M3-NEXT: mov.s $f0, $f13
228226 ; M3-NEXT: .LBB2_2: # %entry
232230 ; CMOV64-LABEL: tst_select_fcmp_olt_float:
233231 ; CMOV64: # %bb.0: # %entry
234232 ; CMOV64-NEXT: mov.s $f0, $f13
235 ; CMOV64-NEXT: c.olt.s $f12, $f0
233 ; CMOV64-NEXT: c.olt.s $f12, $f13
236234 ; CMOV64-NEXT: jr $ra
237235 ; CMOV64-NEXT: movt.s $f0, $f12, $fcc0
238236 ;
245243 ; MM32R3-LABEL: tst_select_fcmp_olt_float:
246244 ; MM32R3: # %bb.0: # %entry
247245 ; MM32R3-NEXT: mov.s $f0, $f14
248 ; MM32R3-NEXT: c.olt.s $f12, $f0
246 ; MM32R3-NEXT: c.olt.s $f12, $f14
249247 ; MM32R3-NEXT: jr $ra
250248 ; MM32R3-NEXT: movt.s $f0, $f12, $fcc0
251249 ;
263261 define float @tst_select_fcmp_ole_float(float %x, float %y) {
264262 ; M2-LABEL: tst_select_fcmp_ole_float:
265263 ; M2: # %bb.0: # %entry
264 ; M2-NEXT: c.ole.s $f12, $f14
265 ; M2-NEXT: bc1t $BB3_2
266266 ; M2-NEXT: mov.s $f0, $f12
267 ; M2-NEXT: c.ole.s $f0, $f14
268 ; M2-NEXT: bc1t $BB3_2
269 ; M2-NEXT: nop
270267 ; M2-NEXT: # %bb.1: # %entry
271268 ; M2-NEXT: mov.s $f0, $f14
272269 ; M2-NEXT: $BB3_2: # %entry
276273 ; CMOV32R1-LABEL: tst_select_fcmp_ole_float:
277274 ; CMOV32R1: # %bb.0: # %entry
278275 ; CMOV32R1-NEXT: mov.s $f0, $f14
279 ; CMOV32R1-NEXT: c.ole.s $f12, $f0
276 ; CMOV32R1-NEXT: c.ole.s $f12, $f14
280277 ; CMOV32R1-NEXT: jr $ra
281278 ; CMOV32R1-NEXT: movt.s $f0, $f12, $fcc0
282279 ;
283280 ; CMOV32R2-LABEL: tst_select_fcmp_ole_float:
284281 ; CMOV32R2: # %bb.0: # %entry
285282 ; CMOV32R2-NEXT: mov.s $f0, $f14
286 ; CMOV32R2-NEXT: c.ole.s $f12, $f0
283 ; CMOV32R2-NEXT: c.ole.s $f12, $f14
287284 ; CMOV32R2-NEXT: jr $ra
288285 ; CMOV32R2-NEXT: movt.s $f0, $f12, $fcc0
289286 ;
295292 ;
296293 ; M3-LABEL: tst_select_fcmp_ole_float:
297294 ; M3: # %bb.0: # %entry
295 ; M3-NEXT: c.ole.s $f12, $f13
296 ; M3-NEXT: bc1t .LBB3_2
298297 ; M3-NEXT: mov.s $f0, $f12
299 ; M3-NEXT: c.ole.s $f0, $f13
300 ; M3-NEXT: bc1t .LBB3_2
301 ; M3-NEXT: nop
302298 ; M3-NEXT: # %bb.1: # %entry
303299 ; M3-NEXT: mov.s $f0, $f13
304300 ; M3-NEXT: .LBB3_2: # %entry
308304 ; CMOV64-LABEL: tst_select_fcmp_ole_float:
309305 ; CMOV64: # %bb.0: # %entry
310306 ; CMOV64-NEXT: mov.s $f0, $f13
311 ; CMOV64-NEXT: c.ole.s $f12, $f0
307 ; CMOV64-NEXT: c.ole.s $f12, $f13
312308 ; CMOV64-NEXT: jr $ra
313309 ; CMOV64-NEXT: movt.s $f0, $f12, $fcc0
314310 ;
321317 ; MM32R3-LABEL: tst_select_fcmp_ole_float:
322318 ; MM32R3: # %bb.0: # %entry
323319 ; MM32R3-NEXT: mov.s $f0, $f14
324 ; MM32R3-NEXT: c.ole.s $f12, $f0
320 ; MM32R3-NEXT: c.ole.s $f12, $f14
325321 ; MM32R3-NEXT: jr $ra
326322 ; MM32R3-NEXT: movt.s $f0, $f12, $fcc0
327323 ;
339335 define float @tst_select_fcmp_ogt_float(float %x, float %y) {
340336 ; M2-LABEL: tst_select_fcmp_ogt_float:
341337 ; M2: # %bb.0: # %entry
338 ; M2-NEXT: c.ule.s $f12, $f14
339 ; M2-NEXT: bc1f $BB4_2
342340 ; M2-NEXT: mov.s $f0, $f12
343 ; M2-NEXT: c.ule.s $f0, $f14
344 ; M2-NEXT: bc1f $BB4_2
345 ; M2-NEXT: nop
346341 ; M2-NEXT: # %bb.1: # %entry
347342 ; M2-NEXT: mov.s $f0, $f14
348343 ; M2-NEXT: $BB4_2: # %entry
352347 ; CMOV32R1-LABEL: tst_select_fcmp_ogt_float:
353348 ; CMOV32R1: # %bb.0: # %entry
354349 ; CMOV32R1-NEXT: mov.s $f0, $f14
355 ; CMOV32R1-NEXT: c.ule.s $f12, $f0
350 ; CMOV32R1-NEXT: c.ule.s $f12, $f14
356351 ; CMOV32R1-NEXT: jr $ra
357352 ; CMOV32R1-NEXT: movf.s $f0, $f12, $fcc0
358353 ;
359354 ; CMOV32R2-LABEL: tst_select_fcmp_ogt_float:
360355 ; CMOV32R2: # %bb.0: # %entry
361356 ; CMOV32R2-NEXT: mov.s $f0, $f14
362 ; CMOV32R2-NEXT: c.ule.s $f12, $f0
357 ; CMOV32R2-NEXT: c.ule.s $f12, $f14
363358 ; CMOV32R2-NEXT: jr $ra
364359 ; CMOV32R2-NEXT: movf.s $f0, $f12, $fcc0
365360 ;
371366 ;
372367 ; M3-LABEL: tst_select_fcmp_ogt_float:
373368 ; M3: # %bb.0: # %entry
369 ; M3-NEXT: c.ule.s $f12, $f13
370 ; M3-NEXT: bc1f .LBB4_2
374371 ; M3-NEXT: mov.s $f0, $f12
375 ; M3-NEXT: c.ule.s $f0, $f13
376 ; M3-NEXT: bc1f .LBB4_2
377 ; M3-NEXT: nop
378372 ; M3-NEXT: # %bb.1: # %entry
379373 ; M3-NEXT: mov.s $f0, $f13
380374 ; M3-NEXT: .LBB4_2: # %entry
384378 ; CMOV64-LABEL: tst_select_fcmp_ogt_float:
385379 ; CMOV64: # %bb.0: # %entry
386380 ; CMOV64-NEXT: mov.s $f0, $f13
387 ; CMOV64-NEXT: c.ule.s $f12, $f0
381 ; CMOV64-NEXT: c.ule.s $f12, $f13
388382 ; CMOV64-NEXT: jr $ra
389383 ; CMOV64-NEXT: movf.s $f0, $f12, $fcc0
390384 ;
397391 ; MM32R3-LABEL: tst_select_fcmp_ogt_float:
398392 ; MM32R3: # %bb.0: # %entry
399393 ; MM32R3-NEXT: mov.s $f0, $f14
400 ; MM32R3-NEXT: c.ule.s $f12, $f0
394 ; MM32R3-NEXT: c.ule.s $f12, $f14
401395 ; MM32R3-NEXT: jr $ra
402396 ; MM32R3-NEXT: movf.s $f0, $f12, $fcc0
403397 ;
415409 define float @tst_select_fcmp_oge_float(float %x, float %y) {
416410 ; M2-LABEL: tst_select_fcmp_oge_float:
417411 ; M2: # %bb.0: # %entry
412 ; M2-NEXT: c.ult.s $f12, $f14
413 ; M2-NEXT: bc1f $BB5_2
418414 ; M2-NEXT: mov.s $f0, $f12
419 ; M2-NEXT: c.ult.s $f0, $f14
420 ; M2-NEXT: bc1f $BB5_2
421 ; M2-NEXT: nop
422415 ; M2-NEXT: # %bb.1: # %entry
423416 ; M2-NEXT: mov.s $f0, $f14
424417 ; M2-NEXT: $BB5_2: # %entry
428421 ; CMOV32R1-LABEL: tst_select_fcmp_oge_float:
429422 ; CMOV32R1: # %bb.0: # %entry
430423 ; CMOV32R1-NEXT: mov.s $f0, $f14
431 ; CMOV32R1-NEXT: c.ult.s $f12, $f0
424 ; CMOV32R1-NEXT: c.ult.s $f12, $f14
432425 ; CMOV32R1-NEXT: jr $ra
433426 ; CMOV32R1-NEXT: movf.s $f0, $f12, $fcc0
434427 ;
435428 ; CMOV32R2-LABEL: tst_select_fcmp_oge_float:
436429 ; CMOV32R2: # %bb.0: # %entry
437430 ; CMOV32R2-NEXT: mov.s $f0, $f14
438 ; CMOV32R2-NEXT: c.ult.s $f12, $f0
431 ; CMOV32R2-NEXT: c.ult.s $f12, $f14
439432 ; CMOV32R2-NEXT: jr $ra
440433 ; CMOV32R2-NEXT: movf.s $f0, $f12, $fcc0
441434 ;
447440 ;
448441 ; M3-LABEL: tst_select_fcmp_oge_float:
449442 ; M3: # %bb.0: # %entry
443 ; M3-NEXT: c.ult.s $f12, $f13
444 ; M3-NEXT: bc1f .LBB5_2
450445 ; M3-NEXT: mov.s $f0, $f12
451 ; M3-NEXT: c.ult.s $f0, $f13
452 ; M3-NEXT: bc1f .LBB5_2
453 ; M3-NEXT: nop
454446 ; M3-NEXT: # %bb.1: # %entry
455447 ; M3-NEXT: mov.s $f0, $f13
456448 ; M3-NEXT: .LBB5_2: # %entry
460452 ; CMOV64-LABEL: tst_select_fcmp_oge_float:
461453 ; CMOV64: # %bb.0: # %entry
462454 ; CMOV64-NEXT: mov.s $f0, $f13
463 ; CMOV64-NEXT: c.ult.s $f12, $f0
455 ; CMOV64-NEXT: c.ult.s $f12, $f13
464456 ; CMOV64-NEXT: jr $ra
465457 ; CMOV64-NEXT: movf.s $f0, $f12, $fcc0
466458 ;
473465 ; MM32R3-LABEL: tst_select_fcmp_oge_float:
474466 ; MM32R3: # %bb.0: # %entry
475467 ; MM32R3-NEXT: mov.s $f0, $f14
476 ; MM32R3-NEXT: c.ult.s $f12, $f0
468 ; MM32R3-NEXT: c.ult.s $f12, $f14
477469 ; MM32R3-NEXT: jr $ra
478470 ; MM32R3-NEXT: movf.s $f0, $f12, $fcc0
479471 ;
491483 define float @tst_select_fcmp_oeq_float(float %x, float %y) {
492484 ; M2-LABEL: tst_select_fcmp_oeq_float:
493485 ; M2: # %bb.0: # %entry
486 ; M2-NEXT: c.eq.s $f12, $f14
487 ; M2-NEXT: bc1t $BB6_2
494488 ; M2-NEXT: mov.s $f0, $f12
495 ; M2-NEXT: c.eq.s $f0, $f14
496 ; M2-NEXT: bc1t $BB6_2
497 ; M2-NEXT: nop
498489 ; M2-NEXT: # %bb.1: # %entry
499490 ; M2-NEXT: mov.s $f0, $f14
500491 ; M2-NEXT: $BB6_2: # %entry
504495 ; CMOV32R1-LABEL: tst_select_fcmp_oeq_float:
505496 ; CMOV32R1: # %bb.0: # %entry
506497 ; CMOV32R1-NEXT: mov.s $f0, $f14
507 ; CMOV32R1-NEXT: c.eq.s $f12, $f0
498 ; CMOV32R1-NEXT: c.eq.s $f12, $f14
508499 ; CMOV32R1-NEXT: jr $ra
509500 ; CMOV32R1-NEXT: movt.s $f0, $f12, $fcc0
510501 ;
511502 ; CMOV32R2-LABEL: tst_select_fcmp_oeq_float:
512503 ; CMOV32R2: # %bb.0: # %entry
513504 ; CMOV32R2-NEXT: mov.s $f0, $f14
514 ; CMOV32R2-NEXT: c.eq.s $f12, $f0
505 ; CMOV32R2-NEXT: c.eq.s $f12, $f14
515506 ; CMOV32R2-NEXT: jr $ra
516507 ; CMOV32R2-NEXT: movt.s $f0, $f12, $fcc0
517508 ;
523514 ;
524515 ; M3-LABEL: tst_select_fcmp_oeq_float:
525516 ; M3: # %bb.0: # %entry
517 ; M3-NEXT: c.eq.s $f12, $f13
518 ; M3-NEXT: bc1t .LBB6_2
526519 ; M3-NEXT: mov.s $f0, $f12
527 ; M3-NEXT: c.eq.s $f0, $f13
528 ; M3-NEXT: bc1t .LBB6_2
529 ; M3-NEXT: nop
530520 ; M3-NEXT: # %bb.1: # %entry
531521 ; M3-NEXT: mov.s $f0, $f13
532522 ; M3-NEXT: .LBB6_2: # %entry
536526 ; CMOV64-LABEL: tst_select_fcmp_oeq_float:
537527 ; CMOV64: # %bb.0: # %entry
538528 ; CMOV64-NEXT: mov.s $f0, $f13
539 ; CMOV64-NEXT: c.eq.s $f12, $f0
529 ; CMOV64-NEXT: c.eq.s $f12, $f13
540530 ; CMOV64-NEXT: jr $ra
541531 ; CMOV64-NEXT: movt.s $f0, $f12, $fcc0
542532 ;
549539 ; MM32R3-LABEL: tst_select_fcmp_oeq_float:
550540 ; MM32R3: # %bb.0: # %entry
551541 ; MM32R3-NEXT: mov.s $f0, $f14
552 ; MM32R3-NEXT: c.eq.s $f12, $f0
542 ; MM32R3-NEXT: c.eq.s $f12, $f14
553543 ; MM32R3-NEXT: jr $ra
554544 ; MM32R3-NEXT: movt.s $f0, $f12, $fcc0
555545 ;
567557 define float @tst_select_fcmp_one_float(float %x, float %y) {
568558 ; M2-LABEL: tst_select_fcmp_one_float:
569559 ; M2: # %bb.0: # %entry
560 ; M2-NEXT: c.ueq.s $f12, $f14
561 ; M2-NEXT: bc1f $BB7_2
570562 ; M2-NEXT: mov.s $f0, $f12
571 ; M2-NEXT: c.ueq.s $f0, $f14
572 ; M2-NEXT: bc1f $BB7_2
573 ; M2-NEXT: nop
574563 ; M2-NEXT: # %bb.1: # %entry
575564 ; M2-NEXT: mov.s $f0, $f14
576565 ; M2-NEXT: $BB7_2: # %entry
580569 ; CMOV32R1-LABEL: tst_select_fcmp_one_float:
581570 ; CMOV32R1: # %bb.0: # %entry
582571 ; CMOV32R1-NEXT: mov.s $f0, $f14
583 ; CMOV32R1-NEXT: c.ueq.s $f12, $f0
572 ; CMOV32R1-NEXT: c.ueq.s $f12, $f14
584573 ; CMOV32R1-NEXT: jr $ra
585574 ; CMOV32R1-NEXT: movf.s $f0, $f12, $fcc0
586575 ;
587576 ; CMOV32R2-LABEL: tst_select_fcmp_one_float:
588577 ; CMOV32R2: # %bb.0: # %entry
589578 ; CMOV32R2-NEXT: mov.s $f0, $f14
590 ; CMOV32R2-NEXT: c.ueq.s $f12, $f0
579 ; CMOV32R2-NEXT: c.ueq.s $f12, $f14
591580 ; CMOV32R2-NEXT: jr $ra
592581 ; CMOV32R2-NEXT: movf.s $f0, $f12, $fcc0
593582 ;
602591 ;
603592 ; M3-LABEL: tst_select_fcmp_one_float:
604593 ; M3: # %bb.0: # %entry
594 ; M3-NEXT: c.ueq.s $f12, $f13
595 ; M3-NEXT: bc1f .LBB7_2
605596 ; M3-NEXT: mov.s $f0, $f12
606 ; M3-NEXT: c.ueq.s $f0, $f13
607 ; M3-NEXT: bc1f .LBB7_2
608 ; M3-NEXT: nop
609597 ; M3-NEXT: # %bb.1: # %entry
610598 ; M3-NEXT: mov.s $f0, $f13
611599 ; M3-NEXT: .LBB7_2: # %entry
615603 ; CMOV64-LABEL: tst_select_fcmp_one_float:
616604 ; CMOV64: # %bb.0: # %entry
617605 ; CMOV64-NEXT: mov.s $f0, $f13
618 ; CMOV64-NEXT: c.ueq.s $f12, $f0
606 ; CMOV64-NEXT: c.ueq.s $f12, $f13
619607 ; CMOV64-NEXT: jr $ra
620608 ; CMOV64-NEXT: movf.s $f0, $f12, $fcc0
621609 ;
631619 ; MM32R3-LABEL: tst_select_fcmp_one_float:
632620 ; MM32R3: # %bb.0: # %entry
633621 ; MM32R3-NEXT: mov.s $f0, $f14
634 ; MM32R3-NEXT: c.ueq.s $f12, $f0
622 ; MM32R3-NEXT: c.ueq.s $f12, $f14
635623 ; MM32R3-NEXT: jr $ra
636624 ; MM32R3-NEXT: movf.s $f0, $f12, $fcc0
637625 ;
856856 ; MMR3-NEXT: sw $5, 32($sp) # 4-byte Folded Spill
857857 ; MMR3-NEXT: move $1, $4
858858 ; MMR3-NEXT: lw $16, 76($sp)
859 ; MMR3-NEXT: sllv $2, $1, $16
859 ; MMR3-NEXT: sllv $2, $4, $16
860860 ; MMR3-NEXT: not16 $4, $16
861861 ; MMR3-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
862862 ; MMR3-NEXT: srl16 $3, $5, 1
944944 ; MMR6-NEXT: .cfi_offset 16, -8
945945 ; MMR6-NEXT: move $11, $4
946946 ; MMR6-NEXT: lw $3, 44($sp)
947 ; MMR6-NEXT: sllv $1, $11, $3
947 ; MMR6-NEXT: sllv $1, $4, $3
948948 ; MMR6-NEXT: not16 $2, $3
949949 ; MMR6-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
950950 ; MMR6-NEXT: srl16 $16, $5, 1
162162 ; MMR3: subu16 $5, $[[T19]], $[[T20]]
163163
164164 ; MMR6: move $[[T0:[0-9]+]], $7
165 ; MMR6: sw $[[T0]], 8($sp)
165 ; MMR6: sw $7, 8($sp)
166166 ; MMR6: move $[[T1:[0-9]+]], $5
167167 ; MMR6: sw $4, 12($sp)
168168 ; MMR6: lw $[[T2:[0-9]+]], 48($sp)
192192 ; CHECK-NEXT: move $4, $7
193193 ; CHECK-NEXT: sw $5, 52($sp)
194194 ; CHECK-NEXT: sw $6, 56($sp)
195 ; CHECK-NEXT: sw $4, 60($sp)
195 ; CHECK-NEXT: sw $7, 60($sp)
196196 ; CHECK-NEXT: lw $1, 80($sp)
197197 ; CHECK-NEXT: lb $2, 52($sp)
198198 ; CHECK-NEXT: addiu $3, $zero, 4
1919 ; CHECK-NEXT: .cfi_def_cfa_offset 48
2020 ; CHECK-NEXT: .cfi_offset lr, 16
2121 ; CHECK-NEXT: .cfi_offset r30, -16
22 ; CHECK-NEXT: ld 12, 0(3)
2223 ; CHECK-NEXT: std 30, 32(1)
2324 ; CHECK-NEXT: mr 30, 3
24 ; CHECK-NEXT: ld 12, 0(30)
2525 ; CHECK-NEXT: std 2, 24(1)
2626 ; CHECK-NEXT: mtctr 12
2727 ; CHECK-NEXT: bctrl
1313 ret double %r
1414
1515 ; CHECK: @foo3
16 ; CHECK: xsnmsubadp [[REG:[0-9]+]], {{[0-9]+}}, [[REG]]
16 ; CHECK: fmr [[REG:[0-9]+]], [[REG2:[0-9]+]]
17 ; CHECK: xsnmsubadp [[REG]], {{[0-9]+}}, [[REG2]]
1718 ; CHECK: xsmaddmdp
1819 ; CHECK: xsmaddadp
1920 }
1515 ret i32 %e.0
1616 ; CHECK: @foo
1717 ; CHECK: mr [[NEWREG:[0-9]+]], 3
18 ; CHECK: mr [[REG1:[0-9]+]], 4
1819 ; CHECK: mtvsrd [[NEWREG2:[0-9]+]], 4
19 ; CHECK: mffprd [[REG1:[0-9]+]], [[NEWREG2]]
2020 ; CHECK: add {{[0-9]+}}, [[NEWREG]], [[REG1]]
2121 ; CHECK: mffprd [[REG2:[0-9]+]], [[NEWREG2]]
2222 ; CHECK: add {{[0-9]+}}, [[REG2]], [[NEWREG]]
1919 define linkonce_odr void @ZN6snappyDecompressor_(%"class.snappy::SnappyDecompressor"* %this, %"class.snappy::SnappyIOVecWriter"* %writer) {
2020 ; CHECK-LABEL: ZN6snappyDecompressor_:
2121 ; CHECK: # %bb.0: # %entry
22 ; CHECK: addis 3, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
23 ; CHECK-DAG: addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l
22 ; CHECK: addis 23, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
23 ; CHECK-DAG: addi 25, 23, _ZN6snappy8internalL8wordmaskE@toc@l
2424 ; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha
2525 ; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l
2626 ; CHECK: b .LBB0_2
22
33 define i64 @testOptimizeLiAddToAddi(i64 %a) {
44 ; CHECK-LABEL: testOptimizeLiAddToAddi:
5 ; CHECK: addi 3, 30, 2444
5 ; CHECK: addi 3, 3, 2444
66 ; CHECK: bl callv
77 ; CHECK: addi 3, 30, 234
88 ; CHECK: bl call
2424 ;CHECK-LABEL: straight_test:
2525 ; test1 may have been merged with entry
2626 ;CHECK: mr [[TAGREG:[0-9]+]], 3
27 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
27 ;CHECK: andi. {{[0-9]+}}, [[TAGREG:[0-9]+]], 1
2828 ;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
2929 ;CHECK-NEXT: # %test2
3030 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
147147 ; HARD-NEXT: std %o0, [%sp+96]
148148 ; HARD-NEXT: st %o1, [%sp+92]
149149 ; HARD-NEXT: mov %i0, %o2
150 ; HARD-NEXT: mov %o0, %o3
150 ; HARD-NEXT: mov %i1, %o3
151151 ; HARD-NEXT: mov %o1, %o4
152 ; HARD-NEXT: mov %o0, %o5
152 ; HARD-NEXT: mov %i1, %o5
153153 ; HARD-NEXT: call floatarg
154154 ; HARD: std %f0, [%i4]
155155 ; SOFT: st %i0, [%sp+104]
234234
235235 ; CHECK-LABEL: test_load_add_i32
236236 ; CHECK: membar
237 ; CHECK: add [[V:%[gilo][0-7]]], %o1, [[U:%[gilo][0-7]]]
238 ; CHECK: cas [%o0], [[V]], [[U]]
237 ; CHECK: mov [[U:%[gilo][0-7]]], [[V:%[gilo][0-7]]]
238 ; CHECK: add [[U:%[gilo][0-7]]], %o1, [[V2:%[gilo][0-7]]]
239 ; CHECK: cas [%o0], [[V]], [[V2]]
239240 ; CHECK: membar
240241 define zeroext i32 @test_load_add_i32(i32* %p, i32 zeroext %v) {
241242 entry:
4545 ; CHECK-LABEL: f5:
4646 ; CHECK-DAG: vlr %v[[A1:[0-5]]], %v24
4747 ; CHECK-DAG: vlr %v[[A2:[0-5]]], %v26
48 ; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v[[A1]], 1
49 ; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v[[A2]], 1
50 ; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v[[A1]], 2
51 ; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2
52 ; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3
53 ; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3
48 ; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v24, 1
49 ; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v26, 1
50 ; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v24, 2
51 ; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v26, 2
52 ; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v24, 3
53 ; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v26, 3
5454 ; CHECK-DAG: sebr %f[[A1]], %f[[A2]]
5555 ; CHECK-DAG: sebr %f[[B1]], %f[[B2]]
5656 ; CHECK-DAG: sebr %f[[C1]], %f[[C2]]
3636 ; CHECK: adds r3, r0, r1
3737 ; CHECK: push {r5}
3838 ; CHECK: pop {r1}
39 ; CHECK: adcs r1, r1
39 ; CHECK: adcs r1, r5
4040 ; CHECK: ldr r0, [sp, #12] @ 4-byte Reload
4141 ; CHECK: ldr r2, [sp, #8] @ 4-byte Reload
4242 ; CHECK: adds r2, r0, r2
4343 ; CHECK: push {r5}
4444 ; CHECK: pop {r4}
45 ; CHECK: adcs r4, r4
45 ; CHECK: adcs r4, r5
4646 ; CHECK: adds r0, r2, r5
4747 ; CHECK: push {r3}
4848 ; CHECK: pop {r0}
597597 define i32 @b_to_bx(i32 %value) {
598598 ; CHECK-LABEL: b_to_bx:
599599 ; DISABLE: push {r7, lr}
600 ; CHECK: cmp r1, #49
600 ; CHECK: cmp r0, #49
601601 ; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]]
602602 ; ENABLE: push {r7, lr}
603603
66 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
77 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
88 ; CHECK-NEXT: movl %ecx, %edx
9 ; CHECK-NEXT: imull %edx, %edx
9 ; CHECK-NEXT: imull %ecx, %edx
1010 ; CHECK-NEXT: imull %eax, %ecx
1111 ; CHECK-NEXT: imull %eax, %eax
1212 ; CHECK-NEXT: addl %edx, %eax
105105 ; CHECK-DAG: movl %edx, %[[r1:[^ ]*]]
106106 ; CHECK-DAG: movl 8(%ebp), %[[r2:[^ ]*]]
107107 ; CHECK-DAG: movl %[[r2]], 4(%esp)
108 ; CHECK-DAG: movl %[[r1]], (%esp)
108 ; CHECK-DAG: movl %edx, (%esp)
109109 ; CHECK: movl %esp, %[[reg:[^ ]*]]
110110 ; CHECK: pushl %[[reg]]
111111 ; CHECK: calll _addrof_i64
22212221 ; SSE2-NEXT: movq %rax, %xmm11
22222222 ; SSE2-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
22232223 ; SSE2-NEXT: movq %rbp, %rcx
2224 ; SSE2-NEXT: shrdq $1, %rcx, %rax
2224 ; SSE2-NEXT: shrdq $1, %rbp, %rax
22252225 ; SSE2-NEXT: pslldq {{.*#+}} xmm13 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm13[0,1,2]
22262226 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm15 = xmm15[0],xmm8[0]
22272227 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
1111 ; CHECK-NEXT: movq %rdx, %r14
1212 ; CHECK-NEXT: movq %rsi, %r15
1313 ; CHECK-NEXT: movq %rdi, %rbx
14 ; CHECK-NEXT: vmovaps (%rbx), %ymm0
14 ; CHECK-NEXT: vmovaps (%rdi), %ymm0
1515 ; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
16 ; CHECK-NEXT: vmovaps (%r15), %ymm1
16 ; CHECK-NEXT: vmovaps (%rsi), %ymm1
1717 ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
18 ; CHECK-NEXT: vmovaps (%r14), %ymm2
18 ; CHECK-NEXT: vmovaps (%rdx), %ymm2
1919 ; CHECK-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
2020 ; CHECK-NEXT: callq dummy
2121 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
88 ; CHECK-NEXT: pushq %rbx
99 ; CHECK-NEXT: subq $112, %rsp
1010 ; CHECK-NEXT: movq %rdi, %rbx
11 ; CHECK-NEXT: vmovups (%rbx), %zmm0
11 ; CHECK-NEXT: vmovups (%rdi), %zmm0
1212 ; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
1313 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
14 ; CHECK-NEXT: vmovaps %zmm1, (%rbx)
14 ; CHECK-NEXT: vmovaps %zmm1, (%rdi)
1515 ; CHECK-NEXT: callq _Print__512
1616 ; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
1717 ; CHECK-NEXT: callq _Print__512
354354 ; KNL_X32-NEXT: movl %edi, (%esp)
355355 ; KNL_X32-NEXT: calll _test11
356356 ; KNL_X32-NEXT: movl %eax, %ebx
357 ; KNL_X32-NEXT: movzbl %bl, %eax
357 ; KNL_X32-NEXT: movzbl %al, %eax
358358 ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
359359 ; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
360360 ; KNL_X32-NEXT: movl %edi, (%esp)
147147 ; X64-NEXT: andq $-64, %rsp
148148 ; X64-NEXT: subq $128, %rsp
149149 ; X64-NEXT: vmovaps %zmm1, %zmm16
150 ; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
150 ; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
151151 ; X64-NEXT: movq %rsp, %rdi
152152 ; X64-NEXT: callq _func_float16_ptr
153153 ; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
649649 ; X32-NEXT: subl $24, %esp
650650 ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
651651 ; X32-NEXT: vmovdqa %xmm0, %xmm4
652 ; X32-NEXT: vmovdqa %xmm4, %xmm1
652 ; X32-NEXT: vmovdqa %xmm0, %xmm1
653653 ; X32-NEXT: calll _test_argRet128Vector
654654 ; X32-NEXT: vmovdqa32 %xmm4, %xmm0 {%k1}
655655 ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
667667 ; WIN64-NEXT: .seh_savexmm 8, 0
668668 ; WIN64-NEXT: .seh_endprologue
669669 ; WIN64-NEXT: vmovdqa %xmm0, %xmm8
670 ; WIN64-NEXT: vmovdqa %xmm8, %xmm1
670 ; WIN64-NEXT: vmovdqa %xmm0, %xmm1
671671 ; WIN64-NEXT: callq test_argRet128Vector
672672 ; WIN64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
673673 ; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
688688 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
689689 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
690690 ; LINUXOSX64-NEXT: vmovdqa %xmm0, %xmm8
691 ; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm1
691 ; LINUXOSX64-NEXT: vmovdqa %xmm0, %xmm1
692692 ; LINUXOSX64-NEXT: callq test_argRet128Vector
693693 ; LINUXOSX64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
694694 ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
907907 ; X32-NEXT: subl $20, %esp
908908 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
909909 ; X32-NEXT: movl %edi, %esi
910 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
910 ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
911911 ; X32-NEXT: movl %edx, %ebx
912 ; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
912 ; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
913913 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
914914 ; X32-NEXT: movl %eax, %edx
915 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
915 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
916916 ; X32-NEXT: subl %ecx, %edx
917917 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
918918 ; X32-NEXT: movl %edi, %ebp
3737 ; SSE2-LABEL: test_negative_zero_1:
3838 ; SSE2: # %bb.0: # %entry
3939 ; SSE2-NEXT: movaps %xmm0, %xmm1
40 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
40 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
4141 ; SSE2-NEXT: xorps %xmm2, %xmm2
4242 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
4343 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
196196 ; SSE-NEXT: cvtss2sd %xmm2, %xmm4
197197 ; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
198198 ; SSE-NEXT: movaps %xmm2, %xmm6
199 ; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
200 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
199 ; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1]
200 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3]
201201 ; SSE-NEXT: movaps {{.*#+}} xmm7
202202 ; SSE-NEXT: movaps %xmm0, %xmm2
203203 ; SSE-NEXT: andps %xmm7, %xmm2
212212 ; SSE-NEXT: orps %xmm0, %xmm4
213213 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
214214 ; SSE-NEXT: movaps %xmm1, %xmm0
215 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
215 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
216216 ; SSE-NEXT: andps %xmm7, %xmm0
217217 ; SSE-NEXT: cvtss2sd %xmm3, %xmm3
218218 ; SSE-NEXT: andps %xmm8, %xmm3
259259 ; SSE-NEXT: orps %xmm6, %xmm1
260260 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
261261 ; SSE-NEXT: movaps %xmm3, %xmm1
262 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
262 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
263263 ; SSE-NEXT: andps %xmm5, %xmm1
264264 ; SSE-NEXT: xorps %xmm6, %xmm6
265265 ; SSE-NEXT: cvtsd2ss %xmm2, %xmm6
187187 ; SSE-LABEL: combine_vec_shl_ext_shl0:
188188 ; SSE: # %bb.0:
189189 ; SSE-NEXT: movdqa %xmm0, %xmm1
190 ; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
190 ; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
191191 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
192192 ; SSE-NEXT: pslld $20, %xmm1
193193 ; SSE-NEXT: pslld $20, %xmm0
1313 ; SSE: # %bb.0:
1414 ; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1515 ; SSE-NEXT: movaps %xmm0, %xmm2
16 ; SSE-NEXT: addss %xmm2, %xmm2
16 ; SSE-NEXT: addss %xmm0, %xmm2
1717 ; SSE-NEXT: mulss %xmm1, %xmm2
1818 ; SSE-NEXT: mulss %xmm0, %xmm0
1919 ; SSE-NEXT: mulss %xmm1, %xmm1
5757 ; SSE-LABEL: complex_square_f64:
5858 ; SSE: # %bb.0:
5959 ; SSE-NEXT: movaps %xmm0, %xmm1
60 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
60 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
6161 ; SSE-NEXT: movaps %xmm0, %xmm2
62 ; SSE-NEXT: addsd %xmm2, %xmm2
62 ; SSE-NEXT: addsd %xmm0, %xmm2
6363 ; SSE-NEXT: mulsd %xmm1, %xmm2
6464 ; SSE-NEXT: mulsd %xmm0, %xmm0
6565 ; SSE-NEXT: mulsd %xmm1, %xmm1
160160 ; SSE-LABEL: complex_mul_f64:
161161 ; SSE: # %bb.0:
162162 ; SSE-NEXT: movaps %xmm0, %xmm2
163 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
163 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
164164 ; SSE-NEXT: movaps %xmm1, %xmm3
165 ; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
165 ; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
166166 ; SSE-NEXT: movaps %xmm3, %xmm4
167167 ; SSE-NEXT: mulsd %xmm0, %xmm4
168168 ; SSE-NEXT: mulsd %xmm1, %xmm0
311311 ; X64: # %bb.0: # %entry
312312 ; X64-NEXT: movq %rdi, %rcx
313313 ; X64-NEXT: movabsq $6120523590596543007, %rdx # imm = 0x54F077C718E7C21F
314 ; X64-NEXT: movq %rcx, %rax
314 ; X64-NEXT: movq %rdi, %rax
315315 ; X64-NEXT: mulq %rdx
316316 ; X64-NEXT: shrq $12, %rdx
317317 ; X64-NEXT: imulq $12345, %rdx, %rax # imm = 0x3039
1717
1818 ; CHECK-LABEL: @test_fmaxf
1919 ; SSE: movaps %xmm0, %xmm2
20 ; SSE-NEXT: cmpunordss %xmm2, %xmm2
20 ; SSE-NEXT: cmpunordss %xmm0, %xmm2
2121 ; SSE-NEXT: movaps %xmm2, %xmm3
2222 ; SSE-NEXT: andps %xmm1, %xmm3
2323 ; SSE-NEXT: maxss %xmm0, %xmm1
4646
4747 ; CHECK-LABEL: @test_fmax
4848 ; SSE: movapd %xmm0, %xmm2
49 ; SSE-NEXT: cmpunordsd %xmm2, %xmm2
49 ; SSE-NEXT: cmpunordsd %xmm0, %xmm2
5050 ; SSE-NEXT: movapd %xmm2, %xmm3
5151 ; SSE-NEXT: andpd %xmm1, %xmm3
5252 ; SSE-NEXT: maxsd %xmm0, %xmm1
7373
7474 ; CHECK-LABEL: @test_intrinsic_fmaxf
7575 ; SSE: movaps %xmm0, %xmm2
76 ; SSE-NEXT: cmpunordss %xmm2, %xmm2
76 ; SSE-NEXT: cmpunordss %xmm0, %xmm2
7777 ; SSE-NEXT: movaps %xmm2, %xmm3
7878 ; SSE-NEXT: andps %xmm1, %xmm3
7979 ; SSE-NEXT: maxss %xmm0, %xmm1
9494
9595 ; CHECK-LABEL: @test_intrinsic_fmax
9696 ; SSE: movapd %xmm0, %xmm2
97 ; SSE-NEXT: cmpunordsd %xmm2, %xmm2
97 ; SSE-NEXT: cmpunordsd %xmm0, %xmm2
9898 ; SSE-NEXT: movapd %xmm2, %xmm3
9999 ; SSE-NEXT: andpd %xmm1, %xmm3
100100 ; SSE-NEXT: maxsd %xmm0, %xmm1
2929 ; X64-LABEL: fast_fmuladd_opts:
3030 ; X64: # %bb.0:
3131 ; X64-NEXT: movaps %xmm0, %xmm1
32 ; X64-NEXT: addss %xmm1, %xmm1
32 ; X64-NEXT: addss %xmm0, %xmm1
3333 ; X64-NEXT: addss %xmm0, %xmm1
3434 ; X64-NEXT: movaps %xmm1, %xmm0
3535 ; X64-NEXT: retq
1717
1818 ; CHECK-LABEL: @test_fminf
1919 ; SSE: movaps %xmm0, %xmm2
20 ; SSE-NEXT: cmpunordss %xmm2, %xmm2
20 ; SSE-NEXT: cmpunordss %xmm0, %xmm2
2121 ; SSE-NEXT: movaps %xmm2, %xmm3
2222 ; SSE-NEXT: andps %xmm1, %xmm3
2323 ; SSE-NEXT: minss %xmm0, %xmm1
3939
4040 ; CHECK-LABEL: @test_fmin
4141 ; SSE: movapd %xmm0, %xmm2
42 ; SSE-NEXT: cmpunordsd %xmm2, %xmm2
42 ; SSE-NEXT: cmpunordsd %xmm0, %xmm2
4343 ; SSE-NEXT: movapd %xmm2, %xmm3
4444 ; SSE-NEXT: andpd %xmm1, %xmm3
4545 ; SSE-NEXT: minsd %xmm0, %xmm1
6666
6767 ; CHECK-LABEL: @test_intrinsic_fminf
6868 ; SSE: movaps %xmm0, %xmm2
69 ; SSE-NEXT: cmpunordss %xmm2, %xmm2
69 ; SSE-NEXT: cmpunordss %xmm0, %xmm2
7070 ; SSE-NEXT: movaps %xmm2, %xmm3
7171 ; SSE-NEXT: andps %xmm1, %xmm3
7272 ; SSE-NEXT: minss %xmm0, %xmm1
8686
8787 ; CHECK-LABEL: @test_intrinsic_fmin
8888 ; SSE: movapd %xmm0, %xmm2
89 ; SSE-NEXT: cmpunordsd %xmm2, %xmm2
89 ; SSE-NEXT: cmpunordsd %xmm0, %xmm2
9090 ; SSE-NEXT: movapd %xmm2, %xmm3
9191 ; SSE-NEXT: andpd %xmm1, %xmm3
9292 ; SSE-NEXT: minsd %xmm0, %xmm1
226226 ; CHECK: # %bb.0: # %entry
227227 ; CHECK-NEXT: subq $40, %rsp
228228 ; CHECK-NEXT: movaps %xmm0, %xmm1
229 ; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
229 ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
230230 ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
231231 ; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
232232 ; CHECK-NEXT: movq $0, (%rsp)
274274 ; CHECK: # %bb.0: # %entry
275275 ; CHECK-NEXT: subq $40, %rsp
276276 ; CHECK-NEXT: movaps %xmm0, %xmm1
277 ; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
277 ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
278278 ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
279279 ; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
280280 ; CHECK-NEXT: movq $0, (%rsp)
3131 ; CHECK-NEXT: movzbl %ah, %eax
3232 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
3333 ; CHECK-NEXT: movzbl %bh, %edi
34 ; CHECK-NEXT: movq %r10, %r8
35 ; CHECK-NEXT: addq %r8, %rsi
34 ; CHECK-NEXT: addq %r10, %rsi
3635 ; CHECK-NEXT: addq %r11, %rdx
3736 ; CHECK-NEXT: addq %rsi, %rdx
3837 ; CHECK-NEXT: addq %rbp, %rcx
6766 ; GNUX32-NEXT: movzbl %ah, %eax
6867 ; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %ebx
6968 ; GNUX32-NEXT: movzbl %bh, %edi
70 ; GNUX32-NEXT: movq %r10, %r8
71 ; GNUX32-NEXT: addq %r8, %rsi
69 ; GNUX32-NEXT: addq %r10, %rsi
7270 ; GNUX32-NEXT: addq %r11, %rdx
7371 ; GNUX32-NEXT: addq %rsi, %rdx
7472 ; GNUX32-NEXT: addq %rbp, %rcx
895895 ; SSE-LABEL: not_a_hsub_2:
896896 ; SSE: # %bb.0:
897897 ; SSE-NEXT: movaps %xmm0, %xmm2
898 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
898 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
899899 ; SSE-NEXT: movaps %xmm0, %xmm3
900 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
900 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
901901 ; SSE-NEXT: subss %xmm3, %xmm2
902902 ; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
903903 ; SSE-NEXT: subss %xmm3, %xmm0
904904 ; SSE-NEXT: movaps %xmm1, %xmm3
905 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
905 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm1[2,3]
906906 ; SSE-NEXT: movaps %xmm1, %xmm4
907 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
907 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
908908 ; SSE-NEXT: subss %xmm4, %xmm3
909909 ; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
910910 ; SSE-NEXT: subss %xmm4, %xmm1
952952 ; SSE-LABEL: not_a_hsub_3:
953953 ; SSE: # %bb.0:
954954 ; SSE-NEXT: movaps %xmm1, %xmm2
955 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
955 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
956956 ; SSE-NEXT: subsd %xmm2, %xmm1
957957 ; SSE-NEXT: movaps %xmm0, %xmm2
958 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
958 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
959959 ; SSE-NEXT: subsd %xmm0, %xmm2
960960 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
961961 ; SSE-NEXT: movapd %xmm2, %xmm0
66 ; SSE2-LABEL: pr26491:
77 ; SSE2: # %bb.0:
88 ; SSE2-NEXT: movaps %xmm0, %xmm1
9 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
9 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
1010 ; SSE2-NEXT: addps %xmm0, %xmm1
1111 ; SSE2-NEXT: movaps %xmm1, %xmm0
12 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
12 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1313 ; SSE2-NEXT: addss %xmm1, %xmm0
1414 ; SSE2-NEXT: retq
1515 ;
1818 ; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1919 ; SSSE3-NEXT: addps %xmm0, %xmm1
2020 ; SSSE3-NEXT: movaps %xmm1, %xmm0
21 ; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
21 ; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2222 ; SSSE3-NEXT: addss %xmm1, %xmm0
2323 ; SSSE3-NEXT: retq
2424 ;
102102 ; SSE-LABEL: test5_undef:
103103 ; SSE: # %bb.0:
104104 ; SSE-NEXT: movaps %xmm0, %xmm1
105 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
105 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
106106 ; SSE-NEXT: addsd %xmm0, %xmm1
107107 ; SSE-NEXT: movapd %xmm1, %xmm0
108108 ; SSE-NEXT: retq
167167 ; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
168168 ; SSE-NEXT: addss %xmm0, %xmm1
169169 ; SSE-NEXT: movaps %xmm0, %xmm2
170 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
170 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
171171 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
172172 ; SSE-NEXT: addss %xmm2, %xmm0
173173 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
385385 ; CHECK-LIBCALL-NEXT: pushq %rbx
386386 ; CHECK-LIBCALL-NEXT: subq $48, %rsp
387387 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
388 ; CHECK-LIBCALL-NEXT: movzwl (%rbx), %edi
388 ; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
389389 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
390390 ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
391391 ; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi
471471 ; CHECK-LIBCALL-NEXT: pushq %rbx
472472 ; CHECK-LIBCALL-NEXT: subq $16, %rsp
473473 ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
474 ; CHECK-LIBCALL-NEXT: movzwl 4(%rbx), %edi
474 ; CHECK-LIBCALL-NEXT: movzwl 4(%rdi), %edi
475475 ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
476476 ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
477477 ; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi
656656 ; CHECK-I686-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) # 16-byte Spill
657657 ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp
658658 ; CHECK-I686-NEXT: movaps %xmm0, %xmm1
659 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
659 ; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
660660 ; CHECK-I686-NEXT: movss %xmm1, (%esp)
661661 ; CHECK-I686-NEXT: calll __gnu_f2h_ieee
662662 ; CHECK-I686-NEXT: movw %ax, %si
3939 ; X86-SSE42-LABEL: test_reduce_v2i64:
4040 ; X86-SSE42: ## %bb.0:
4141 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
42 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
42 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
4343 ; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
4444 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
4545 ; X86-SSE42-NEXT: movd %xmm2, %eax
7979 ; X64-SSE42-LABEL: test_reduce_v2i64:
8080 ; X64-SSE42: ## %bb.0:
8181 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
82 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
82 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
8383 ; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
8484 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
8585 ; X64-SSE42-NEXT: movq %xmm2, %rax
3939 ; X86-SSE42-LABEL: test_reduce_v2i64:
4040 ; X86-SSE42: ## %bb.0:
4141 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
42 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
42 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
4343 ; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
4444 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
4545 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
8080 ; X64-SSE42-LABEL: test_reduce_v2i64:
8181 ; X64-SSE42: ## %bb.0:
8282 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
83 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
83 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
8484 ; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
8585 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
8686 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
3939 ; X86-SSE42-LABEL: test_reduce_v2i64:
4040 ; X86-SSE42: ## %bb.0:
4141 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
42 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
42 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
4343 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
4444 ; X86-SSE42-NEXT: pxor %xmm3, %xmm0
4545 ; X86-SSE42-NEXT: pxor %xmm2, %xmm3
8585 ; X64-SSE42-LABEL: test_reduce_v2i64:
8686 ; X64-SSE42: ## %bb.0:
8787 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
88 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
88 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
8989 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
9090 ; X64-SSE42-NEXT: pxor %xmm3, %xmm0
9191 ; X64-SSE42-NEXT: pxor %xmm2, %xmm3
16921692 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1
16931693 ; X86-SSE2-NEXT: pmaxsw %xmm3, %xmm1
16941694 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm2
1695 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
1695 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2
16961696 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
16971697 ; X86-SSE2-NEXT: pxor %xmm0, %xmm2
16981698 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm2
17701770 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
17711771 ; X64-SSE2-NEXT: pmaxsw %xmm3, %xmm1
17721772 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm2
1773 ; X64-SSE2-NEXT: pxor %xmm2, %xmm2
1773 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2
17741774 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
17751775 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2
17761776 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm2
3939 ; X86-SSE42-LABEL: test_reduce_v2i64:
4040 ; X86-SSE42: ## %bb.0:
4141 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
42 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
42 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
4343 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
4444 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm3
4545 ; X86-SSE42-NEXT: pxor %xmm0, %xmm3
8686 ; X64-SSE42-LABEL: test_reduce_v2i64:
8787 ; X64-SSE42: ## %bb.0:
8888 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
89 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
89 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
9090 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
9191 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm3
9292 ; X64-SSE42-NEXT: pxor %xmm0, %xmm3
443443 ; X86-SSE42: ## %bb.0:
444444 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
445445 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
446 ; X86-SSE42-NEXT: movdqa %xmm2, %xmm4
446 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
447447 ; X86-SSE42-NEXT: pxor %xmm3, %xmm4
448448 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
449449 ; X86-SSE42-NEXT: pxor %xmm3, %xmm0
542542 ; X64-SSE42: ## %bb.0:
543543 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
544544 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
545 ; X64-SSE42-NEXT: movdqa %xmm2, %xmm4
545 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm4
546546 ; X64-SSE42-NEXT: pxor %xmm3, %xmm4
547547 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
548548 ; X64-SSE42-NEXT: pxor %xmm3, %xmm0
15961596 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1
15971597 ; X86-SSE2-NEXT: pminsw %xmm3, %xmm1
15981598 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm2
1599 ; X86-SSE2-NEXT: pxor %xmm2, %xmm2
1599 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2
16001600 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
16011601 ; X86-SSE2-NEXT: pxor %xmm0, %xmm2
16021602 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm2
16651665 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
16661666 ; X64-SSE2-NEXT: pminsw %xmm3, %xmm1
16671667 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm2
1668 ; X64-SSE2-NEXT: pxor %xmm2, %xmm2
1668 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2
16691669 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
16701670 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2
16711671 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm2
144144 ; X86-NOBMI-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
145145 ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
146146 ; X86-NOBMI-NEXT: movl %eax, %ecx
147 ; X86-NOBMI-NEXT: movl (%ecx,%ebx,8), %ebp
148 ; X86-NOBMI-NEXT: movl 4(%ecx,%ebx,8), %esi
147 ; X86-NOBMI-NEXT: movl (%eax,%ebx,8), %ebp
148 ; X86-NOBMI-NEXT: movl 4(%eax,%ebx,8), %esi
149149 ; X86-NOBMI-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
150150 ; X86-NOBMI-NEXT: movl %ebp, %eax
151151 ; X86-NOBMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
244244 ; X86-BMI-NEXT: movl %ecx, %edx
245245 ; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
246246 ; X86-BMI-NEXT: movl %eax, %esi
247 ; X86-BMI-NEXT: mulxl %esi, %eax, %ebp
247 ; X86-BMI-NEXT: mulxl %eax, %eax, %ebp
248248 ; X86-BMI-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
249249 ; X86-BMI-NEXT: movl %ebx, %edx
250250 ; X86-BMI-NEXT: mulxl %esi, %eax, %esi
160160 ; CHECK-NEXT: fstpt (%esp)
161161 ; CHECK-NEXT: calll _ceil
162162 ; CHECK-NEXT: fld %st(0)
163 ; CHECK-NEXT: fxch %st(1)
163164 ; CHECK-NEXT: ## InlineAsm Start
164165 ; CHECK-NEXT: fistpl %st(0)
165166 ; CHECK-NEXT: ## InlineAsm End
2323 call void @foo()
2424 ; CHECK-LABEL: bar:
2525 ; CHECK: callq foo
26 ; CHECK-NEXT: movl %eax, %r15d
26 ; CHECK-NEXT: movl %edi, %r15d
2727 call void asm sideeffect "movl $0, %r12d", "{r15}~{r12}"(i32 %X)
2828 ret void
2929 }
99 ; X86-NEXT: roll %ecx
1010 ; X86-NEXT: addl %eax, %eax
1111 ; X86-NEXT: movl %ecx, %edx
12 ; X86-NEXT: orl %edx, %edx
12 ; X86-NEXT: orl %ecx, %edx
1313 ; X86-NEXT: orl %ecx, %edx
1414 ; X86-NEXT: orl %eax, %edx
1515 ; X86-NEXT: orl %ecx, %edx
2323 ; X64-NEXT: rolq %rcx
2424 ; X64-NEXT: addq %rax, %rax
2525 ; X64-NEXT: movq %rcx, %rdx
26 ; X64-NEXT: orq %rdx, %rdx
26 ; X64-NEXT: orq %rcx, %rdx
2727 ; X64-NEXT: orq %rax, %rdx
2828 ; X64-NEXT: orq %rcx, %rdx
2929 ; X64-NEXT: sete (%rax)
2626
2727 ; X64-LABEL: print_framealloc_from_fp:
2828 ; X64: movq %rcx, %[[parent_fp:[a-z]+]]
29 ; X64: movl .Lalloc_func$frame_escape_0(%[[parent_fp]]), %edx
29 ; X64: movl .Lalloc_func$frame_escape_0(%rcx), %edx
3030 ; X64: leaq {{.*}}(%rip), %[[str:[a-z]+]]
3131 ; X64: movq %[[str]], %rcx
3232 ; X64: callq printf
77 ; CHECK: ## %bb.0: ## %entry
88 ; CHECK-NEXT: movl %esi, %edx
99 ; CHECK-NEXT: movl %edi, %eax
10 ; CHECK-NEXT: testl %edx, %edx
10 ; CHECK-NEXT: testl %esi, %esi
1111 ; CHECK-NEXT: je LBB0_1
1212 ; CHECK-NEXT: .p2align 4, 0x90
1313 ; CHECK-NEXT: LBB0_2: ## %while.body
5858 ; CHECK: ## %bb.0: ## %entry
5959 ; CHECK-NEXT: movq %rsi, %rdx
6060 ; CHECK-NEXT: movq %rdi, %rax
61 ; CHECK-NEXT: testq %rdx, %rdx
61 ; CHECK-NEXT: testq %rsi, %rsi
6262 ; CHECK-NEXT: je LBB2_1
6363 ; CHECK-NEXT: .p2align 4, 0x90
6464 ; CHECK-NEXT: LBB2_2: ## %while.body
579579 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
580580 ; X32-NEXT: movl 8(%ebp), %ecx
581581 ; X32-NEXT: movl %ecx, %esi
582 ; X32-NEXT: movl (%esi,%ebx,8), %ecx
582 ; X32-NEXT: movl (%ecx,%ebx,8), %ecx
583583 ; X32-NEXT: movl 4(%esi,%ebx,8), %esi
584584 ; X32-NEXT: movl 12(%ebp), %edi
585585 ; X32-NEXT: addl (%edi,%ebx,8), %ecx
3737 ; X32-NEXT: movl %edx, %eax
3838 ; X32-NEXT: adcl %edi, %eax
3939 ; X32-NEXT: movl %edi, %ecx
40 ; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill
40 ; X32-NEXT: movl %edi, -204(%ebp) # 4-byte Spill
4141 ; X32-NEXT: movl %eax, -892(%ebp) # 4-byte Spill
4242 ; X32-NEXT: movl 12(%ebp), %eax
4343 ; X32-NEXT: movl 36(%eax), %eax
4646 ; X32-NEXT: mull %edx
4747 ; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill
4848 ; X32-NEXT: movl %eax, %edi
49 ; X32-NEXT: movl %edi, -304(%ebp) # 4-byte Spill
49 ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill
5050 ; X32-NEXT: addl %ecx, %edi
5151 ; X32-NEXT: movl %edi, -80(%ebp) # 4-byte Spill
5252 ; X32-NEXT: movl %edx, %eax
5757 ; X32-NEXT: xorl %ecx, %ecx
5858 ; X32-NEXT: mull %ecx
5959 ; X32-NEXT: movl %edx, %ecx
60 ; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill
60 ; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill
6161 ; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill
6262 ; X32-NEXT: movl %eax, %edx
6363 ; X32-NEXT: movl -400(%ebp), %esi # 4-byte Reload
7171 ; X32-NEXT: movl %eax, -656(%ebp) # 4-byte Spill
7272 ; X32-NEXT: leal (%ebx,%edi), %eax
7373 ; X32-NEXT: movl %edx, %edi
74 ; X32-NEXT: leal (%ecx,%edi), %edx
74 ; X32-NEXT: leal (%ecx,%edx), %edx
7575 ; X32-NEXT: adcl %eax, %edx
7676 ; X32-NEXT: movl %edx, -700(%ebp) # 4-byte Spill
7777 ; X32-NEXT: seto %al
122122 ; X32-NEXT: adcl %edi, %ebx
123123 ; X32-NEXT: movl %ebx, -424(%ebp) # 4-byte Spill
124124 ; X32-NEXT: movl %edi, %ebx
125 ; X32-NEXT: movl %ebx, -256(%ebp) # 4-byte Spill
125 ; X32-NEXT: movl %edi, -256(%ebp) # 4-byte Spill
126126 ; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload
127127 ; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill
128128 ; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload
147147 ; X32-NEXT: movzbl %bh, %eax
148148 ; X32-NEXT: adcl %edx, %eax
149149 ; X32-NEXT: movl %eax, %edi
150 ; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill
150 ; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill
151151 ; X32-NEXT: movl 12(%ebp), %eax
152152 ; X32-NEXT: movl 8(%eax), %eax
153153 ; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill
219219 ; X32-NEXT: mull %ecx
220220 ; X32-NEXT: movl %eax, -364(%ebp) # 4-byte Spill
221221 ; X32-NEXT: movl %edx, %ebx
222 ; X32-NEXT: movl %ebx, -396(%ebp) # 4-byte Spill
222 ; X32-NEXT: movl %edx, -396(%ebp) # 4-byte Spill
223223 ; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload
224224 ; X32-NEXT: movl %edx, %edi
225225 ; X32-NEXT: addl %eax, %edi
251251 ; X32-NEXT: mull %ebx
252252 ; X32-NEXT: movl %eax, %edi
253253 ; X32-NEXT: movl %edx, %esi
254 ; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill
254 ; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill
255255 ; X32-NEXT: movl 20(%ecx), %eax
256256 ; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill
257257 ; X32-NEXT: mull %ebx
302302 ; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload
303303 ; X32-NEXT: adcl %edx, %eax
304304 ; X32-NEXT: movl %edx, %ebx
305 ; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill
305 ; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill
306306 ; X32-NEXT: movl %eax, -780(%ebp) # 4-byte Spill
307307 ; X32-NEXT: movl -132(%ebp), %edx # 4-byte Reload
308308 ; X32-NEXT: movl %edx, %eax
392392 ; X32-NEXT: xorl %ecx, %ecx
393393 ; X32-NEXT: mull %ecx
394394 ; X32-NEXT: movl %eax, %ecx
395 ; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill
395 ; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill
396396 ; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill
397397 ; X32-NEXT: movl %ebx, %esi
398 ; X32-NEXT: movl %esi, %eax
398 ; X32-NEXT: movl %ebx, %eax
399399 ; X32-NEXT: addl %ecx, %eax
400400 ; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload
401401 ; X32-NEXT: movl %ebx, %ecx
424424 ; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload
425425 ; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill
426426 ; X32-NEXT: movl %esi, %edx
427 ; X32-NEXT: movl %edx, %eax
427 ; X32-NEXT: movl %esi, %eax
428428 ; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload
429429 ; X32-NEXT: addl %esi, %eax
430430 ; X32-NEXT: movl %ebx, %eax
532532 ; X32-NEXT: xorl %ecx, %ecx
533533 ; X32-NEXT: mull %ecx
534534 ; X32-NEXT: movl %eax, %ebx
535 ; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill
535 ; X32-NEXT: movl %eax, -336(%ebp) # 4-byte Spill
536536 ; X32-NEXT: movl %edx, %edi
537537 ; X32-NEXT: movl 52(%esi), %eax
538538 ; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill
558558 ; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload
559559 ; X32-NEXT: addl %eax, %ebx
560560 ; X32-NEXT: movl %edi, %edx
561 ; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill
561 ; X32-NEXT: movl %edi, -176(%ebp) # 4-byte Spill
562562 ; X32-NEXT: adcl -360(%ebp), %edi # 4-byte Folded Reload
563563 ; X32-NEXT: addl %ecx, %ebx
564564 ; X32-NEXT: movl %ebx, -472(%ebp) # 4-byte Spill
589589 ; X32-NEXT: xorl %ecx, %ecx
590590 ; X32-NEXT: mull %ecx
591591 ; X32-NEXT: movl %edx, %esi
592 ; X32-NEXT: movl %esi, -384(%ebp) # 4-byte Spill
592 ; X32-NEXT: movl %edx, -384(%ebp) # 4-byte Spill
593593 ; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload
594594 ; X32-NEXT: movl %edi, %ecx
595595 ; X32-NEXT: movl %eax, %edx
596 ; X32-NEXT: movl %edx, -480(%ebp) # 4-byte Spill
597 ; X32-NEXT: addl %edx, %ecx
596 ; X32-NEXT: movl %eax, -480(%ebp) # 4-byte Spill
597 ; X32-NEXT: addl %eax, %ecx
598598 ; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload
599599 ; X32-NEXT: movl %ebx, %eax
600600 ; X32-NEXT: adcl %esi, %eax
641641 ; X32-NEXT: movl %eax, %ecx
642642 ; X32-NEXT: addl %esi, %ecx
643643 ; X32-NEXT: movl %edx, %esi
644 ; X32-NEXT: movl %esi, -496(%ebp) # 4-byte Spill
645 ; X32-NEXT: movl %esi, %ecx
644 ; X32-NEXT: movl %edx, -496(%ebp) # 4-byte Spill
645 ; X32-NEXT: movl %edx, %ecx
646646 ; X32-NEXT: adcl %edi, %ecx
647647 ; X32-NEXT: movl %ecx, -992(%ebp) # 4-byte Spill
648648 ; X32-NEXT: movl %eax, %ecx
760760 ; X32-NEXT: xorl %edx, %edx
761761 ; X32-NEXT: mull %edx
762762 ; X32-NEXT: movl %eax, %esi
763 ; X32-NEXT: movl %esi, -484(%ebp) # 4-byte Spill
763 ; X32-NEXT: movl %eax, -484(%ebp) # 4-byte Spill
764764 ; X32-NEXT: movl %edx, -488(%ebp) # 4-byte Spill
765765 ; X32-NEXT: movl %ebx, %eax
766766 ; X32-NEXT: addl %esi, %eax
792792 ; X32-NEXT: adcl -60(%ebp), %ebx # 4-byte Folded Reload
793793 ; X32-NEXT: movl %ebx, -928(%ebp) # 4-byte Spill
794794 ; X32-NEXT: movl 8(%ebp), %ecx
795 ; X32-NEXT: movl %ecx, %eax
796 ; X32-NEXT: movl 84(%eax), %eax
795 ; X32-NEXT: movl 84(%ecx), %eax
797796 ; X32-NEXT: movl %eax, -544(%ebp) # 4-byte Spill
798797 ; X32-NEXT: xorl %ecx, %ecx
799798 ; X32-NEXT: mull %ecx
870869 ; X32-NEXT: xorl %edx, %edx
871870 ; X32-NEXT: mull %edx
872871 ; X32-NEXT: movl %eax, %esi
873 ; X32-NEXT: movl %esi, -556(%ebp) # 4-byte Spill
872 ; X32-NEXT: movl %eax, -556(%ebp) # 4-byte Spill
874873 ; X32-NEXT: movl %edx, -560(%ebp) # 4-byte Spill
875874 ; X32-NEXT: movl -524(%ebp), %eax # 4-byte Reload
876875 ; X32-NEXT: movl %eax, %ebx
881880 ; X32-NEXT: movl %ebx, -732(%ebp) # 4-byte Spill
882881 ; X32-NEXT: adcl %edi, %esi
883882 ; X32-NEXT: movl %esi, %edx
884 ; X32-NEXT: movl %edx, -728(%ebp) # 4-byte Spill
883 ; X32-NEXT: movl %esi, -728(%ebp) # 4-byte Spill
885884 ; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload
886885 ; X32-NEXT: movl %eax, -712(%ebp) # 4-byte Spill
887886 ; X32-NEXT: movl -668(%ebp), %ecx # 4-byte Reload
916915 ; X32-NEXT: mull %ebx
917916 ; X32-NEXT: movl %eax, -564(%ebp) # 4-byte Spill
918917 ; X32-NEXT: movl %edx, %ebx
919 ; X32-NEXT: movl %ebx, -568(%ebp) # 4-byte Spill
918 ; X32-NEXT: movl %edx, -568(%ebp) # 4-byte Spill
920919 ; X32-NEXT: movl -500(%ebp), %edx # 4-byte Reload
921920 ; X32-NEXT: movl %edx, %edi
922921 ; X32-NEXT: addl %eax, %edi
982981 ; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload
983982 ; X32-NEXT: adcl %edx, %eax
984983 ; X32-NEXT: movl %ecx, %edx
985 ; X32-NEXT: addl %edx, %ebx
984 ; X32-NEXT: addl %ecx, %ebx
986985 ; X32-NEXT: adcl %esi, %eax
987986 ; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill
988987 ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload
10371036 ; X32-NEXT: mull %ecx
10381037 ; X32-NEXT: movl %edx, %edi
10391038 ; X32-NEXT: movl %eax, %ebx
1040 ; X32-NEXT: movl %ebx, %ecx
1039 ; X32-NEXT: movl %eax, %ecx
10411040 ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload
10421041 ; X32-NEXT: addl %esi, %ecx
10431042 ; X32-NEXT: adcl $0, %edx
10511050 ; X32-NEXT: movzbl -16(%ebp), %ebx # 1-byte Folded Reload
10521051 ; X32-NEXT: adcl %edi, %ebx
10531052 ; X32-NEXT: movl %eax, %esi
1054 ; X32-NEXT: addl %esi, %edx
1053 ; X32-NEXT: addl %eax, %edx
10551054 ; X32-NEXT: adcl %ecx, %ebx
10561055 ; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload
10571056 ; X32-NEXT: addl -324(%ebp), %eax # 4-byte Folded Reload
11421141 ; X32-NEXT: movzbl %cl, %eax
11431142 ; X32-NEXT: adcl %esi, %eax
11441143 ; X32-NEXT: movl %edi, %esi
1145 ; X32-NEXT: addl %esi, %edx
1144 ; X32-NEXT: addl %edi, %edx
11461145 ; X32-NEXT: adcl %ebx, %eax
11471146 ; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill
11481147 ; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload
12221221 ; X32-NEXT: movzbl %bl, %eax
12231222 ; X32-NEXT: adcl %edx, %eax
12241223 ; X32-NEXT: movl %ecx, %edx
1225 ; X32-NEXT: addl %edx, %esi
1224 ; X32-NEXT: addl %ecx, %esi
12261225 ; X32-NEXT: adcl %edi, %eax
12271226 ; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill
12281227 ; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload
16961695 ; X32-NEXT: movl %esi, %eax
16971696 ; X32-NEXT: mull %ebx
16981697 ; X32-NEXT: movl %ebx, %esi
1699 ; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill
1698 ; X32-NEXT: movl %ebx, -48(%ebp) # 4-byte Spill
17001699 ; X32-NEXT: movl %edx, %ebx
17011700 ; X32-NEXT: addl %ecx, %eax
17021701 ; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill
44784477 ; X32-NEXT: movl %esi, %eax
44794478 ; X32-NEXT: mull %ebx
44804479 ; X32-NEXT: movl %ebx, %esi
4481 ; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill
4480 ; X32-NEXT: movl %ebx, -140(%ebp) # 4-byte Spill
44824481 ; X32-NEXT: movl %edx, %ebx
44834482 ; X32-NEXT: addl %ecx, %eax
44844483 ; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill
51985197 ; X32-NEXT: addl %edi, %edx
51995198 ; X32-NEXT: movl 124(%ebx), %ebx
52005199 ; X32-NEXT: movl %ecx, %eax
5201 ; X32-NEXT: imull %eax, %ebx
5200 ; X32-NEXT: imull %ecx, %ebx
52025201 ; X32-NEXT: addl %edx, %ebx
52035202 ; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload
52045203 ; X32-NEXT: addl %ecx, -96(%ebp) # 4-byte Folded Spill
60726071 ; X32-NEXT: movl 108(%eax), %edx
60736072 ; X32-NEXT: movl %ebx, %eax
60746073 ; X32-NEXT: movl %edx, %ebx
6075 ; X32-NEXT: movl %ebx, -112(%ebp) # 4-byte Spill
6076 ; X32-NEXT: mull %ebx
6074 ; X32-NEXT: movl %edx, -112(%ebp) # 4-byte Spill
6075 ; X32-NEXT: mull %edx
60776076 ; X32-NEXT: movl %edx, %esi
60786077 ; X32-NEXT: addl %ecx, %eax
60796078 ; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill
61126111 ; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload
61136112 ; X32-NEXT: movl %ecx, %eax
61146113 ; X32-NEXT: movl %ebx, %esi
6115 ; X32-NEXT: mull %esi
6114 ; X32-NEXT: mull %ebx
61166115 ; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill
61176116 ; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill
61186117 ; X32-NEXT: movl -60(%ebp), %ebx # 4-byte Reload
67536752 ; X64-NEXT: adcq $0, %rbp
67546753 ; X64-NEXT: addq %rcx, %rbx
67556754 ; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
6756 ; X64-NEXT: movq %rcx, %r11
67576755 ; X64-NEXT: adcq %rdi, %rbp
67586756 ; X64-NEXT: setb %bl
67596757 ; X64-NEXT: movzbl %bl, %ebx
67636761 ; X64-NEXT: mulq %r8
67646762 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
67656763 ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
6766 ; X64-NEXT: movq %r11, %r12
6767 ; X64-NEXT: movq %r11, %r8
6764 ; X64-NEXT: movq %rcx, %r12
6765 ; X64-NEXT: movq %rcx, %r8
67686766 ; X64-NEXT: addq %rax, %r12
67696767 ; X64-NEXT: movq %rdi, %rax
67706768 ; X64-NEXT: movq %rdi, %r9
6771 ; X64-NEXT: movq %r9, (%rsp) # 8-byte Spill
6769 ; X64-NEXT: movq %rdi, (%rsp) # 8-byte Spill
67726770 ; X64-NEXT: adcq %rdx, %rax
67736771 ; X64-NEXT: addq %rbp, %r12
67746772 ; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
67976795 ; X64-NEXT: adcq %rdx, %rbx
67986796 ; X64-NEXT: movq 16(%rsi), %rax
67996797 ; X64-NEXT: movq %rsi, %r13
6800 ; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill
6798 ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
68016799 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
68026800 ; X64-NEXT: mulq %r11
68036801 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
68106808 ; X64-NEXT: adcq %rbx, %r11
68116809 ; X64-NEXT: movq %r8, %rax
68126810 ; X64-NEXT: movq %r8, %rbp
6813 ; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill
6811 ; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
68146812 ; X64-NEXT: addq %rdi, %rax
68156813 ; X64-NEXT: movq %r9, %rax
68166814 ; X64-NEXT: adcq %rcx, %rax
68236821 ; X64-NEXT: movq %rax, %rbx
68246822 ; X64-NEXT: addq %rdi, %rax
68256823 ; X64-NEXT: movq %rdi, %r9
6826 ; X64-NEXT: movq %rsi, %rax
6824 ; X64-NEXT: movq %rdx, %rax
68276825 ; X64-NEXT: adcq %rcx, %rax
68286826 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
68296827 ; X64-NEXT: movq 32(%r13), %rax
68396837 ; X64-NEXT: adcq %rdx, %rax
68406838 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
68416839 ; X64-NEXT: movq %rbp, %rax
6842 ; X64-NEXT: addq %r9, %rax
6840 ; X64-NEXT: addq %rdi, %rax
68436841 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
6844 ; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
6842 ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
68456843 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
68466844 ; X64-NEXT: adcq %r15, %rax
68476845 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
68596857 ; X64-NEXT: addq %rsi, %r11
68606858 ; X64-NEXT: movq %rdx, %rbp
68616859 ; X64-NEXT: adcq $0, %rbp
6862 ; X64-NEXT: addq %rcx, %r11
6860 ; X64-NEXT: addq %rbx, %r11
68636861 ; X64-NEXT: adcq %rsi, %rbp
68646862 ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
68656863 ; X64-NEXT: setb %bl
68806878 ; X64-NEXT: adcq %rbx, %r10
68816879 ; X64-NEXT: movq %rcx, %rdx
68826880 ; X64-NEXT: movq %rcx, %r12
6883 ; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
6881 ; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
68846882 ; X64-NEXT: addq %r9, %rdx
68856883 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
68866884 ; X64-NEXT: movq %r11, %r8
6887 ; X64-NEXT: adcq %r8, %r15
6885 ; X64-NEXT: adcq %r11, %r15
68886886 ; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill
68896887 ; X64-NEXT: adcq %rax, %r14
68906888 ; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill
69806978 ; X64-NEXT: adcq %rdx, %r12
69816979 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
69826980 ; X64-NEXT: movq %rcx, %rax
6983 ; X64-NEXT: movq %r10, %rbp
6984 ; X64-NEXT: mulq %rbp
6981 ; X64-NEXT: mulq %r10
69856982 ; X64-NEXT: movq %rdx, %rsi
69866983 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
69876984 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
69886985 ; X64-NEXT: movq %rdi, %rax
6989 ; X64-NEXT: mulq %rbp
6986 ; X64-NEXT: mulq %r10
69906987 ; X64-NEXT: movq %rdx, %rbp
69916988 ; X64-NEXT: movq %rax, %rbx
69926989 ; X64-NEXT: addq %rsi, %rbx
70137010 ; X64-NEXT: adcq $0, %r15
70147011 ; X64-NEXT: adcq $0, %r12
70157012 ; X64-NEXT: movq %r10, %rbx
7016 ; X64-NEXT: movq %rbx, %rax
7013 ; X64-NEXT: movq %r10, %rax
70177014 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload
70187015 ; X64-NEXT: mulq %r11
70197016 ; X64-NEXT: movq %rdx, %rcx
70307027 ; X64-NEXT: movq %rbx, %rax
70317028 ; X64-NEXT: mulq %rcx
70327029 ; X64-NEXT: movq %rcx, %rbx
7033 ; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
7030 ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
70347031 ; X64-NEXT: movq %rdx, %rcx
70357032 ; X64-NEXT: movq %rax, %r8
70367033 ; X64-NEXT: addq %rbp, %r8
70617058 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
70627059 ; X64-NEXT: movq %rcx, %rax
70637060 ; X64-NEXT: movq %r11, %rsi
7064 ; X64-NEXT: mulq %rsi
7061 ; X64-NEXT: mulq %r11
70657062 ; X64-NEXT: movq %rdx, %r11
70667063 ; X64-NEXT: movq %rax, %r13
70677064 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload
71417138 ; X64-NEXT: adcq %rdx, %r10
71427139 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
71437140 ; X64-NEXT: movq %rcx, %rax
7144 ; X64-NEXT: movq %r11, %rbp
7145 ; X64-NEXT: mulq %rbp
7141 ; X64-NEXT: mulq %r11
71467142 ; X64-NEXT: movq %rdx, %rdi
71477143 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
71487144 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
71497145 ; X64-NEXT: movq %rsi, %rax
7150 ; X64-NEXT: mulq %rbp
7146 ; X64-NEXT: mulq %r11
71517147 ; X64-NEXT: movq %rdx, %rbp
71527148 ; X64-NEXT: movq %rax, %rbx
71537149 ; X64-NEXT: addq %rdi, %rbx
72777273 ; X64-NEXT: movq %rdx, %rsi
72787274 ; X64-NEXT: movq %rax, %r14
72797275 ; X64-NEXT: movq %r8, %rbp
7280 ; X64-NEXT: movq %rbp, %rax
7276 ; X64-NEXT: movq %r8, %rax
72817277 ; X64-NEXT: mulq %rcx
72827278 ; X64-NEXT: movq %rcx, %r11
72837279 ; X64-NEXT: movq %rdx, %rbx
73377333 ; X64-NEXT: adcq $0, %r9
73387334 ; X64-NEXT: adcq $0, %r10
73397335 ; X64-NEXT: movq %rbp, %rsi
7340 ; X64-NEXT: movq %rsi, %rax
7336 ; X64-NEXT: movq %rbp, %rax
73417337 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
73427338 ; X64-NEXT: mulq %rcx
73437339 ; X64-NEXT: movq %rdx, %r14
73947390 ; X64-NEXT: adcq $0, %r15
73957391 ; X64-NEXT: movq %rbp, %rax
73967392 ; X64-NEXT: movq %r8, %rdi
7397 ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
7398 ; X64-NEXT: mulq %rdi
7393 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
7394 ; X64-NEXT: mulq %r8
73997395 ; X64-NEXT: movq %rdx, %r9
74007396 ; X64-NEXT: movq %rax, %r8
74017397 ; X64-NEXT: addq %rbx, %r8
74787474 ; X64-NEXT: movq %rcx, %r14
74797475 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
74807476 ; X64-NEXT: movq %rcx, %rax
7481 ; X64-NEXT: movq %r10, %rdi
7482 ; X64-NEXT: mulq %rdi
7477 ; X64-NEXT: mulq %r10
74837478 ; X64-NEXT: movq %rdx, %r11
74847479 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
74857480 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
74867481 ; X64-NEXT: movq %rsi, %rax
7487 ; X64-NEXT: mulq %rdi
7482 ; X64-NEXT: mulq %r10
74887483 ; X64-NEXT: movq %rdx, %rdi
74897484 ; X64-NEXT: movq %rax, %rbx
74907485 ; X64-NEXT: addq %r11, %rbx
75127507 ; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
75137508 ; X64-NEXT: adcq $0, %r14
75147509 ; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill
7515 ; X64-NEXT: movq %r13, %rbx
7516 ; X64-NEXT: movq %rbx, %rax
7510 ; X64-NEXT: movq %r13, %rax
75177511 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
75187512 ; X64-NEXT: mulq %rcx
75197513 ; X64-NEXT: movq %rdx, %r8
75267520 ; X64-NEXT: movq %rax, %rcx
75277521 ; X64-NEXT: addq %r8, %rcx
75287522 ; X64-NEXT: adcq $0, %rsi
7529 ; X64-NEXT: movq %rbx, %rax
7523 ; X64-NEXT: movq %r13, %rax
75307524 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload
75317525 ; X64-NEXT: mulq %r13
75327526 ; X64-NEXT: movq %rdx, %rbx
75607554 ; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill
75617555 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload
75627556 ; X64-NEXT: movq %rbx, %rax
7563 ; X64-NEXT: movq %r10, %rsi
7564 ; X64-NEXT: mulq %rsi
7557 ; X64-NEXT: mulq %r10
75657558 ; X64-NEXT: movq %rdx, %rcx
75667559 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
75677560 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
75687561 ; X64-NEXT: movq %r8, %rax
7569 ; X64-NEXT: mulq %rsi
7562 ; X64-NEXT: mulq %r10
75707563 ; X64-NEXT: movq %rdx, %rsi
75717564 ; X64-NEXT: movq %rax, %rdi
75727565 ; X64-NEXT: addq %rcx, %rdi
76427635 ; X64-NEXT: movq %r9, %rax
76437636 ; X64-NEXT: mulq %rcx
76447637 ; X64-NEXT: movq %rcx, %r10
7645 ; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill
7638 ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
76467639 ; X64-NEXT: movq %rdx, %rcx
76477640 ; X64-NEXT: movq %rax, %rdi
76487641 ; X64-NEXT: addq %rsi, %rdi
76547647 ; X64-NEXT: movq %rax, %rbx
76557648 ; X64-NEXT: movq %rdx, %r14
76567649 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload
7657 ; X64-NEXT: addq %rbx, %r12
7650 ; X64-NEXT: addq %rax, %r12
76587651 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload
7659 ; X64-NEXT: adcq %r14, %r15
7652 ; X64-NEXT: adcq %rdx, %r15
76607653 ; X64-NEXT: addq %rdi, %r12
76617654 ; X64-NEXT: adcq %rcx, %r15
76627655 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
76637656 ; X64-NEXT: movq %rcx, %rax
76647657 ; X64-NEXT: movq %r11, %rsi
7665 ; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
7666 ; X64-NEXT: mulq %rsi
7658 ; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
7659 ; X64-NEXT: mulq %r11
76677660 ; X64-NEXT: movq %rdx, %r11
76687661 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
76697662 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload
77277720 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
77287721 ; X64-NEXT: movq %rax, %r9
77297722 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
7730 ; X64-NEXT: addq %r9, %rbp
7723 ; X64-NEXT: addq %rax, %rbp
77317724 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
77327725 ; X64-NEXT: adcq %rdx, %rax
77337726 ; X64-NEXT: addq %rsi, %rbp
79057898 ; X64-NEXT: movq 88(%rsi), %rax
79067899 ; X64-NEXT: movq %rsi, %r9
79077900 ; X64-NEXT: movq %rax, %rsi
7908 ; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
7901 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
79097902 ; X64-NEXT: mulq %rcx
79107903 ; X64-NEXT: movq %rcx, %r11
79117904 ; X64-NEXT: movq %rdx, %rbp
79417934 ; X64-NEXT: adcq %r8, %r10
79427935 ; X64-NEXT: addq %rbx, %rsi
79437936 ; X64-NEXT: adcq %rbp, %r10
7944 ; X64-NEXT: movq %r9, %rdi
7945 ; X64-NEXT: movq 64(%rdi), %r13
7937 ; X64-NEXT: movq 64(%r9), %r13
79467938 ; X64-NEXT: movq %r13, %rax
79477939 ; X64-NEXT: mulq %r11
79487940 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
79497941 ; X64-NEXT: movq %rdx, %rcx
7950 ; X64-NEXT: movq 72(%rdi), %r9
7942 ; X64-NEXT: movq 72(%r9), %r9
79517943 ; X64-NEXT: movq %r9, %rax
79527944 ; X64-NEXT: mulq %r11
79537945 ; X64-NEXT: movq %rdx, %rbp
79757967 ; X64-NEXT: movq %rdx, %r11
79767968 ; X64-NEXT: movq %rax, %r15
79777969 ; X64-NEXT: movq %r12, %rcx
7978 ; X64-NEXT: addq %r15, %rcx
7979 ; X64-NEXT: adcq %r11, %r8
7970 ; X64-NEXT: addq %rax, %rcx
7971 ; X64-NEXT: adcq %rdx, %r8
79807972 ; X64-NEXT: addq %rbp, %rcx
79817973 ; X64-NEXT: adcq %rbx, %r8
79827974 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
80288020 ; X64-NEXT: setb %r10b
80298021 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
80308022 ; X64-NEXT: movq %rsi, %rax
8031 ; X64-NEXT: movq %r8, %rdi
8032 ; X64-NEXT: mulq %rdi
8023 ; X64-NEXT: mulq %r8
80338024 ; X64-NEXT: movq %rdx, %rcx
80348025 ; X64-NEXT: movq %rax, %r9
80358026 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
80368027 ; X64-NEXT: movq %rbp, %rax
8037 ; X64-NEXT: mulq %rdi
8038 ; X64-NEXT: movq %rdi, %r12
8028 ; X64-NEXT: mulq %r8
8029 ; X64-NEXT: movq %r8, %r12
80398030 ; X64-NEXT: movq %rdx, %rdi
80408031 ; X64-NEXT: movq %rax, %rbx
80418032 ; X64-NEXT: addq %rcx, %rbx
80748065 ; X64-NEXT: imulq %rcx, %rdi
80758066 ; X64-NEXT: movq %rcx, %rax
80768067 ; X64-NEXT: movq %r12, %rsi
8077 ; X64-NEXT: mulq %rsi
8068 ; X64-NEXT: mulq %r12
80788069 ; X64-NEXT: movq %rax, %r9
80798070 ; X64-NEXT: addq %rdi, %rdx
80808071 ; X64-NEXT: movq 104(%rbp), %r8
4343 ; X32-NEXT: movl %edi, %eax
4444 ; X32-NEXT: mull %ecx
4545 ; X32-NEXT: movl %ecx, %edi
46 ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
46 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
4747 ; X32-NEXT: movl %edx, %ecx
4848 ; X32-NEXT: addl %ebx, %eax
4949 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
6161 ; X32-NEXT: movl %ecx, %eax
6262 ; X32-NEXT: mull %edx
6363 ; X32-NEXT: movl %edx, %ebp
64 ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
64 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
6565 ; X32-NEXT: movl %eax, %esi
66 ; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
66 ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
6767 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
6868 ; X32-NEXT: xorl %edx, %edx
6969 ; X32-NEXT: mull %edx
126126 ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
127127 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
128128 ; X32-NEXT: movl %eax, %ecx
129 ; X32-NEXT: movl 8(%ecx), %ebx
129 ; X32-NEXT: movl 8(%eax), %ebx
130130 ; X32-NEXT: movl %esi, %eax
131131 ; X32-NEXT: movl %esi, %edi
132132 ; X32-NEXT: mull %ebx
155155 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
156156 ; X32-NEXT: adcl %eax, %esi
157157 ; X32-NEXT: movl %ebx, %edi
158 ; X32-NEXT: movl %edi, %eax
158 ; X32-NEXT: movl %ebx, %eax
159159 ; X32-NEXT: xorl %ecx, %ecx
160160 ; X32-NEXT: mull %ecx
161161 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
3030 ; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
3131 ; X32-NEXT: adcl %ecx, %ebx
3232 ; X32-NEXT: movl %ecx, %edi
33 ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
33 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
3434 ; X32-NEXT: setb %cl
3535 ; X32-NEXT: addl %eax, %ebx
3636 ; X32-NEXT: movzbl %cl, %ecx
5454 ; X32-NEXT: mull %ebx
5555 ; X32-NEXT: movl %eax, %ebp
5656 ; X32-NEXT: movl %edx, %edi
57 ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
57 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
5858 ; X32-NEXT: movl 4(%ecx), %eax
5959 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
6060 ; X32-NEXT: movl %ecx, %esi
9191 ; X32-NEXT: adcl %edi, %eax
9292 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
9393 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
94 ; X32-NEXT: movl %ecx, %eax
95 ; X32-NEXT: movl (%eax), %eax
94 ; X32-NEXT: movl (%ecx), %eax
9695 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
9796 ; X32-NEXT: xorl %ebp, %ebp
9897 ; X32-NEXT: mull %ebp
9998 ; X32-NEXT: movl %edx, %ebx
10099 ; X32-NEXT: movl %eax, %ecx
101 ; X32-NEXT: movl %ecx, %edx
100 ; X32-NEXT: movl %eax, %edx
102101 ; X32-NEXT: addl %esi, %edx
103102 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
104103 ; X32-NEXT: movl %ebx, %eax
112111 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
113112 ; X32-NEXT: movl %ecx, %edi
114113 ; X32-NEXT: movl %ecx, %ebp
115 ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
114 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
116115 ; X32-NEXT: addl %eax, %edi
117116 ; X32-NEXT: movl %ebx, %eax
118117 ; X32-NEXT: adcl %edx, %eax
142141 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
143142 ; X32-NEXT: adcl %ebx, %ecx
144143 ; X32-NEXT: movl %ebx, %esi
145 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
144 ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
146145 ; X32-NEXT: setb %bl
147146 ; X32-NEXT: addl %eax, %ecx
148147 ; X32-NEXT: movzbl %bl, %ebx
277276 ; X32-NEXT: adcl %ebx, %ecx
278277 ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
279278 ; X32-NEXT: movl %edi, %ebp
280 ; X32-NEXT: movl %ebp, %eax
279 ; X32-NEXT: movl %edi, %eax
281280 ; X32-NEXT: mull %esi
282281 ; X32-NEXT: movl %edx, %edi
283282 ; X32-NEXT: movl %eax, %ebx
432431 ; X32-NEXT: adcl %edi, %ecx
433432 ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
434433 ; X32-NEXT: movl %ebx, %edi
435 ; X32-NEXT: movl %edi, %eax
434 ; X32-NEXT: movl %ebx, %eax
436435 ; X32-NEXT: mull %esi
437436 ; X32-NEXT: movl %eax, %ebp
438437 ; X32-NEXT: addl %ecx, %ebp
898897 ; X32-NEXT: movl %ecx, %eax
899898 ; X32-NEXT: mull %esi
900899 ; X32-NEXT: movl %esi, %ecx
901 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
900 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
902901 ; X32-NEXT: movl %edx, %esi
903902 ; X32-NEXT: addl %ebx, %eax
904903 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
928927 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
929928 ; X32-NEXT: movl %ecx, %eax
930929 ; X32-NEXT: movl %ebx, %esi
931 ; X32-NEXT: mull %esi
930 ; X32-NEXT: mull %ebx
932931 ; X32-NEXT: movl %edx, %edi
933932 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
934933 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
10761075 ; X32-NEXT: addl %esi, %edx
10771076 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
10781077 ; X32-NEXT: movl %edi, %eax
1079 ; X32-NEXT: imull %eax, %esi
1078 ; X32-NEXT: imull %edi, %esi
10801079 ; X32-NEXT: addl %edx, %esi
10811080 ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
10821081 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
11761175 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
11771176 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
11781177 ; X32-NEXT: movl %esi, %ecx
1179 ; X32-NEXT: movl 40(%ecx), %ebx
1178 ; X32-NEXT: movl 40(%esi), %ebx
11801179 ; X32-NEXT: movl %ebx, %eax
11811180 ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
11821181 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
13731372 ; X32-NEXT: addl %edi, %edx
13741373 ; X32-NEXT: movl 60(%ebx), %ebx
13751374 ; X32-NEXT: movl %ecx, %eax
1376 ; X32-NEXT: imull %eax, %ebx
1375 ; X32-NEXT: imull %ecx, %ebx
13771376 ; X32-NEXT: addl %edx, %ebx
13781377 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
13791378 ; X32-NEXT: addl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
15451544 ; X64-NEXT: movq 8(%rsi), %rbp
15461545 ; X64-NEXT: movq %r15, %rax
15471546 ; X64-NEXT: movq %rdx, %rsi
1548 ; X64-NEXT: mulq %rsi
1547 ; X64-NEXT: mulq %rdx
15491548 ; X64-NEXT: movq %rdx, %r9
15501549 ; X64-NEXT: movq %rax, %r8
15511550 ; X64-NEXT: movq %r11, %rax
15681567 ; X64-NEXT: movq %r11, %rax
15691568 ; X64-NEXT: mulq %rbp
15701569 ; X64-NEXT: movq %rbp, %r14
1571 ; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill
1570 ; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill
15721571 ; X64-NEXT: movq %rdx, %rsi
15731572 ; X64-NEXT: movq %rax, %rbp
15741573 ; X64-NEXT: addq %rcx, %rbp
15751574 ; X64-NEXT: adcq %rbx, %rsi
15761575 ; X64-NEXT: xorl %ecx, %ecx
15771576 ; X64-NEXT: movq %r10, %rbx
1578 ; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
1579 ; X64-NEXT: movq %rbx, %rax
1577 ; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill
1578 ; X64-NEXT: movq %r10, %rax
15801579 ; X64-NEXT: mulq %rcx
15811580 ; X64-NEXT: movq %rdx, %r13
15821581 ; X64-NEXT: movq %rax, %r10
15841583 ; X64-NEXT: mulq %rcx
15851584 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
15861585 ; X64-NEXT: movq %rax, %r15
1587 ; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
1586 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
15881587 ; X64-NEXT: addq %r10, %r15
15891588 ; X64-NEXT: adcq %r13, %rdx
15901589 ; X64-NEXT: addq %rbp, %r15
16231622 ; X64-NEXT: mulq %rdx
16241623 ; X64-NEXT: movq %rdx, %r14
16251624 ; X64-NEXT: movq %rax, %r11
1626 ; X64-NEXT: addq %r11, %r10
1627 ; X64-NEXT: adcq %r14, %r13
1625 ; X64-NEXT: addq %rax, %r10
1626 ; X64-NEXT: adcq %rdx, %r13
16281627 ; X64-NEXT: addq %rbp, %r10
16291628 ; X64-NEXT: adcq %rsi, %r13
16301629 ; X64-NEXT: addq %r8, %r10
16361635 ; X64-NEXT: movq 16(%rsi), %r8
16371636 ; X64-NEXT: movq %rcx, %rax
16381637 ; X64-NEXT: movq %rcx, %r9
1639 ; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill
1638 ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
16401639 ; X64-NEXT: mulq %r8
16411640 ; X64-NEXT: movq %rdx, %rdi
16421641 ; X64-NEXT: movq %rax, %r12
16671666 ; X64-NEXT: mulq %rcx
16681667 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
16691668 ; X64-NEXT: movq %rax, %rbp
1670 ; X64-NEXT: addq %rbp, %r11
1669 ; X64-NEXT: addq %rax, %r11
16711670 ; X64-NEXT: adcq %rdx, %r14
16721671 ; X64-NEXT: addq %r9, %r11
16731672 ; X64-NEXT: adcq %rbx, %r14
77 ; X64-NEXT: movq %rdx, %r8
88 ; X64-NEXT: imulq %rdi, %rcx
99 ; X64-NEXT: movq %rdi, %rax
10 ; X64-NEXT: mulq %r8
10 ; X64-NEXT: mulq %rdx
11