llvm.org GIT mirror llvm / a4ec08b
[CodeGen] Print register names in lowercase in both MIR and debug output As part of the unification of the debug format and the MIR format, always print registers as lowercase. * Only debug printing is affected. It now follows MIR. Differential Revision: https://reviews.llvm.org/D40417 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319187 91177308-0d34-0410-b5e6-96231b3b80d8 Francis Visoiu Mistrih 2 years ago
358 changed file(s) with 4047 addition(s) and 4045 deletion(s). Raw diff Collapse all Expand all
1919 /// register.
2020 ///
2121 /// X86 Example:
22 /// %YMM0 = ...
23 /// %XMM0 = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0)
22 /// %ymm0 = ...
23 /// %xmm0 = ... (Kills %xmm0, all %xmm0s sub-registers, and %ymm0)
2424 ///
25 /// %YMM0 = ...
26 /// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive)
25 /// %ymm0 = ...
26 /// %xmm0 = ..., %ymm0 (%ymm0 and all its sub-registers are alive)
2727 //===----------------------------------------------------------------------===//
2828
2929 #ifndef LLVM_CODEGEN_LIVEPHYSREGS_H
370370
371371 /// substPhysReg - Substitute the current register with the physical register
372372 /// Reg, taking any existing SubReg into account. For instance,
373 /// substPhysReg(%EAX) will change %reg1024:sub_8bit to %AL.
373 /// substPhysReg(%eax) will change %reg1024:sub_8bit to %al.
374374 ///
375375 void substPhysReg(unsigned Reg, const TargetRegisterInfo&);
376376
11391139 /// %noreg - NoRegister
11401140 /// %vreg5 - a virtual register.
11411141 /// %vreg5:sub_8bit - a virtual register with sub-register index (with TRI).
1142 /// %EAX - a physical register
1142 /// %eax - a physical register
11431143 /// %physreg17 - a physical register when no TRI instance given.
11441144 ///
11451145 /// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n';
11501150 ///
11511151 /// Register units are named after their root registers:
11521152 ///
1153 /// AL - Single root.
1154 /// FP0~ST7 - Dual roots.
1153 /// al - Single root.
1154 /// fp0~st7 - Dual roots.
11551155 ///
11561156 /// Usage: OS << printRegUnit(Unit, TRI) << '\n';
11571157 Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI);
447447 // FIXME: The issue with predicated instruction is more complex. We are being
448448 // conservative here because the kill markers cannot be trusted after
449449 // if-conversion:
450 // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
450 // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
451451 // ...
452 // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
453 // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
454 // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
452 // STR %r0, %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
453 // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
454 // STR %r0, %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
455455 //
456456 // The first R6 kill is not really a kill since it's killed by a predicated
457457 // instruction which may not be executed. The second R6 def may or may not
169169 // FIXME: The issue with predicated instruction is more complex. We are being
170170 // conservative here because the kill markers cannot be trusted after
171171 // if-conversion:
172 // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
172 // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
173173 // ...
174 // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
175 // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
176 // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
174 // STR %r0, %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
175 // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
176 // STR %r0, %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
177177 //
178178 // The first R6 kill is not really a kill since it's killed by a predicated
179179 // instruction which may not be executed. The second R6 def may or may not
103103 if (DstSubReg == InsReg) {
104104 // No need to insert an identity copy instruction.
105105 // Watch out for case like this:
106 // %RAX = SUBREG_TO_REG 0, %EAX, 3
107 // We must leave %RAX live.
106 // %rax = SUBREG_TO_REG 0, %eax, 3
107 // We must leave %rax live.
108108 if (DstReg != InsReg) {
109109 MI->setDesc(TII->get(TargetOpcode::KILL));
110110 MI->RemoveOperand(3); // SubIdx
497497
498498 // Starting with a code fragment like:
499499 //
500 // test %RAX, %RAX
500 // test %rax, %rax
501501 // jne LblNotNull
502502 //
503503 // LblNull:
507507 // Inst0
508508 // Inst1
509509 // ...
510 // Def = Load (%RAX + )
510 // Def = Load (%rax + )
511511 // ...
512512 //
513513 //
514514 // we want to end up with
515515 //
516 // Def = FaultingLoad (%RAX + ), LblNull
516 // Def = FaultingLoad (%rax + ), LblNull
517517 // jmp LblNotNull ;; explicit or fallthrough
518518 //
519519 // LblNotNull:
527527 //
528528 // To see why this is legal, consider the two possibilities:
529529 //
530 // 1. %RAX is null: since we constrain to be less than PageSize, the
530 // 1. %rax is null: since we constrain to be less than PageSize, the
531531 // load instruction dereferences the null page, causing a segmentation
532532 // fault.
533533 //
534 // 2. %RAX is not null: in this case we know that the load cannot fault, as
534 // 2. %rax is not null: in this case we know that the load cannot fault, as
535535 // otherwise the load would've faulted in the original program too and the
536536 // original program would've been undefined.
537537 //
697697 // Check if any of the regunits are live beyond the end of RI. That could
698698 // happen when a physreg is defined as a copy of a virtreg:
699699 //
700 // %EAX = COPY %vreg5
701 // FOO %vreg5 <--- MI, cancel kill because %EAX is live.
702 // BAR %EAX
700 // %eax = COPY %vreg5
701 // FOO %vreg5 <--- MI, cancel kill because %eax is live.
702 // BAR %eax
703703 //
704 // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
704 // There should be no kill flag on FOO when %vreg5 is rewritten as %eax.
705705 for (auto &RUP : RU) {
706706 const LiveRange &RURange = *RUP.first;
707707 LiveRange::const_iterator &I = RUP.second;
622622 // Go through implicit defs of CSMI and MI, and clear the kill flags on
623623 // their uses in all the instructions between CSMI and MI.
624624 // We might have made some of the kill flags redundant, consider:
625 // subs ... %NZCV <- CSMI
626 // csinc ... %NZCV <- this kill flag isn't valid anymore
627 // subs ... %NZCV <- MI, to be eliminated
628 // csinc ... %NZCV
625 // subs ... %nzcv <- CSMI
626 // csinc ... %nzcv <- this kill flag isn't valid anymore
627 // subs ... %nzcv <- MI, to be eliminated
628 // csinc ... %nzcv
629629 // Since we eliminated MI, and reused a register imp-def'd by CSMI
630 // (here %NZCV), that register, if it was killed before MI, should have
630 // (here %nzcv), that register, if it was killed before MI, should have
631631 // that kill flag removed, because its lifetime was extended.
632632 if (CSMI->getParent() == MI->getParent()) {
633633 for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
225225
226226 // The two copies cancel out and the source of the first copy
227227 // hasn't been overridden, eliminate the second one. e.g.
228 // %ECX = COPY %EAX
229 // ... nothing clobbered EAX.
230 // %EAX = COPY %ECX
228 // %ecx = COPY %eax
229 // ... nothing clobbered eax.
230 // %eax = COPY %ecx
231231 // =>
232 // %ECX = COPY %EAX
232 // %ecx = COPY %eax
233233 //
234234 // or
235235 //
236 // %ECX = COPY %EAX
237 // ... nothing clobbered EAX.
238 // %ECX = COPY %EAX
236 // %ecx = COPY %eax
237 // ... nothing clobbered eax.
238 // %ecx = COPY %eax
239239 // =>
240 // %ECX = COPY %EAX
240 // %ecx = COPY %eax
241241 if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
242242 continue;
243243
245245 // BB#1: derived from LLVM BB %bb4.preheader
246246 // Predecessors according to CFG: BB#0
247247 // ...
248 // %reg16385 = DEC64_32r %reg16437, %EFLAGS
248 // %reg16385 = DEC64_32r %reg16437, %eflags
249249 // ...
250 // JE_4 , %EFLAGS
250 // JE_4 , %eflags
251251 // Successors according to CFG: BB#37 BB#2
252252 //
253253 // BB#2: derived from LLVM BB %bb.nph
15151515 unsigned DstReg = MI->getOperand(0).getReg();
15161516 unsigned SrcReg = MI->getOperand(1).getReg();
15171517 if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
1518 // %vreg = COPY %PHYSREG
1518 // %vreg = COPY %physreg
15191519 // Avoid using a datastructure which can track multiple live non-allocatable
15201520 // phys->virt copies since LLVM doesn't seem to do this.
15211521 NAPhysToVirtMIs.insert({SrcReg, MI});
15251525 if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
15261526 return false;
15271527
1528 // %PHYSREG = COPY %vreg
1528 // %physreg = COPY %vreg
15291529 auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
15301530 if (PrevCopy == NAPhysToVirtMIs.end()) {
15311531 // We can't remove the copy: there was an intervening clobber of the
16951695 // Track when a non-allocatable physical register is copied to a virtual
16961696 // register so that useless moves can be removed.
16971697 //
1698 // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG`
1699 // without any intervening re-definition of %PHYSREG.
1698 // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg`
1699 // without any intervening re-definition of %physreg.
17001700 DenseMap NAPhysToVirtMIs;
17011701
17021702 // Set of virtual registers that are copied from.
3232 bb27 ...
3333 ...
3434 %reg1037 = ADDri %reg1039, 1
35 %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
35 %reg1038 = ADDrs %reg1032, %reg1039, %noreg, 10
3636 Successors according to CFG: 0x8b03bf0 (#5)
3737
3838 bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
18191819 MachineInstr *CopyMI;
18201820 if (CP.isFlipped()) {
18211821 // Physreg is copied into vreg
1822 // %vregY = COPY %X
1823 // ... //< no other def of %X here
1822 // %vregY = COPY %x
1823 // ... //< no other def of %x here
18241824 // use %vregY
18251825 // =>
18261826 // ...
1827 // use %X
1827 // use %x
18281828 CopyMI = MRI->getVRegDef(SrcReg);
18291829 } else {
18301830 // VReg is copied into physreg:
18311831 // %vregX = def
1832 // ... //< no other def or use of %Y here
1833 // %Y = COPY %vregX
1832 // ... //< no other def or use of %y here
1833 // %y = COPY %vregX
18341834 // =>
1835 // %Y = def
1835 // %y = def
18361836 // ...
18371837 if (!MRI->hasOneNonDBGUse(SrcReg)) {
18381838 DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
1414 #include "llvm/ADT/ArrayRef.h"
1515 #include "llvm/ADT/BitVector.h"
1616 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/StringExtras.h"
1718 #include "llvm/CodeGen/MachineFrameInfo.h"
1819 #include "llvm/CodeGen/MachineFunction.h"
1920 #include "llvm/CodeGen/MachineRegisterInfo.h"
9293 OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
9394 else if (TargetRegisterInfo::isVirtualRegister(Reg))
9495 OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
95 else if (TRI && Reg < TRI->getNumRegs())
96 OS << '%' << TRI->getName(Reg);
97 else
96 else if (TRI && Reg < TRI->getNumRegs()) {
97 OS << '%';
98 printLowerCase(TRI->getName(Reg), OS);
99 } else
98100 OS << "%physreg" << Reg;
99101 if (SubIdx) {
100102 if (TRI)
588588 // e.g.
589589 // %reg1028 = EXTRACT_SUBREG %reg1027, 1
590590 // %reg1029 = MOV8rr %reg1028
591 // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS
591 // %reg1029 = SHR8ri %reg1029, 7, %eflags
592592 // insert => %reg1030 = MOV8rr %reg1028
593 // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS
593 // %reg1030 = ADD8rr %reg1028, %reg1029, %eflags
594594 // In this case, it might not be possible to coalesce the second MOV8rr
595595 // instruction if the first one is coalesced. So it would be profitable to
596596 // commute it:
597597 // %reg1028 = EXTRACT_SUBREG %reg1027, 1
598598 // %reg1029 = MOV8rr %reg1028
599 // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS
599 // %reg1029 = SHR8ri %reg1029, 7, %eflags
600600 // insert => %reg1030 = MOV8rr %reg1029
601 // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS
601 // %reg1030 = ADD8rr %reg1029, %reg1028, %eflags
602602
603603 if (!isPlainlyKilled(MI, regC, LIS))
604604 return false;
605605
606606 // Ok, we have something like:
607 // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS
607 // %reg1030 = ADD8rr %reg1028, %reg1029, %eflags
608608 // let's see if it's worth commuting it.
609609
610610 // Look for situations like this:
379379 ++NumIdCopies;
380380
381381 // Copies like:
382 // %R0 = COPY %R0
383 // %AL = COPY %AL, %EAXdef>
382 // %r0 = COPY %r0def>
383 // %al = COPY %al, %eax
384384 // give us additional liveness information: The target (super-)register
385385 // must not be valid before this point. Replace the COPY with a KILL
386386 // instruction to maintain this information.
28002800 LiveIntervals *LIS) const {
28012801 // This is a bit of a hack. Consider this instruction:
28022802 //
2803 // %vreg0 = COPY %SP; GPR64all:%vreg0
2803 // %vreg0 = COPY %sp; GPR64all:%vreg0
28042804 //
28052805 // We explicitly chose GPR64all for the virtual register so such a copy might
28062806 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2807 // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2807 // %vreg0 may even spill. We can't spill %sp, and since it is in the GPR64all
28082808 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
28092809 //
28102810 // To prevent that, we are going to constrain the %vreg0 register class here.
28292829 // Handle the case where a copy is being spilled or filled but the source
28302830 // and destination register class don't match. For example:
28312831 //
2832 // %vreg0 = COPY %XZR; GPR64common:%vreg0
2832 // %vreg0 = COPY %xzr; GPR64common:%vreg0
28332833 //
28342834 // In this case we can still safely fold away the COPY and generate the
28352835 // following spill code:
28362836 //
2837 // STRXui %XZR,
2837 // STRXui %xzr,
28382838 //
28392839 // This also eliminates spilled cross register class COPYs (e.g. between x and
28402840 // d regs) of the same size. For example:
28852885
28862886 // Handle cases like spilling def of:
28872887 //
2888 // %vreg0:sub_32 = COPY %WZR; GPR64common:%vreg0
2888 // %vreg0:sub_32 = COPY %wzr; GPR64common:%vreg0
28892889 //
28902890 // where the physical register source can be widened and stored to the full
28912891 // virtual reg destination stack slot, in this case producing:
28922892 //
2893 // STRXui %XZR,
2893 // STRXui %xzr,
28942894 //
28952895 if (IsSpill && DstMO.isUndef() &&
28962896 TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
829829 if (SExtIdx != -1) {
830830 // Generate the sign extension for the proper result of the ldp.
831831 // I.e., with X1, that would be:
832 // %W1 = KILL %W1, %X1
833 // %X1 = SBFMXri %X1, 0, 31
832 // %w1 = KILL %w1, %x1
833 // %x1 = SBFMXri %x1, 0, 31
834834 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
835835 // Right now, DstMO has the extended register, since it comes from an
836836 // extended opcode.
143143 // to be caused by ALU instructions in the next instruction group that wrote
144144 // to the $src_gpr registers of the VTX_READ.
145145 // e.g.
146 // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24
147 // %T2_X = MOV %ZERO
146 // %t3_x = VTX_READ_PARAM_32_eg %t2_x, 24
147 // %t2_x = MOV %zero
148148 //Adding this constraint prevents this from happening.
149149 let Constraints = "$src_gpr.ptr = $dst_gpr";
150150 }
211211 // to be caused by ALU instructions in the next instruction group that wrote
212212 // to the $src_gpr registers of the VTX_READ.
213213 // e.g.
214 // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24
215 // %T2_X = MOV %ZERO
214 // %t3_x = VTX_READ_PARAM_32_eg %t2_x, 24
215 // %t2_x = MOV %zero
216216 //Adding this constraint prevents this from happening.
217217 let Constraints = "$src_gpr.ptr = $dst_gpr";
218218 }
970970 // Prevent folding operands backwards in the function. For example,
971971 // the COPY opcode must not be replaced by 1 in this example:
972972 //
973 // %vreg3 = COPY %VGPR0; VGPR_32:%vreg3
973 // %vreg3 = COPY %vgpr0; VGPR_32:%vreg3
974974 // ...
975 // %VGPR0 = V_MOV_B32_e32 1, %EXEC
975 // %vgpr0 = V_MOV_B32_e32 1, %exec
976976 MachineOperand &Dst = MI.getOperand(0);
977977 if (Dst.isReg() &&
978978 !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
65996599 I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
66006600 return;
66016601
6602 // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
6602 // Lane means which subreg of %vgpra_vgprb_vgprc_vgprd is used.
66036603 // Note that subregs are packed, i.e. Lane==0 is the first bit set
66046604 // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
66056605 // set, etc.
2020 /// EXEC to update the predicates.
2121 ///
2222 /// For example:
23 /// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
24 /// %SGPR0 = SI_IF %VCC
25 /// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
26 /// %SGPR0 = SI_ELSE %SGPR0
27 /// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
28 /// SI_END_CF %SGPR0
23 /// %vcc = V_CMP_GT_F32 %vgpr1, %vgpr2
24 /// %sgpr0 = SI_IF %vcc
25 /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0
26 /// %sgpr0 = SI_ELSE %sgpr0
27 /// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0
28 /// SI_END_CF %sgpr0
2929 ///
3030 /// becomes:
3131 ///
32 /// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
33 /// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
32 /// %sgpr0 = S_AND_SAVEEXEC_B64 %vcc // Save and update the exec mask
33 /// %sgpr0 = S_XOR_B64 %sgpr0, %exec // Clear live bits from saved exec mask
3434 /// S_CBRANCH_EXECZ label0 // This instruction is an optional
3535 /// // optimization which allows us to
3636 /// // branch if all the bits of
3737 /// // EXEC are zero.
38 /// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
38 /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch
3939 ///
4040 /// label0:
41 /// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block
42 /// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
41 /// %sgpr0 = S_OR_SAVEEXEC_B64 %exec // Restore the exec mask for the Then block
42 /// %exec = S_XOR_B64 %sgpr0, %exec // Clear live bits from saved exec mask
4343 /// S_BRANCH_EXECZ label1 // Use our branch optimization
4444 /// // instruction again.
45 /// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block
45 /// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr // Do the THEN block
4646 /// label1:
47 /// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
47 /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits
4848 //===----------------------------------------------------------------------===//
4949
5050 #include "AMDGPU.h"
18311831 if (!HasFP) {
18321832 if (SavedRegs.test(ARM::R7)) {
18331833 --RegDeficit;
1834 DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
1834 DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
18351835 << RegDeficit << "\n");
18361836 } else {
18371837 AvailableRegs.push_back(ARM::R7);
18381838 DEBUG(dbgs()
1839 << "%R7 is non-saved low register, adding to AvailableRegs\n");
1839 << "%r7 is non-saved low register, adding to AvailableRegs\n");
18401840 }
18411841 }
18421842
18581858 MF.getFrameInfo().isReturnAddressTaken())) {
18591859 if (SavedRegs.test(ARM::LR)) {
18601860 --RegDeficit;
1861 DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit
1861 DEBUG(dbgs() << "%lr is saved register, RegDeficit = " << RegDeficit
18621862 << "\n");
18631863 } else {
18641864 AvailableRegs.push_back(ARM::LR);
1865 DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n");
1865 DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
18661866 }
18671867 }
18681868
16961696 if (OddReg == EvenReg && EvenDeadKill) {
16971697 // If the two source operands are the same, the kill marker is
16981698 // probably on the first one. e.g.
1699 // t2STRDi8 %R5, %R5, %R9, 0, 14, %reg0
1699 // t2STRDi8 %r5, %r5, %r9, 0, 14, %reg0
17001700 EvenDeadKill = false;
17011701 OddDeadKill = true;
17021702 }
367367 }
368368 }
369369 // Defs and clobbers can overlap, e.g.
370 // %D0 = COPY %vreg5, %R0, %R1
370 // %d0 = COPY %vreg5, %r0, %r1
371371 for (RegisterRef R : Defs)
372372 Clobbers.erase(R);
373373
19731973 {
19741974 const MachineOperand &VO = MI.getOperand(1);
19751975 // The operand of CONST32 can be a blockaddress, e.g.
1976 // %vreg0 = CONST32 L)>
1976 // %vreg0 = CONST32 l)>
19771977 // Do this check for all instructions for safety.
19781978 if (!VO.isImm())
19791979 return false;
31433143 BrI.setDesc(JD);
31443144 while (BrI.getNumOperands() > 0)
31453145 BrI.RemoveOperand(0);
3146 // This ensures that all implicit operands (e.g. %R31, etc)
3146 // This ensures that all implicit operands (e.g. %r31, etc)
31473147 // are present in the rewritten branch.
31483148 for (auto &Op : NI->operands())
31493149 BrI.addOperand(Op);
350350 // kill flag for a register (a removeRegisterKilled() analogous to
351351 // addRegisterKilled) that handles aliased register correctly.
352352 // * or has a killed aliased register use of I1's use reg
353 // %D4 = A2_tfrpi 16
354 // %R6 = A2_tfr %R9
355 // %R8 = KILL %R8, %D4
353 // %d4 = A2_tfrpi 16
354 // %r6 = A2_tfr %r9
355 // %r8 = KILL %r8, %d4
356356 // If we want to move R6 = across the KILL instruction we would have
357 // to remove the %D4 operand. For now, we are
357 // to remove the %d4 operand. For now, we are
358358 // conservative and disallow the move.
359359 // we can't move I1 across it.
360360 if (MI.isDebugValue()) {
2626 //
2727 // %vreg40 = L2_loadrub_io %vreg39, 1
2828 // %vreg41 = S2_tstbit_i %vreg40, 0
29 // J2_jumpt %vreg41, , %PC
30 // J2_jump , %PC
29 // J2_jumpt %vreg41, , %pc
30 // J2_jump , %pc
3131 // Successors according to CFG: BB#4(62) BB#5(62)
3232 //
3333 // BB#4: derived from LLVM BB %if.then
4141 // %vreg12 = PHI %vreg6, , %vreg11,
4242 // %vreg13 = A2_addp %vreg7, %vreg12
4343 // %vreg42 = C2_cmpeqi %vreg9, 10
44 // J2_jumpf %vreg42, , %PC
45 // J2_jump , %PC
44 // J2_jumpf %vreg42, , %pc
45 // J2_jump , %pc
4646 // Successors according to CFG: BB#6(4) BB#3(124)
4747 //
4848 // would become:
5454 // %vreg46 = PS_pselect %vreg41, %vreg6, %vreg11
5555 // %vreg13 = A2_addp %vreg7, %vreg46
5656 // %vreg42 = C2_cmpeqi %vreg9, 10
57 // J2_jumpf %vreg42, , %PC
58 // J2_jump , %PC
57 // J2_jumpf %vreg42, , %pc
58 // J2_jump , %pc
5959 // Successors according to CFG: BB#6 BB#3
6060
6161 #include "Hexagon.h"
17191719 MachineOperand &MO = PredDef->getOperand(i);
17201720 if (MO.isReg()) {
17211721 // Skip all implicit references. In one case there was:
1722 // %vreg140 = FCMPUGT32_rr %vreg138, %vreg139, %USR
1722 // %vreg140 = FCMPUGT32_rr %vreg138, %vreg139, %usr
17231723 if (MO.isImplicit())
17241724 continue;
17251725 if (MO.isUse()) {
16151615 }
16161616
16171617 // Inspired by this pair:
1618 // %R13 = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
1619 // S2_storeri_io %R29, 132, %R1; flags: mem:ST4[FixedStack1]
1618 // %r13 = L2_loadri_io %r29, 136; mem:LD4[FixedStack0]
1619 // S2_storeri_io %r29, 132, %r1; flags: mem:ST4[FixedStack1]
16201620 // Currently AA considers the addresses in these instructions to be aliasing.
16211621 bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
16221622 MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
35153515 case Hexagon::EH_RETURN_JMPR:
35163516 case Hexagon::PS_jmpret:
35173517 // jumpr r31
3518 // Actual form JMPR %PC, %R31, %R0.
3518 // Actual form JMPR %pc, %r31, %r0.
35193519 DstReg = MI.getOperand(0).getReg();
35203520 if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))
35213521 return HexagonII::HSIG_L2;
37053705 case Hexagon::C2_cmovenewif:
37063706 // if ([!]P0[.new]) Rd = #0
37073707 // Actual form:
3708 // %R16 = C2_cmovenewit %P0, 0, %R16;
3708 // %r16 = C2_cmovenewit %p0, 0, %r16;
37093709 DstReg = MI.getOperand(0).getReg();
37103710 SrcReg = MI.getOperand(1).getReg();
37113711 if (isIntRegForSubInst(DstReg) &&
128128 // using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic
129129 // before the callsite of this function
130130 // But we can not as it comes in the following fashion.
131 // %D0 = Hexagon_S2_lsr_r_p %D0, %R2
132 // %R0 = KILL %R0, %D0
133 // %P0 = CMPEQri %R0, 0
131 // %d0 = Hexagon_S2_lsr_r_p %d0, %r2
132 // %r0 = KILL %r0, %d0
133 // %p0 = CMPEQri %r0, 0
134134 // Hence, we need to check if it's a KILL instruction.
135135 if (II->getOpcode() == TargetOpcode::KILL)
136136 return false;
192192 // to new value jump. If they are in the path, bail out.
193193 // KILL sets kill flag on the opcode. It also sets up a
194194 // single register, out of pair.
195 // %D0 = S2_lsr_r_p %D0, %R2
196 // %R0 = KILL %R0, %D0
197 // %P0 = C2_cmpeqi %R0, 0
195 // %d0 = S2_lsr_r_p %d0, %r2
196 // %r0 = KILL %r0, %d0
197 // %p0 = C2_cmpeqi %r0, 0
198198 // PHI can be anything after RA.
199199 // COPY can rematerialize things in between feeder, compare and nvj.
200200 if (MII->getOpcode() == TargetOpcode::KILL ||
1919 // ...
2020 // %vreg16 = NOT_p %vreg15
2121 // ...
22 // JMP_c %vreg16, , %PC
22 // JMP_c %vreg16, , %pc
2323 //
2424 // Into
2525 // %vreg15 = CMPGTrr %vreg6, %vreg2;
2626 // ...
27 // JMP_cNot %vreg15, , %PC;
27 // JMP_cNot %vreg15, , %pc;
2828 //
2929 // Note: The peephole pass makes the instructions like
3030 // %vreg170 = SXTW %vreg166 or %vreg16 = NOT_p %vreg15
219219 shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
220220 DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
221221 // Prevent redundant register copies between two calls, which are caused by
222 // both the return value and the argument for the next call being in %R0.
222 // both the return value and the argument for the next call being in %r0.
223223 // Example:
224224 // 1:
225 // 2: %VregX = COPY %R0
226 // 3:
227 // 4: %R0 = ...
225 // 2: %vregX = COPY %r0
226 // 3:
227 // 4: %r0 = ...
228228 // 5:
229229 // The scheduler would often swap 3 and 4, so an additional register is
230230 // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
231 // this. The same applies for %D0 and %V0/%W0, which are also handled.
231 // this. The same applies for %d0 and %v0/%w0, which are also handled.
232232 else if (SchedRetvalOptimization) {
233233 const MachineInstr *MI = DAG->SUnits[su].getInstr();
234234 if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
235235 MI->readsRegister(Hexagon::V0, &TRI))) {
236 // %vregX = COPY %R0
236 // %vregX = COPY %r0
237237 VRegHoldingRet = MI->getOperand(0).getReg();
238238 RetRegister = MI->getOperand(1).getReg();
239239 LastUseOfRet = nullptr;
241241 //
242242 LastUseOfRet = &DAG->SUnits[su];
243243 else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
244 // %R0 = ...
244 // %r0 = ...
245245 DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
246246 }
247247 }
771771
772772 // If data definition is because of implicit definition of the register,
773773 // do not newify the store. Eg.
774 // %R9 = ZXTH %R12, %D6, %R12
775 // S2_storerh_io %R8, 2, %R12; mem:ST2[%scevgep343]
774 // %r9 = ZXTH %r12, %d6, %r12
775 // S2_storerh_io %r8, 2, %r12; mem:ST2[%scevgep343]
776776 for (auto &MO : PacketMI.operands()) {
777777 if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
778778 return false;
786786 // Handle imp-use of super reg case. There is a target independent side
787787 // change that should prevent this situation but I am handling it for
788788 // just-in-case. For example, we cannot newify R2 in the following case:
789 // %R3 = A2_tfrsi 0;
790 // S2_storeri_io %R0, 0, %R2, %D1;
789 // %r3 = A2_tfrsi 0;
790 // S2_storeri_io %r0, 0, %r2, %d1;
791791 for (auto &MO : MI.operands()) {
792792 if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
793793 return false;
891891 // Go through the packet instructions and search for an anti dependency between
892892 // them and DepReg from MI. Consider this case:
893893 // Trying to add
894 // a) %R1 = TFRI_cdNotPt %P3, 2
894 // a) %r1 = TFRI_cdNotPt %p3, 2
895895 // to this packet:
896896 // {
897 // b) %P0 = C2_or %P3, %P0
898 // c) %P3 = C2_tfrrp %R23
899 // d) %R1 = C2_cmovenewit %P3, 4
897 // b) %p0 = C2_or %p3, %p0
898 // c) %p3 = C2_tfrrp %r23
899 // d) %r1 = C2_cmovenewit %p3, 4
900900 // }
901901 // The P3 from a) and d) will be complements after
902902 // a)'s P3 is converted to .new form
961961
962962 // One corner case deals with the following scenario:
963963 // Trying to add
964 // a) %R24 = A2_tfrt %P0, %R25
964 // a) %r24 = A2_tfrt %p0, %r25
965965 // to this packet:
966966 // {
967 // b) %R25 = A2_tfrf %P0, %R24
968 // c) %P0 = C2_cmpeqi %R26, 1
967 // b) %r25 = A2_tfrf %p0, %r24
968 // c) %p0 = C2_cmpeqi %r26, 1
969969 // }
970970 //
971971 // On general check a) and b) are complements, but presence of c) will
15421542
15431543 // There are certain anti-dependencies that cannot be ignored.
15441544 // Specifically:
1545 // J2_call ... %R0 ; SUJ
1545 // J2_call ... %r0 ; SUJ
15461546 // R0 = ... ; SUI
15471547 // Those cannot be packetized together, since the call will observe
15481548 // the effect of the assignment to R0.
271271 case Hexagon::J2_jumpr:
272272 case Hexagon::PS_jmpret:
273273 // jumpr r31
274 // Actual form JMPR %PC, %R31, %R0.
274 // Actual form JMPR %pc, %r31, %r0.
275275 DstReg = MCI.getOperand(0).getReg();
276276 if (Hexagon::R31 == DstReg)
277277 return HexagonII::HSIG_L2;
470470 case Hexagon::C2_cmovenewif:
471471 // if ([!]P0[.new]) Rd = #0
472472 // Actual form:
473 // %R16 = C2_cmovenewit %P0, 0, %R16;
473 // %r16 = C2_cmovenewit %p0, 0, %r16;
474474 DstReg = MCI.getOperand(0).getReg(); // Rd
475475 PredReg = MCI.getOperand(1).getReg(); // P0
476476 if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
112112
113113 if (!HexagonMCInstrInfo::bundleSize(MCB)) {
114114 // There once was a bundle:
115 // BUNDLE %D2, %R4, %R5, %D7, ...
116 // * %D2 = IMPLICIT_DEF; flags:
117 // * %D7 = IMPLICIT_DEF; flags:
115 // BUNDLE %d2, %r4, %r5, %d7, ...
116 // * %d2 = IMPLICIT_DEF; flags:
117 // * %d7 = IMPLICIT_DEF; flags:
118118 // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
119119 // became empty.
120120 DEBUG(dbgs() << "Skipping empty bundle");
136136
137137 if (!HexagonMCInstrInfo::bundleSize(MCB)) {
138138 // There once was a bundle:
139 // BUNDLE %D2, %R4, %R5, %D7, ...
140 // * %D2 = IMPLICIT_DEF; flags:
141 // * %D7 = IMPLICIT_DEF; flags:
139 // BUNDLE %d2, %r4, %r5, %d7, ...
140 // * %d2 = IMPLICIT_DEF; flags:
141 // * %d7 = IMPLICIT_DEF; flags:
142142 // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
143143 // became empty.
144144 DEBUG(dbgs() << "Skipping empty bundle");
479479 MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));
480480
481481 // For MIPSR6 JI*C requires an immediate 0 as an operand, JIALC(64) an
482 // immediate 0 as an operand and requires the removal of its %RA
482 // immediate 0 as an operand and requires the removal of its %ra
483483 // implicit operand as copying the implicit operations of the instruction we're
484484 // looking at will give us the correct flags.
485485 if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
520520 return LowerPATCHPOINT(SM, *MI);
521521
522522 case PPC::MoveGOTtoLR: {
523 // Transform %LR = MoveGOTtoLR
523 // Transform %lr = MoveGOTtoLR
524524 // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4
525525 // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding
526526 // _GLOBAL_OFFSET_TABLE_) has exactly one instruction:
541541 }
542542 case PPC::MovePCtoLR:
543543 case PPC::MovePCtoLR8: {
544 // Transform %LR = MovePCtoLR
544 // Transform %lr = MovePCtoLR
545545 // Into this, where the label is the PIC base:
546546 // bl L1$pb
547547 // L1$pb:
559559 return;
560560 }
561561 case PPC::UpdateGBR: {
562 // Transform %Rd = UpdateGBR(%Rt, %Ri)
563 // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri)
564 // add %Rd, %Rt, %Ri
562 // Transform %rd = UpdateGBR(%rt, %ri)
563 // Into: lwz %rt, .L0$poff - .L0$pb(%ri)
564 // add %rd, %rt, %ri
565565 // Get the offset from the GOT Base Register to the GOT
566566 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
567567 MCSymbol *PICOffset =
576576 const MCOperand TR = TmpInst.getOperand(1);
577577 const MCOperand PICR = TmpInst.getOperand(0);
578578
579 // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
579 // Step 1: lwz %rt, .L$poff - .L$pb(%ri)
580580 TmpInst.getOperand(1) =
581581 MCOperand::createExpr(MCBinaryExpr::createSub(Exp, PB, OutContext));
582582 TmpInst.getOperand(0) = TR;
591591 return;
592592 }
593593 case PPC::LWZtoc: {
594 // Transform %R3 = LWZtoc , %R2
594 // Transform %r3 = LWZtoc , %r2
595595 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
596596
597597 // Change the opcode to LWZ, and the global address operand to be a
635635 case PPC::LDtocCPT:
636636 case PPC::LDtocBA:
637637 case PPC::LDtoc: {
638 // Transform %X3 = LDtoc , %X2
638 // Transform %x3 = LDtoc , %x2
639639 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
640640
641641 // Change the opcode to LD, and the global address operand to be a
666666 }
667667
668668 case PPC::ADDIStocHA: {
669 // Transform %Xd = ADDIStocHA %X2,
669 // Transform %xd = ADDIStocHA %x2,
670670 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
671671
672672 // Change the opcode to ADDIS8. If the global address is external, has
713713 return;
714714 }
715715 case PPC::LDtocL: {
716 // Transform %Xd = LDtocL , %Xs
716 // Transform %xd = LDtocL , %xs
717717 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
718718
719719 // Change the opcode to LD. If the global address is external, has
756756 return;
757757 }
758758 case PPC::ADDItocL: {
759 // Transform %Xd = ADDItocL %Xs,
759 // Transform %xd = ADDItocL %xs,
760760 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
761761
762762 // Change the opcode to ADDI8. If the global address is external, then
787787 return;
788788 }
789789 case PPC::ADDISgotTprelHA: {
790 // Transform: %Xd = ADDISgotTprelHA %X2,
791 // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
790 // Transform: %xd = ADDISgotTprelHA %x2,
791 // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha
792792 assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
793793 const MachineOperand &MO = MI->getOperand(2);
794794 const GlobalValue *GValue = MO.getGlobal();
804804 }
805805 case PPC::LDgotTprelL:
806806 case PPC::LDgotTprelL32: {
807 // Transform %Xd = LDgotTprelL , %Xs
807 // Transform %xd = LDgotTprelL , %xs
808808 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
809809
810810 // Change the opcode to LD.
865865 return;
866866 }
867867 case PPC::ADDIStlsgdHA: {
868 // Transform: %Xd = ADDIStlsgdHA %X2,
869 // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
868 // Transform: %xd = ADDIStlsgdHA %x2,
869 // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha
870870 assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
871871 const MachineOperand &MO = MI->getOperand(2);
872872 const GlobalValue *GValue = MO.getGlobal();
881881 return;
882882 }
883883 case PPC::ADDItlsgdL:
884 // Transform: %Xd = ADDItlsgdL %Xs,
885 // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
884 // Transform: %xd = ADDItlsgdL %xs,
885 // Into: %xd = ADDI8 %xs, sym@got@tlsgd@l
886886 case PPC::ADDItlsgdL32: {
887 // Transform: %Rd = ADDItlsgdL32 %Rs,
888 // Into: %Rd = ADDI %Rs, sym@got@tlsgd
887 // Transform: %rd = ADDItlsgdL32 %rs,
888 // Into: %rd = ADDI %rs, sym@got@tlsgd
889889 const MachineOperand &MO = MI->getOperand(2);
890890 const GlobalValue *GValue = MO.getGlobal();
891891 MCSymbol *MOSymbol = getSymbol(GValue);
901901 return;
902902 }
903903 case PPC::GETtlsADDR:
904 // Transform: %X3 = GETtlsADDR %X3,
904 // Transform: %x3 = GETtlsADDR %x3,
905905 // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
906906 case PPC::GETtlsADDR32: {
907 // Transform: %R3 = GETtlsADDR32 %R3,
907 // Transform: %r3 = GETtlsADDR32 %r3,
908908 // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
909909 EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
910910 return;
911911 }
912912 case PPC::ADDIStlsldHA: {
913 // Transform: %Xd = ADDIStlsldHA %X2,
914 // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
913 // Transform: %xd = ADDIStlsldHA %x2,
914 // Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha
915915 assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
916916 const MachineOperand &MO = MI->getOperand(2);
917917 const GlobalValue *GValue = MO.getGlobal();
926926 return;
927927 }
928928 case PPC::ADDItlsldL:
929 // Transform: %Xd = ADDItlsldL %Xs,
930 // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
929 // Transform: %xd = ADDItlsldL %xs,
930 // Into: %xd = ADDI8 %xs, sym@got@tlsld@l
931931 case PPC::ADDItlsldL32: {
932 // Transform: %Rd = ADDItlsldL32 %Rs,
933 // Into: %Rd = ADDI %Rs, sym@got@tlsld
932 // Transform: %rd = ADDItlsldL32 %rs,
933 // Into: %rd = ADDI %rs, sym@got@tlsld
934934 const MachineOperand &MO = MI->getOperand(2);
935935 const GlobalValue *GValue = MO.getGlobal();
936936 MCSymbol *MOSymbol = getSymbol(GValue);
946946 return;
947947 }
948948 case PPC::GETtlsldADDR:
949 // Transform: %X3 = GETtlsldADDR %X3,
949 // Transform: %x3 = GETtlsldADDR %x3,
950950 // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
951951 case PPC::GETtlsldADDR32: {
952 // Transform: %R3 = GETtlsldADDR32 %R3,
952 // Transform: %r3 = GETtlsldADDR32 %r3,
953953 // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
954954 EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
955955 return;
956956 }
957957 case PPC::ADDISdtprelHA:
958 // Transform: %Xd = ADDISdtprelHA %Xs,
959 // Into: %Xd = ADDIS8 %Xs, sym@dtprel@ha
958 // Transform: %xd = ADDISdtprelHA %xs,
959 // Into: %xd = ADDIS8 %xs, sym@dtprel@ha
960960 case PPC::ADDISdtprelHA32: {
961 // Transform: %Rd = ADDISdtprelHA32 %Rs,
962 // Into: %Rd = ADDIS %Rs, sym@dtprel@ha
961 // Transform: %rd = ADDISdtprelHA32 %rs,
962 // Into: %rd = ADDIS %rs, sym@dtprel@ha
963963 const MachineOperand &MO = MI->getOperand(2);
964964 const GlobalValue *GValue = MO.getGlobal();
965965 MCSymbol *MOSymbol = getSymbol(GValue);
975975 return;
976976 }
977977 case PPC::ADDIdtprelL:
978 // Transform: %Xd = ADDIdtprelL %Xs,
979 // Into: %Xd = ADDI8 %Xs, sym@dtprel@l
978 // Transform: %xd = ADDIdtprelL %xs,
979 // Into: %xd = ADDI8 %xs, sym@dtprel@l
980980 case PPC::ADDIdtprelL32: {
981 // Transform: %Rd = ADDIdtprelL32 %Rs,
982 // Into: %Rd = ADDI %Rs, sym@dtprel@l
981 // Transform: %rd = ADDIdtprelL32 %rs,
982 // Into: %rd = ADDI %rs, sym@dtprel@l
983983 const MachineOperand &MO = MI->getOperand(2);
984984 const GlobalValue *GValue = MO.getGlobal();
985985 MCSymbol *MOSymbol = getSymbol(GValue);
996996 case PPC::MFOCRF:
997997 case PPC::MFOCRF8:
998998 if (!Subtarget->hasMFOCRF()) {
999 // Transform: %R3 = MFOCRF %CR7
1000 // Into: %R3 = MFCR ;; cr7
999 // Transform: %r3 = MFOCRF %cr7
1000 // Into: %r3 = MFCR ;; cr7
10011001 unsigned NewOpcode =
10021002 MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
10031003 OutStreamer->AddComment(PPCInstPrinter::
10101010 case PPC::MTOCRF:
10111011 case PPC::MTOCRF8:
10121012 if (!Subtarget->hasMFOCRF()) {
1013 // Transform: %CR7 = MTOCRF %R3
1014 // Into: MTCRF mask, %R3 ;; cr7
1013 // Transform: %cr7 = MTOCRF %r3
1014 // Into: MTCRF mask, %r3 ;; cr7
10151015 unsigned NewOpcode =
10161016 MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8;
10171017 unsigned Mask = 0x80 >> OutContext.getRegisterInfo()
5959 /// expands to the following machine code:
6060 ///
6161 /// BB#0: derived from LLVM BB %entry
62 /// Live Ins: %F1 %F3 %X6
62 /// Live Ins: %f1 %f3 %x6
6363 ///
64 /// %vreg0 = COPY %F1; F8RC:%vreg0
64 /// %vreg0 = COPY %f1; F8RC:%vreg0
6565 /// %vreg5 = CMPLWI %vreg4, 0; CRRC:%vreg5 GPRC:%vreg4
66 /// %vreg8 = LXSDX %ZERO8, %vreg7, %RM;
66 /// %vreg8 = LXSDX %zero8, %vreg7, %rm;
6767 /// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
6868 /// BCC 76, %vreg5, ; CRRC:%vreg5
6969 /// Successors according to CFG: BB#1(?%) BB#2(?%)
8989 /// %vreg13 = PHI %vreg12, , %vreg2, ;
9090 /// F8RC:%vreg13,%vreg12,%vreg2
9191 ///
92 /// BLR8 %LR8, %RM, %F1
92 /// BLR8 %lr8, %rm, %f1
9393 ///
9494 /// When this pattern is detected, branch coalescing will try to collapse
9595 /// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3.
9797 /// If all conditions are meet, IR should collapse to:
9898 ///
9999 /// BB#0: derived from LLVM BB %entry
100 /// Live Ins: %F1 %F3 %X6
100 /// Live Ins: %f1 %f3 %x6
101101 ///
102 /// %vreg0 = COPY %F1; F8RC:%vreg0
102 /// %vreg0 = COPY %f1; F8RC:%vreg0
103103 /// %vreg5 = CMPLWI %vreg4, 0; CRRC:%vreg5 GPRC:%vreg4
104 /// %vreg8 = LXSDX %ZERO8, %vreg7, %RM;
104 /// %vreg8 = LXSDX %zero8, %vreg7, %rm;
105105 /// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
106106 ///
107107 /// BCC 76, %vreg5, ; CRRC:%vreg5
119119 /// %vreg13 = PHI %vreg12, , %vreg2, ;
120120 /// F8RC:%vreg13,%vreg12,%vreg2
121121 ///
122 /// BLR8 %LR8, %RM, %F1
122 /// BLR8 %lr8, %rm, %f1
123123 ///
124124 /// Branch Coalescing does not split blocks, it moves everything in the same
125125 /// direction ensuring it does not break use/definition semantics.
19901990 // or externally available linkage, a non-local function address, or a
19911991 // jump table address (not yet needed), or if we are generating code
19921992 // for large code model, we generate:
1993 // LDtocL(GV, ADDIStocHA(%X2, GV))
1993 // LDtocL(GV, ADDIStocHA(%x2, GV))
19941994 // Otherwise we generate:
1995 // ADDItocL(ADDIStocHA(%X2, GV), GV)
1995 // ADDItocL(ADDIStocHA(%x2, GV), GV)
19961996 // Either way, start with the ADDIStocHA:
19971997 unsigned HighPartReg = createResultReg(RC);
19981998 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
32173217
32183218 // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
32193219 // If it must be toc-referenced according to PPCSubTarget, we generate:
3220 // LDtocL(, ADDIStocHA(%X2, ))
3220 // LDtocL(, ADDIStocHA(%x2, ))
32213221 // Otherwise we generate:
3222 // ADDItocL(ADDIStocHA(%X2, ), )
3222 // ADDItocL(ADDIStocHA(%x2, ), )
32233223 SDValue GA = N->getOperand(0);
32243224 SDValue TOCbase = N->getOperand(1);
32253225 SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
261261 /// local dynamic TLS on PPC32.
262262 PPC32_PICGOT,
263263
264 /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
264 /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
265265 /// TLS model, produces an ADDIS8 instruction that adds the GOT
266266 /// base to sym\@got\@tprel\@ha.
267267 ADDIS_GOT_TPREL_HA,
280280 /// TLS sequence.
281281 ADD_TLS,
282282
283 /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
283 /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
284284 /// model, produces an ADDIS8 instruction that adds the GOT base
285285 /// register to sym\@got\@tlsgd\@ha.
286286 ADDIS_TLSGD_HA,
287287
288 /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
288 /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
289289 /// model, produces an ADDI8 instruction that adds G8RReg to
290290 /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
291291 /// ADDIS_TLSGD_L_ADDR until after register assignment.
292292 ADDI_TLSGD_L,
293293
294 /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
294 /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
295295 /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
296296 /// ADDIS_TLSGD_L_ADDR until after register assignment.
297297 GET_TLS_ADDR,
301301 /// register assignment.
302302 ADDI_TLSGD_L_ADDR,
303303
304 /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
304 /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
305305 /// model, produces an ADDIS8 instruction that adds the GOT base
306306 /// register to sym\@got\@tlsld\@ha.
307307 ADDIS_TLSLD_HA,
308308
309 /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
309 /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
310310 /// model, produces an ADDI8 instruction that adds G8RReg to
311311 /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
312312 /// ADDIS_TLSLD_L_ADDR until after register assignment.
313313 ADDI_TLSLD_L,
314314
315 /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
315 /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
316316 /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
317317 /// ADDIS_TLSLD_L_ADDR until after register assignment.
318318 GET_TLSLD_ADDR,
322322 /// following register assignment.
323323 ADDI_TLSLD_L_ADDR,
324324
325 /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
325 /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
326326 /// model, produces an ADDIS8 instruction that adds X3 to
327327 /// sym\@dtprel\@ha.
328328 ADDIS_DTPREL_HA,
23142314
23152315 // For a method return value, we check the ZExt/SExt flags in attribute.
23162316 // We assume the following code sequence for method call.
2317 // ADJCALLSTACKDOWN 32, %R1, %R1
2317 // ADJCALLSTACKDOWN 32, %r1, %r1
23182318 // BL8_NOP ,...
2319 // ADJCALLSTACKUP 32, 0, %R1, %R1
2320 // %vreg5 = COPY %X3; G8RC:%vreg5
2319 // ADJCALLSTACKUP 32, 0, %r1, %r1
2320 // %vreg5 = COPY %x3; G8RC:%vreg5
23212321 if (SrcReg == PPC::X3) {
23222322 const MachineBasicBlock *MBB = MI.getParent();
23232323 MachineBasicBlock::const_instr_iterator II =
7878 }
7979
8080 // We're looking for a sequence like this:
81 // %F0 = LFD 0, %X3, %QF0; mem:LD8[%a](tbaa=!2)
82 // %QF1 = QVESPLATI %QF0, 0, %RM
81 // %f0 = LFD 0, %x3, %qf0; mem:LD8[%a](tbaa=!2)
82 // %qf1 = QVESPLATI %qf0, 0, %rm
8383
8484 for (auto SI = Splats.begin(); SI != Splats.end();) {
8585 MachineInstr *SMI = *SI;
9191 // ...
9292 // %vreg5 = COPY %vreg9; VSLRC:%vreg5,%vreg9
9393 // %vreg5 = XSMADDADP %vreg5, %vreg17, %vreg16,
94 // %RM; VSLRC:%vreg5,%vreg17,%vreg16
94 // %rm; VSLRC:%vreg5,%vreg17,%vreg16
9595 // ...
9696 // %vreg9 = XSMADDADP %vreg9, %vreg17, %vreg19,
97 // %RM; VSLRC:%vreg9,%vreg17,%vreg19
97 // %rm; VSLRC:%vreg9,%vreg17,%vreg19
9898 // ...
9999 // Where we can eliminate the copy by changing from the A-type to the
100100 // M-type instruction. Specifically, for this example, this means:
101101 // %vreg5 = XSMADDADP %vreg5, %vreg17, %vreg16,
102 // %RM; VSLRC:%vreg5,%vreg17,%vreg16
102 // %rm; VSLRC:%vreg5,%vreg17,%vreg16
103103 // is replaced by:
104104 // %vreg16 = XSMADDMDP %vreg16, %vreg18, %vreg9,
105 // %RM; VSLRC:%vreg16,%vreg18,%vreg9
105 // %rm; VSLRC:%vreg16,%vreg18,%vreg9
106106 // and we remove: %vreg5 = COPY %vreg9; VSLRC:%vreg5,%vreg9
107107
108108 SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
149149 // walking the MIs we may as well test liveness here.
150150 //
151151 // FIXME: There is a case that occurs in practice, like this:
152 // %vreg9 = COPY %F1; VSSRC:%vreg9
152 // %vreg9 = COPY %f1; VSSRC:%vreg9
153153 // ...
154154 // %vreg6 = COPY %vreg9; VSSRC:%vreg6,%vreg9
155155 // %vreg7 = COPY %vreg9; VSSRC:%vreg7,%vreg9
305305
306306 return !(MFI.hasCalls() // has calls
307307 || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
308 || MRI.isPhysRegUsed(SP::O6) // %SP is used
309 || hasFP(MF)); // need %FP
308 || MRI.isPhysRegUsed(SP::O6) // %sp is used
309 || hasFP(MF)); // need %fp
310310 }
311311
312312 void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
435435 // Also do a forward search to handle cases where an instruction after the
436436 // compare can be converted like
437437 //
438 // LTEBRCompare %F0S, %F0S, %CC LTEBRCompare %F0S, %F0S, %CC
439 // %F2S = LER %F0S
438 // LTEBRCompare %f0s, %f0s, %cc LTEBRCompare %f0s, %f0s, %cc
439 // %f2s = LER %f0s
440440 //
441441 MBBI = Compare, MBBE = MBB.end();
442442 while (++MBBI != MBBE) {
144144
145145 cond_next140 (0xa910740, LLVM BB @0xa90beb0):
146146 %reg1078 = MOV32ri -3
147 %reg1079 = ADD32rm %reg1078, %reg1068, 1, %NOREG, 0
148 %reg1037 = MOV32rm %reg1024, 1, %NOREG, 40
147 %reg1079 = ADD32rm %reg1078, %reg1068, 1, %noreg, 0
148 %reg1037 = MOV32rm %reg1024, 1, %noreg, 40
149149 %reg1080 = IMUL32rr %reg1079, %reg1037
150 %reg1081 = MOV32rm %reg1058, 1, %NOREG, 0
150 %reg1081 = MOV32rm %reg1058, 1, %noreg, 0
151151 %reg1038 = LEA32r %reg1081, 1, %reg1080, -3
152 %reg1036 = MOV32rm %reg1024, 1, %NOREG, 32
152 %reg1036 = MOV32rm %reg1024, 1, %noreg, 32
153153 %reg1082 = SHL32ri %reg1038, 4
154154 %reg1039 = ADD32rr %reg1036, %reg1082
155 %reg1083 = MOVAPSrm %reg1059, 1, %NOREG, 0
155 %reg1083 = MOVAPSrm %reg1059, 1, %noreg, 0
156156 %reg1034 = SHUFPSrr %reg1083, %reg1083, 170
157157 %reg1032 = SHUFPSrr %reg1083, %reg1083, 0
158158 %reg1035 = SHUFPSrr %reg1083, %reg1083, 255
165165 Still ok. After register allocation:
166166
167167 cond_next140 (0xa910740, LLVM BB @0xa90beb0):
168 %EAX = MOV32ri -3
169 %EDX = MOV32rm , 1, %NOREG, 0
170 ADD32rm %EAX, %EDX, 1, %NOREG, 0
171 %EDX = MOV32rm , 1, %NOREG, 0
172 %EDX = MOV32rm %EDX, 1, %NOREG, 40
173 IMUL32rr %EAX, %EDX
174 %ESI = MOV32rm , 1, %NOREG, 0
175 %ESI = MOV32rm %ESI, 1, %NOREG, 0
176 MOV32mr , 1, %NOREG, 0, %ESI
177 %EAX = LEA32r %ESI, 1, %EAX, -3
178 %ESI = MOV32rm , 1, %NOREG, 0
179 %ESI = MOV32rm %ESI, 1, %NOREG, 32
180 %EDI = MOV32rr %EAX
181 SHL32ri %EDI, 4
182 ADD32rr %EDI, %ESI
183 %XMM0 = MOVAPSrm %ECX, 1, %NOREG, 0
184 %XMM1 = MOVAPSrr %XMM0
185 SHUFPSrr %XMM1, %XMM1, 170
186 %XMM2 = MOVAPSrr %XMM0
187 SHUFPSrr %XMM2, %XMM2, 0
188 %XMM3 = MOVAPSrr %XMM0
189 SHUFPSrr %XMM3, %XMM3, 255
190 SHUFPSrr %XMM0, %XMM0, 85
191 %EBX = MOV32rr %EDI
192 AND32ri8 %EBX, 15
193 CMP32ri8 %EBX, 0
168 %eax = MOV32ri -3
169 %edx = MOV32rm , 1, %noreg, 0
170 ADD32rm %eax, %edx, 1, %noreg, 0
171 %edx = MOV32rm , 1, %noreg, 0
172 %edx = MOV32rm %edx, 1, %noreg, 40
173 IMUL32rr %eax, %edx
174 %esi = MOV32rm , 1, %noreg, 0
175 %esi = MOV32rm %esi, 1, %noreg, 0
176 MOV32mr , 1, %noreg, 0, %esi
177 %eax = LEA32r %esi, 1, %eax, -3
178 %esi = MOV32rm , 1, %noreg, 0
179 %esi = MOV32rm %esi, 1, %noreg, 32
180 %edi = MOV32rr %eax
181 SHL32ri %edi, 4
182 ADD32rr %edi, %esi
183 %xmm0 = MOVAPSrm %ecx, 1, %noreg, 0
184 %xmm1 = MOVAPSrr %xmm0
185 SHUFPSrr %xmm1, %xmm1, 170
186 %xmm2 = MOVAPSrr %xmm0
187 SHUFPSrr %xmm2, %xmm2, 0
188 %xmm3 = MOVAPSrr %xmm0
189 SHUFPSrr %xmm3, %xmm3, 255
190 SHUFPSrr %xmm0, %xmm0, 85
191 %ebx = MOV32rr %edi
192 AND32ri8 %ebx, 15
193 CMP32ri8 %ebx, 0
194194 JE mbb
195195
196196 This looks really bad. The problem is shufps is a destructive opcode. Since it
102102
103103 Before regalloc, we have:
104104
105 %reg1025 = IMUL32rri8 %reg1024, 45, %EFLAGS
105 %reg1025 = IMUL32rri8 %reg1024, 45, %eflags
106106 JMP mbb
107107 Successors according to CFG: 0x203afb0 (#3)
108108
109109 bb1: 0x203af60, LLVM BB @0x1e02310, ID#2:
110110 Predecessors according to CFG: 0x203aec0 (#0)
111 %reg1026 = IMUL32rri8 %reg1024, 78, %EFLAGS
111 %reg1026 = IMUL32rri8 %reg1024, 78, %eflags
112112 Successors according to CFG: 0x203afb0 (#3)
113113
114114 bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3:
499499 // A SwiftError is passed in R12.
500500 CCIfSwiftError>>,
501501
502 // For Swift Calling Convention, pass sret in %RAX.
502 // For Swift Calling Convention, pass sret in %rax.
503503 CCIfCC<"CallingConv::Swift",
504504 CCIfSRet>>>,
505505
19751975 // Generate the DIV/IDIV instruction.
19761976 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
19771977 TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1978 // For i8 remainder, we can't reference AH directly, as we'll end
1979 // up with bogus copies like %R9B = COPY %AH. Reference AX
1980 // instead to prevent AH references in a REX instruction.
1978 // For i8 remainder, we can't reference ah directly, as we'll end
1979 // up with bogus copies like %r9b = COPY %ah. Reference ax
1980 // instead to prevent ah references in a rex instruction.
19811981 //
19821982 // The current assumption of the fast register allocator is that isel
19831983 // won't generate explicit references to the GR8_NOREX registers. If
188188 /// So, it handles pattern like this:
189189 ///
190190 /// BB#2: derived from LLVM BB %if.then
191 /// Live Ins: %RDI
191 /// Live Ins: %rdi
192192 /// Predecessors according to CFG: BB#0
193 /// %AX = MOV16rm %RDI, 1, %noreg, 0, %noreg, %EAX; mem:LD2[%p]
194 /// No %EAX
193 /// %ax = MOV16rm %rdi, 1, %noreg, 0, %noreg, %eax; mem:LD2[%p]
194 /// No %eax
195195 /// Successors according to CFG: BB#3(?%)
196196 ///
197197 /// BB#3: derived from LLVM BB %if.end
198 /// Live Ins: %EAX Only %AX is actually live
198 /// Live Ins: %eax Only %ax is actually live
199199 /// Predecessors according to CFG: BB#2 BB#1
200 /// %AX = KILL %AX, %EAX
201 /// RET 0, %AX
200 /// %ax = KILL %ax, %eax
201 /// RET 0, %ax
202202 static bool isLive(const MachineInstr &MI,
203203 const LivePhysRegs &LiveRegs,
204204 const TargetRegisterInfo *TRI,
515515
516516 // Push the fixed live-in registers.
517517 for (unsigned i = Bundle.FixCount; i > 0; --i) {
518 DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
518 DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %fp"
519519 << unsigned(Bundle.FixStack[i-1]) << '\n');
520520 pushReg(Bundle.FixStack[i-1]);
521521 }
892892 while (Kills && Defs) {
893893 unsigned KReg = countTrailingZeros(Kills);
894894 unsigned DReg = countTrailingZeros(Defs);
895 DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
895 DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg << "\n");
896896 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
897897 std::swap(RegMap[KReg], RegMap[DReg]);
898898 Kills &= ~(1 << KReg);
906906 unsigned KReg = getStackEntry(0);
907907 if (!(Kills & (1 << KReg)))
908908 break;
909 DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
909 DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
910910 popStackAfter(I2);
911911 Kills &= ~(1 << KReg);
912912 }
915915 // Manually kill the rest.
916916 while (Kills) {
917917 unsigned KReg = countTrailingZeros(Kills);
918 DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
918 DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
919919 freeStackSlotBefore(I, KReg);
920920 Kills &= ~(1 << KReg);
921921 }
923923 // Load zeros for all the imp-defs.
924924 while(Defs) {
925925 unsigned DReg = countTrailingZeros(Defs);
926 DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
926 DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
927927 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
928928 pushReg(DReg);
929929 Defs &= ~(1 << DReg);
1087810878 // FIXME: There are instructions which are being manually built without
1087910879 // explicit uses/defs so we also have to check the MCInstrDesc. We should be
1088010880 // able to remove the extra checks once those are fixed up. For example,
10881 // sometimes we might get something like %RAX = POP64r 1. This won't be
10881 // sometimes we might get something like %rax = POP64r 1. This won't be
1088210882 // caught by modifiesRegister or readsRegister even though the instruction
1088310883 // really ought to be formed so that modifiesRegister/readsRegister would
1088410884 // catch it.
960960 // This is an optimization that lets us get away without emitting a nop in
961961 // many cases.
962962 //
963 // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %R9) takes two
963 // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
964964 // bytes too, so the check on MinSize is important.
965965 MCI.setOpcode(X86::PUSH64rmr);
966966 } else {
295295
296296 ; Check that we correctly deal with repeated operands.
297297 ; The following testcase creates:
298 ; %D1 = FADDDrr %D0, %D0
298 ; %d1 = FADDDrr %d0, %d0
299299 ; We'll get a crash if we naively look at the first operand, remove it
300300 ; from the substitution list then look at the second operand.
301301
0 ; RUN: llc < %s -mtriple=arm64-apple-ios -verify-machineinstrs | FileCheck %s
11
22 ; LdStOpt bug created illegal instruction:
3 ; %D1, %D2 = LDPSi %X0, 1
3 ; %d1, %d2 = LDPSi %x0, 1
44 ; rdar://11512047
55
66 %0 = type opaque
99 ;
1010 ; CHECK: Before post-MI-sched:
1111 ; CHECK-LABEL: # Machine code for function test1:
12 ; CHECK: SU(2): STRWui %WZR
13 ; CHECK: SU(3): %X21, %X20 = LDPXi %SP
12 ; CHECK: SU(2): STRWui %wzr
13 ; CHECK: SU(3): %x21, %x20 = LDPXi %sp
1414 ; CHECK: Predecessors:
1515 ; CHECK-NEXT: SU(0): Out
1616 ; CHECK-NEXT: SU(0): Out
22 ; Check that the dead register definition pass is considering implicit defs.
33 ; When rematerializing through truncates, the coalescer may produce instructions
44 ; with dead defs, but live implicit-defs of subregs:
5 ; E.g. %X1 = MOVi64imm 2, %W1; %X1:GPR64, %W1:GPR32
5 ; E.g. %x1 = MOVi64imm 2, %w1; %x1:GPR64, %w1:GPR32
66 ; These instructions are live, and their definitions should not be rewritten.
77 ;
88 ;
88 ; CHECK: Successors:
99 ; CHECK-NEXT: SU(5): Data Latency=4 Reg=%vreg2
1010 ; CHECK-NEXT: SU(4): Ord Latency=0
11 ; CHECK: SU(3): STRWui %WZR, %vreg0, 0; mem:ST4[%ptr1] GPR64common:%vreg0
11 ; CHECK: SU(3): STRWui %wzr, %vreg0, 0; mem:ST4[%ptr1] GPR64common:%vreg0
1212 ; CHECK: Successors:
1313 ; CHECK: SU(4): Ord Latency=0
14 ; CHECK: SU(4): STRWui %WZR, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1
15 ; CHECK: SU(5): %W0 = COPY %vreg2; GPR32:%vreg2
14 ; CHECK: SU(4): STRWui %wzr, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1
15 ; CHECK: SU(5): %w0 = COPY %vreg2; GPR32:%vreg2
1616 ; CHECK: ** ScheduleDAGMI::schedule picking next node
1717 define i32 @misched_bug(i32* %ptr1, i32* %ptr2) {
1818 entry:
77 ; Check that no scheduling dependencies are created between the paired loads and the store during post-RA MI scheduling.
88 ;
99 ; CHECK-LABEL: # Machine code for function foo:
10 ; CHECK: SU(2): %W{{[0-9]+}}, %W{{[0-9]+}} = LDPWi
10 ; CHECK: SU(2): %w{{[0-9]+}}, %w{{[0-9]+}} = LDPWi
1111 ; CHECK: Successors:
1212 ; CHECK-NOT: ch SU(4)
1313 ; CHECK: SU(3)
14 ; CHECK: SU(4): STRWui %WZR, %X{{[0-9]+}}
14 ; CHECK: SU(4): STRWui %wzr, %x{{[0-9]+}}
1515 define i32 @foo() {
1616 entry:
1717 %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @G2, i64 0, i64 0), align 4
2121 body: |
2222 bb.0:
2323 ; CHECK: Adding MCLOH_AdrpAdrp:
24 ; CHECK-NEXT: %X1 = ADRP
25 ; CHECK-NEXT: %X1 = ADRP >
24 ; CHECK-NEXT: %x1 = ADRP >
25 ; CHECK-NEXT: %x1 = ADRP
2626 ; CHECK-NEXT: Adding MCLOH_AdrpAdrp:
27 ; CHECK-NEXT: %X1 = ADRP
28 ; CHECK-NEXT: %X1 = ADRP >
27 ; CHECK-NEXT: %x1 = ADRP >
28 ; CHECK-NEXT: %x1 = ADRP
2929 ; CHECK-NEXT: Adding MCLOH_AdrpAdrp:
30 ; CHECK-NEXT: %X0 = ADRP
31 ; CHECK-NEXT: %X0 = ADRP >
30 ; CHECK-NEXT: %x0 = ADRP >
31 ; CHECK-NEXT: %x0 = ADRP
3232 %x0 = ADRP target-flags(aarch64-page) @g0
3333 %x0 = ADRP target-flags(aarch64-page) @g1
3434 %x1 = ADRP target-flags(aarch64-page) @g2
3737
3838 bb.1:
3939 ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
40 ; CHECK-NEXT: %X20 = ADRP
41 ; CHECK-NEXT: %X3 = ADDXri %X20,
40 ; CHECK-NEXT: %x20 = ADRP
41 ; CHECK-NEXT: %x3 = ADDXri %x20,
4242 ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
43 ; CHECK-NEXT: %X1 = ADRP
44 ; CHECK-NEXT: %X1 = ADDXri %X1,
43 ; CHECK-NEXT: %x1 = ADRP
44 ; CHECK-NEXT: %x1 = ADDXri %x1,
4545 %x1 = ADRP target-flags(aarch64-page) @g0
4646 %x9 = SUBXri undef %x11, 5, 0 ; should not affect MCLOH formation
4747 %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0
7272
7373 bb.5:
7474 ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
75 ; CHECK-NEXT: %X5 = ADRP
76 ; CHECK-NEXT: %S6 = LDRSui %X5,
75 ; CHECK-NEXT: %x5 = ADRP
76 ; CHECK-NEXT: %s6 = LDRSui %x5,
7777 ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
78 ; CHECK-NEXT: %X4 = ADRP
79 ; CHECK-NEXT: %X4 = LDRXui %X4,
78 ; CHECK-NEXT: %x4 = ADRP
79 ; CHECK-NEXT: %x4 = LDRXui %x4,
8080 %x4 = ADRP target-flags(aarch64-page) @g2
8181 %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2
8282 %x5 = ADRP target-flags(aarch64-page) @g2
8484
8585 bb.6:
8686 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot:
87 ; CHECK-NEXT: %X5 = ADRP
88 ; CHECK-NEXT: %X6 = LDRXui %X5,
87 ; CHECK-NEXT: %x5 = ADRP
88 ; CHECK-NEXT: %x6 = LDRXui %x5,
8989 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot:
90 ; CHECK-NEXT: %X4 = ADRP
91 ; CHECK-NEXT: %X4 = LDRXui %X4,
90 ; CHECK-NEXT: %x4 = ADRP
91 ; CHECK-NEXT: %x4 = LDRXui %x4,
9292 %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2
9393 %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2
9494 %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2
103103
104104 bb.8:
105105 ; CHECK-NEXT: Adding MCLOH_AdrpAddLdr:
106 ; CHECK-NEXT: %X7 = ADRP [TF=1]
107 ; CHECK-NEXT: %X8 = ADDXri %X7,
108 ; CHECK-NEXT: %D1 = LDRDui %X8, 8
106 ; CHECK-NEXT: %x7 = ADRP [TF=1]
107 ; CHECK-NEXT: %x8 = ADDXri %x7,
108 ; CHECK-NEXT: %d1 = LDRDui %x8, 8
109109 %x7 = ADRP target-flags(aarch64-page) @g3
110110 %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0
111111 %d1 = LDRDui %x8, 8
112112
113113 bb.9:
114114 ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
115 ; CHECK-NEXT: %X3 = ADRP
116 ; CHECK-NEXT: %X3 = ADDXri %X3,
115 ; CHECK-NEXT: %x3 = ADRP
116 ; CHECK-NEXT: %x3 = ADDXri %x3,
117117 ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
118 ; CHECK-NEXT: %X5 = ADRP
119 ; CHECK-NEXT: %X2 = ADDXri %X5,
118 ; CHECK-NEXT: %x5 = ADRP
119 ; CHECK-NEXT: %x2 = ADDXri %x5,
120120 ; CHECK-NEXT: Adding MCLOH_AdrpAddStr:
121 ; CHECK-NEXT: %X1 = ADRP
122 ; CHECK-NEXT: %X1 = ADDXri %X1,
123 ; CHECK-NEXT: STRXui %XZR, %X1, 16
121 ; CHECK-NEXT: %x1 = ADRP
122 ; CHECK-NEXT: %x1 = ADDXri %x1,
123 ; CHECK-NEXT: STRXui %xzr, %x1, 16
124124 %x1 = ADRP target-flags(aarch64-page) @g3
125125 %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3, 0
126126 STRXui %xzr, %x1, 16
137137
138138 bb.10:
139139 ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
140 ; CHECK-NEXT: %X2 = ADRP
141 ; CHECK-NEXT: %X2 = LDRXui %X2,
140 ; CHECK-NEXT: %x2 = ADRP
141 ; CHECK-NEXT: %x2 = LDRXui %x2,
142142 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr:
143 ; CHECK-NEXT: %X1 = ADRP
144 ; CHECK-NEXT: %X1 = LDRXui %X1,
145 ; CHECK-NEXT: %X1 = LDRXui %X1, 24
143 ; CHECK-NEXT: %x1 = ADRP
144 ; CHECK-NEXT: %x1 = LDRXui %x1,
145 ; CHECK-NEXT: %x1 = LDRXui %x1, 24
146146 %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4
147147 %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4
148148 %x1 = LDRXui %x1, 24
153153
154154 bb.11:
155155 ; CHECK-NEXT: Adding MCLOH_AdrpLdr
156 ; CHECK-NEXT: %X5 = ADRP
157 ; CHECK-NEXT: %X5 = LDRXui %X5,
156 ; CHECK-NEXT: %x5 = ADRP
157 ; CHECK-NEXT: %x5 = LDRXui %x5,
158158 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr:
159 ; CHECK-NEXT: %X1 = ADRP
160 ; CHECK-NEXT: %X1 = LDRXui %X1,
161 ; CHECK-NEXT: STRXui %XZR, %X1, 32
159 ; CHECK-NEXT: %x1 = ADRP
160 ; CHECK-NEXT: %x1 = LDRXui %x1,
161 ; CHECK-NEXT: STRXui %xzr, %x1, 32
162162 %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4
163163 %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4
164164 STRXui %xzr, %x1, 32
170170 bb.12:
171171 ; CHECK-NOT: MCLOH_AdrpAdrp
172172 ; CHECK: Adding MCLOH_AdrpAddLdr
173 ; %X9 = ADRP
174 ; %X9 = ADDXri %X9,
175 ; %X5 = LDRXui %X9, 0
173 ; %x9 = ADRP
174 ; %x9 = ADDXri %x9,
175 ; %x5 = LDRXui %x9, 0
176176 %x9 = ADRP target-flags(aarch64-page, aarch64-got) @g4
177177 %x9 = ADDXri %x9, target-flags(aarch64-pageoff, aarch64-got) @g4, 0
178178 %x5 = LDRXui %x9, 0
11
22 ; This file check a bug in MachineCopyPropagation pass. The last COPY will be
33 ; incorrectly removed if the machine instructions are as follows:
4 ; %Q5_Q6 = COPY %Q2_Q3
5 ; %D5 =
6 ; %D3 =
7 ; %D3 = COPY %D6
4 ; %q5_q6 = COPY %q2_q3
5 ; %d5 =
6 ; %d3 =
7 ; %d3 = COPY %d6
88 ; This is caused by a bug in function SourceNoLongerAvailable(), which fails to
9 ; remove the relationship of D6 and "%Q5_Q6 = COPY %Q2_Q3".
9 ; remove the relationship of D6 and "%q5_q6 = COPY %q2_q3".
1010
1111 @failed = internal unnamed_addr global i1 false
1212
2929 ; CHECK: ldr w[[REG:[0-9]+]], [sp, #8]
3030 ; CHECK-NEXT: .Ltmp
3131 call void @llvm.dbg.value(metadata i32 %.0, i64 0, metadata !15, metadata !13), !dbg !16
32 ; CHECK-NEXT: //DEBUG_VALUE: func:c <- %W[[REG]]
32 ; CHECK-NEXT: //DEBUG_VALUE: func:c <- %w[[REG]]
3333 %5 = add nsw i32 %.0, %0, !dbg !22
3434 call void @llvm.dbg.value(metadata i32 %5, i64 0, metadata !15, metadata !13), !dbg !16
3535 ret i32 %5, !dbg !23
66 # Check that the instructions are not dependent on each other, even though
77 # they all read/write to the zero register.
88 # CHECK-LABEL: MI Scheduling
9 # CHECK: SU(0): %WZR = SUBSWri %W1, 0, 0, %NZCV
9 # CHECK: SU(0): %wzr = SUBSWri %w1, 0, 0, %nzcv
1010 # CHECK: # succs left : 0
1111 # CHECK-NOT: Successors:
12 # CHECK: SU(1): %W2 = COPY %WZR
12 # CHECK: SU(1): %w2 = COPY %wzr
1313 # CHECK: # succs left : 0
1414 # CHECK-NOT: Successors:
15 # CHECK: SU(2): %WZR = SUBSWri %W3, 0, 0, %NZCV
15 # CHECK: SU(2): %wzr = SUBSWri %w3, 0, 0, %nzcv
1616 # CHECK: # succs left : 0
1717 # CHECK-NOT: Successors:
18 # CHECK: SU(3): %W4 = COPY %WZR
18 # CHECK: SU(3): %w4 = COPY %wzr
1919 # CHECK: # succs left : 0
2020 # CHECK-NOT: Successors:
2121 name: func
4444 ; %2 = load i32, i32 addrspace(1)* %in
4545 ;
4646 ; The instruction selection phase will generate ISA that looks like this:
47 ; %OQAP = LDS_READ_RET
48 ; %vreg0 = MOV %OQAP
47 ; %oqap = LDS_READ_RET
48 ; %vreg0 = MOV %oqap
4949 ; %vreg1 = VTX_READ_32
5050 ; %vreg2 = ADD_INT %vreg1, %vreg0
5151 ;
5252 ; The bottom scheduler will schedule the two ALU instructions first:
5353 ;
5454 ; UNSCHEDULED:
55 ; %OQAP = LDS_READ_RET
55 ; %oqap = LDS_READ_RET
5656 ; %vreg1 = VTX_READ_32
5757 ;
5858 ; SCHEDULED:
5959 ;
60 ; vreg0 = MOV %OQAP
60 ; vreg0 = MOV %oqap
6161 ; vreg2 = ADD_INT %vreg1, %vreg2
6262 ;
6363 ; The lack of proper aliasing results in the local memory read (LDS_READ_RET)
6666 ; final program which looks like this:
6767 ;
6868 ; Alu clause:
69 ; %OQAP = LDS_READ_RET
69 ; %oqap = LDS_READ_RET
7070 ; VTX clause:
7171 ; %vreg1 = VTX_READ_32
7272 ; Alu clause:
73 ; vreg0 = MOV %OQAP
73 ; vreg0 = MOV %oqap
7474 ; vreg2 = ADD_INT %vreg1, %vreg2
7575 ;
76 ; This is an illegal program because the OQAP def and use know occur in
76 ; This is an illegal program because the oqap def and use know occur in
7777 ; different ALU clauses.
7878 ;
7979 ; This test checks this scenario and makes sure it doesn't result in an
33 ; CHECK: s_load_dwordx2 s[4:5]
44
55 ; FIXME: Why is the SGPR4_SGPR5 reference being removed from DBG_VALUE?
6 ; CHECK: ; kill: %SGPR4_SGPR5 %SGPR4_SGPR5
6 ; CHECK: ; kill: %sgpr4_sgpr5 %sgpr4_sgpr5
77 ; CHECK-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- undef
88
99 ; CHECK: buffer_store_dword
33 # Check there is no SReg_32 pressure created by DS_* instructions because of M0 use
44
55 # CHECK: ScheduleDAGMILive::schedule starting
6 # CHECK: SU({{.*}} = DS_READ_B32 {{.*}} %M0, %EXEC
6 # CHECK: SU({{.*}} = DS_READ_B32 {{.*}} %m0, %exec
77 # CHECK: Pressure Diff : {{$}}
88 # CHECK: SU({{.*}} DS_WRITE_B32
99
33 define void @vst(i8* %m, [4 x i64] %v) {
44 entry:
55 ; CHECK: vst:
6 ; CHECK: VST1d64Q %R{{[0-9]+}}, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}
6 ; CHECK: VST1d64Q %r{{[0-9]+}}, 8, %d{{[0-9]+}}, pred:14, pred:%noreg, %q{{[0-9]+}}_q{{[0-9]+}}
77
88 %v0 = extractvalue [4 x i64] %v, 0
99 %v1 = extractvalue [4 x i64] %v, 1
3636 %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
3737 define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
3838 ; CHECK: vtbx4:
39 ; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}
39 ; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %q{{[0-9]+}}_q{{[0-9]+}}
4040 %tmp1 = load <8 x i8>, <8 x i8>* %A
4141 %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
4242 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
88 ret void
99 }
1010
11 ; CHECK: tBL pred:14, pred:%noreg, , %LR, %SP, %R4, %R4, %R12, %CPSR
11 ; CHECK: tBL pred:14, pred:%noreg, , %lr, %sp, %r4, %r4, %r12, %cpsr
1212
1010 tail call void @llvm.dbg.value(metadata %struct.tag_s* %c, metadata !13, metadata !DIExpression()), !dbg !21
1111 tail call void @llvm.dbg.value(metadata i64 %x, metadata !14, metadata !DIExpression()), !dbg !22
1212 tail call void @llvm.dbg.value(metadata i64 %y, metadata !17, metadata !DIExpression()), !dbg !23
13 ;CHECK: @DEBUG_VALUE: foo:y <- [DW_OP_plus_uconst 8] [%R7+0]
13 ;CHECK: @DEBUG_VALUE: foo:y <- [DW_OP_plus_uconst 8] [%r7+0]
1414 tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr1, metadata !18, metadata !DIExpression()), !dbg !24
1515 tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr2, metadata !19, metadata !DIExpression()), !dbg !25
1616 %1 = icmp eq %struct.tag_s* %c, null, !dbg !26
44 ;CHECK: vadd.f32 q4, q8, q8
55 ;CHECK-NEXT: LBB0_1
66
7 ;CHECK: @DEBUG_VALUE: x <- %Q4{{$}}
8 ;CHECK-NEXT: @DEBUG_VALUE: y <- %Q4{{$}}
7 ;CHECK: @DEBUG_VALUE: x <- %q4{{$}}
8 ;CHECK-NEXT: @DEBUG_VALUE: y <- %q4{{$}}
99 ;CHECK: beq LBB0_1
1010
1111
3131 ; debug value as KILL'ed, resulting in a DEBUG_VALUE node changing codegen! (or
3232 ; hopefully, triggering an assert).
3333
34 ; CHECK: BUNDLE %ITSTATE
35 ; CHECK: * DBG_VALUE %R1, %noreg, !"u"
36 ; CHECK-NOT: * DBG_VALUE %R1, %noreg, !"u"
34 ; CHECK: BUNDLE %itstate
35 ; CHECK: * DBG_VALUE %r1, %noreg, !"u"
36 ; CHECK-NOT: * DBG_VALUE %r1, %noreg, !"u"
3737
3838 declare arm_aapcscc void @g(%struct.s*, i8*, i32) #1
3939
310310 ; CHECK-LABEL: bpf_prog2:
311311 ; CHECK: r0 = *(u16 *)skb[12] # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00]
312312 ; CHECK: r0 = *(u16 *)skb[16] # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
313 ; CHECK: implicit-def: %R1
313 ; CHECK: implicit-def: %r1
314314 ; CHECK: r1 =
315315 ; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
316316 ; CHECK: call 2 # encoding: [0x85,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
160160
161161 define hidden void @thunk_undef_double(i32 %this, double %volume) unnamed_addr align 2 {
162162 ; ALL-LABEL: thunk_undef_double:
163 ; O32: # implicit-def: %A2
164 ; O32: # implicit-def: %A3
163 ; O32: # implicit-def: %a2
164 ; O32: # implicit-def: %a3
165165 ; NOT-R6C: jr $[[TGT]]
166166 ; R6C: jrc $[[TGT]]
167167
2222 ; CHECK-NEXT: cmpld 7, 4, 5
2323 ; CHECK-NEXT: mfocrf 10, 1
2424 ; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31
25 ; CHECK-NEXT: # implicit-def: %X4
25 ; CHECK-NEXT: # implicit-def: %x4
2626 ; CHECK-NEXT: mr 4, 10
2727 ; CHECK-NEXT: clrldi 4, 4, 32
2828 ; CHECK-NEXT: std 4, 0(3)
99 lnext:
1010 %elementArray = load i32*, i32** %elementArrayPtr, align 8
1111 ; CHECK: lwz [[LDREG:[0-9]+]], 124(1) # 4-byte Folded Reload
12 ; CHECK: # implicit-def: %X[[TEMPREG:[0-9]+]]
12 ; CHECK: # implicit-def: %x[[TEMPREG:[0-9]+]]
1313 %element = load i32, i32* %elementArray, align 4
1414 ; CHECK: mr [[TEMPREG]], [[LDREG]]
1515 ; CHECK: clrldi 4, [[TEMPREG]], 32
1212
1313 ; Make sure that the MMO on the store has no offset from the byval
1414 ; variable itself (we used to have mem:ST8[%v+64]).
15 ; CHECK: STD %X5, 176, %X1; mem:ST8[%v](align=16)
15 ; CHECK: STD %x5, 176, %x1; mem:ST8[%v](align=16)
1616
99 ; CHECK-NEXT: xori 3, 3, 65534
1010 ; CHECK-NEXT: cntlzw 3, 3
1111 ; CHECK-NEXT: srwi 3, 3, 5
12 ; CHECK-NEXT: # implicit-def: %X4
12 ; CHECK-NEXT: # implicit-def: %x4
1313 ; CHECK-NEXT: mr 4, 3
1414 ; CHECK-NEXT: mr 3, 4
1515 ; CHECK-NEXT: blr
66 %2 = zext i32 %1 to i64
77 %3 = shl i64 %2, 48
88 %4 = ashr exact i64 %3, 48
9 ; CHECK: ANDIo8 {{[^,]+}}, 65520, %CR0;
9 ; CHECK: ANDIo8 {{[^,]+}}, 65520, %cr0;
1010 ; CHECK: CMPLDI
1111 ; CHECK: BCC
1212
13 ; CHECK: ANDIo8 {{[^,]+}}, 65520, %CR0;
14 ; CHECK: COPY %CR0
13 ; CHECK: ANDIo8 {{[^,]+}}, 65520, %cr0;
14 ; CHECK: COPY %cr0
1515 ; CHECK: BCC
1616 %5 = icmp eq i64 %4, 0
1717 br i1 %5, label %foo, label %bar
2525
2626 ; CHECK-LABEL: fn2
2727 define signext i32 @fn2(i64 %a, i64 %b) {
28 ; CHECK: OR8o {{[^, ]+}}, {{[^, ]+}}, %CR0;
29 ; CHECK: [[CREG:[^, ]+]] = COPY %CR0
28 ; CHECK: OR8o {{[^, ]+}}, {{[^, ]+}}, %cr0;
29 ; CHECK: [[CREG:[^, ]+]] = COPY %cr0
3030 ; CHECK: BCC 12, [[CREG]]
3131 %1 = or i64 %b, %a
3232 %2 = icmp sgt i64 %1, -1
4141
4242 ; CHECK-LABEL: fn3
4343 define signext i32 @fn3(i32 %a) {
44 ; CHECK: ANDIo {{[^, ]+}}, 10, %CR0;
45 ; CHECK: [[CREG:[^, ]+]] = COPY %CR0
44 ; CHECK: ANDIo {{[^, ]+}}, 10, %cr0;
45 ; CHECK: [[CREG:[^, ]+]] = COPY %cr0
4646 ; CHECK: BCC 76, [[CREG]]
4747 %1 = and i32 %a, 10
4848 %2 = icmp ne i32 %1, 0
1313
1414 ; CHECK: ********** Function: foo
1515 ; CHECK: ********** FAST REGISTER ALLOCATION **********
16 ; CHECK: %X3 = COPY %vreg
17 ; CHECK-NEXT: %X4 = COPY %vreg
16 ; CHECK: %x3 = COPY %vreg
17 ; CHECK-NEXT: %x4 = COPY %vreg
1818 ; CHECK-NEXT: BLR
0 ; Test 32-bit signed division and remainder.
11 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s
33
44 declare i32 @foo()
55
0 ; Test 32-bit unsigned division and remainder.
11 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s
33
44 declare i32 @foo()
55
0 ; Test 64-bit signed division and remainder when the divisor is
11 ; a signed-extended i32.
22 ;
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s
44
55 declare i64 @foo()
66
0 ; Testg 64-bit signed division and remainder.
11 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s
33
44 declare i64 @foo()
55
0 ; Testg 64-bit unsigned division and remainder.
11 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s
33
44 declare i64 @foo()
55
0 ; Test that divisions by constants are implemented as multiplications.
11 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s
33
44 ; Check signed 32-bit division.
55 define i32 @f1(i32 %a) {
0 ; Test high-part i64->i128 multiplications.
11 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s
33
44 declare i64 @foo()
55
0 ; Test signed high-part i64->i128 multiplications on z14.
11 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -asm-verbose=0 | FileCheck %s
33
44 declare i64 @foo()
55
99 ; CHECK-NEXT: lbh %r1, 0(%r2)
1010 ; CHECK-NEXT: ldgr %f0, %r1
1111 ; CHECK-NEXT: ldgr %f2, %r0
12 ; CHECK-NEXT: # kill: %F0S %F0S %F0D
13 ; CHECK-NEXT: # kill: %F2S %F2S %F2D
12 ; CHECK-NEXT: # kill: %f0s %f0s %f0d
13 ; CHECK-NEXT: # kill: %f2s %f2s %f2d
1414 ; CHECK-NEXT: br %r14
1515 %L17 = load <2 x i8>, <2 x i8>* %a
1616 %Se21 = sext <2 x i8> %L17 to <2 x i32>
0 ; RUN: llc < %s -verify-machineinstrs
11 ;
22 ; This test case is transformed into a single basic block by the machine
3 ; branch folding pass. That makes a complete mess of the %EFLAGS liveness, but
3 ; branch folding pass. That makes a complete mess of the %eflags liveness, but
44 ; we don't care about liveness this late anyway.
55
66 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
11 ; rdar://7842028
22
33 ; Do not delete partially dead copy instructions.
4 ; %RDI = MOV64rr %RAX, %EDI
5 ; REP_MOVSD %ECX, %EDI, %ESI, %ECX, %EDI, %ESI
4 ; %rdi = MOV64rr %rax, %edi
5 ; REP_MOVSD %ecx, %edi, %esi, %ecx, %edi, %esi
66
77
88 %struct.F = type { %struct.FC*, i32, i32, i8, i32, i32, i32 }
55 ;BB#5: derived from LLVM BB %bb10
66 ; Predecessors according to CFG: BB#4 BB#5
77 ; %reg1024 = MOV_Fp8080 %reg1034
8 ; %reg1025 = MUL_Fp80m32 %reg1024, %RIP, 1, %reg0, , %reg0; mem:LD4[ConstantPool]
8 ; %reg1025 = MUL_Fp80m32 %reg1024, %rip, 1, %reg0, , %reg0; mem:LD4[ConstantPool]
99 ; %reg1034 = MOV_Fp8080 %reg1025
10 ; FP_REG_KILL %FP0, %FP1, %FP2, %FP3, %FP4, %FP5, %FP6
10 ; FP_REG_KILL %fp0, %fp1, %fp2, %fp3, %fp4, %fp5, %fp6
1111 ; JMP_4
1212 ; Successors according to CFG: BB#5
1313 ;
14 ; The X86FP pass needs good kill flags, like on %FP0 representing %reg1034:
14 ; The X86FP pass needs good kill flags, like on %fp0 representing %reg1034:
1515 ;BB#5: derived from LLVM BB %bb10
1616 ; Predecessors according to CFG: BB#4 BB#5
17 ; %FP0 = LD_Fp80m , 1, %reg0, 0, %reg0; mem:LD10[FixedStack3](align=4)
18 ; %FP1 = MOV_Fp8080 %FP0
19 ; %FP2 = MUL_Fp80m32 %FP1, %RIP, 1, %reg0, , %reg0; mem:LD4[ConstantPool]
20 ; %FP0 = MOV_Fp8080 %FP2
21 ; ST_FpP80m , 1, %reg0, 0, %reg0, %FP0; mem:ST10[FixedStack3](align=4)
22 ; ST_FpP80m , 1, %reg0, 0, %reg0, %FP1; mem:ST10[FixedStack4](align=4)
23 ; ST_FpP80m , 1, %reg0, 0, %reg0, %FP2; mem:ST10[FixedStack5](align=4)
24 ; FP_REG_KILL %FP0, %FP1, %FP2, %FP3, %FP4, %FP5, %FP6
17 ; %fp0 = LD_Fp80m , 1, %reg0, 0, %reg0; mem:LD10[FixedStack3](align=4)
18 ; %fp1 = MOV_Fp8080 %fp0
19 ; %fp2 = MUL_Fp80m32 %fp1, %rip, 1, %reg0, , %reg0; mem:LD4[ConstantPool]
20 ; %fp0 = MOV_Fp8080 %fp2
21 ; ST_FpP80m , 1, %reg0, 0, %reg0, %fp0; mem:ST10[FixedStack3](align=4)
22 ; ST_FpP80m , 1, %reg0, 0, %reg0, %fp1; mem:ST10[FixedStack4](align=4)
23 ; ST_FpP80m , 1, %reg0, 0, %reg0, %fp2; mem:ST10[FixedStack5](align=4)
24 ; FP_REG_KILL %fp0, %fp1, %fp2, %fp3, %fp4, %fp5, %fp6
2525 ; JMP_4
2626 ; Successors according to CFG: BB#5
2727
4444 !18 = !DIFile(filename: "f.c", directory: "/tmp")
4545 !19 = !{}
4646
47 ;CHECK: DEBUG_VALUE: bar:x <- %E
47 ;CHECK: DEBUG_VALUE: bar:x <- %e
4848 ;CHECK: Ltmp
4949 ;CHECK: DEBUG_VALUE: foo:y <- 1{{$}}
5050 !20 = !{i32 1, !"Debug Info Version", i32 3}
1010 ; Function Attrs: noinline nounwind optsize readnone ssp
1111 define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) #0 align 2 !dbg !4 {
1212 entry:
13 ; CHECK: DEBUG_VALUE: baz:this <- %RDI{{$}}
13 ; CHECK: DEBUG_VALUE: baz:this <- %rdi{{$}}
1414 tail call void @llvm.dbg.value(metadata %struct.foo* %this, i64 0, metadata !13, metadata !16), !dbg !17
1515 tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !18, metadata !16), !dbg !17
1616 %0 = mul nsw i32 %x, 7, !dbg !19
2727 define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
2828 ; X64-LABEL: test_add_i32:
2929 ; X64: # BB#0:
30 ; X64-NEXT: # kill: %EDI %EDI %RDI
31 ; X64-NEXT: # kill: %ESI %ESI %RSI
30 ; X64-NEXT: # kill: %edi %edi %rdi
31 ; X64-NEXT: # kill: %esi %esi %rsi
3232 ; X64-NEXT: leal (%rsi,%rdi), %eax
3333 ; X64-NEXT: retq
3434 ;
4444 define i16 @test_add_i16(i16 %arg1, i16 %arg2) {
4545 ; X64-LABEL: test_add_i16:
4646 ; X64: # BB#0:
47 ; X64-NEXT: # kill: %EDI %EDI %RDI
48 ; X64-NEXT: # kill: %ESI %ESI %RSI
47 ; X64-NEXT: # kill: %edi %edi %rdi
48 ; X64-NEXT: # kill: %esi %esi %rsi
4949 ; X64-NEXT: leal (%rsi,%rdi), %eax
50 ; X64-NEXT: # kill: %AX %AX %EAX
50 ; X64-NEXT: # kill: %ax %ax %eax
5151 ; X64-NEXT: retq
5252 ;
5353 ; X32-LABEL: test_add_i16:
55 define i64 @test_zext_i1(i8 %a) {
66 ; X64-LABEL: test_zext_i1:
77 ; X64: # BB#0:
8 ; X64-NEXT: # kill: %EDI %EDI %RDI
8 ; X64-NEXT: # kill: %edi %edi %rdi
99 ; X64-NEXT: andq $1, %rdi
1010 ; X64-NEXT: movq %rdi, %rax
1111 ; X64-NEXT: retq
1212 ; X32: # BB#0:
1313 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1414 ; X32-NEXT: andb $1, %al
15 ; X32-NEXT: # kill: %AL %AL %EAX
15 ; X32-NEXT: # kill: %al %al %eax
1616 ; X32-NEXT: retl
1717 %val = trunc i32 %a to i1
1818 %r = zext i1 %val to i8
3030 ; X32: # BB#0:
3131 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3232 ; X32-NEXT: andw $1, %ax
33 ; X32-NEXT: # kill: %AX %AX %EAX
33 ; X32-NEXT: # kill: %ax %ax %eax
3434 ; X32-NEXT: retl
3535 %val = trunc i32 %a to i1
3636 %r = zext i1 %val to i16
1212 ;
1313 ; X64-LABEL: test_gep_i8:
1414 ; X64: # BB#0:
15 ; X64-NEXT: # kill: %ESI %ESI %RSI
15 ; X64-NEXT: # kill: %esi %esi %rsi
1616 ; X64-NEXT: movsbq %sil, %rax
1717 ; X64-NEXT: leaq (%rdi,%rax,4), %rax
1818 ; X64-NEXT: retq
4646 ;
4747 ; X64-LABEL: test_gep_i16:
4848 ; X64: # BB#0:
49 ; X64-NEXT: # kill: %ESI %ESI %RSI
49 ; X64-NEXT: # kill: %esi %esi %rsi
5050 ; X64-NEXT: movswq %si, %rax
5151 ; X64-NEXT: leaq (%rdi,%rax,4), %rax
5252 ; X64-NEXT: retq
99 ; CHECK: ## BB#0: ## %entry
1010 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1111 ; CHECK-NEXT: negl %eax
12 ; CHECK-NEXT: ## kill: %AL %AL %EAX
12 ; CHECK-NEXT: ## kill: %al %al %eax
1313 ; CHECK-NEXT: retl
1414 entry:
1515 %or = or i64 %argc, -4294967296
175175 ;
176176 ; X64-LINUX-LABEL: test6:
177177 ; X64-LINUX: # BB#0: # %entry
178 ; X64-LINUX-NEXT: # kill: %ESI %ESI %RSI
178 ; X64-LINUX-NEXT: # kill: %esi %esi %rsi
179179 ; X64-LINUX-NEXT: shlq $32, %rsi
180180 ; X64-LINUX-NEXT: leaq (%rsi,%rdi), %rax
181181 ; X64-LINUX-NEXT: retq
182182 ;
183183 ; X64-WIN32-LABEL: test6:
184184 ; X64-WIN32: # BB#0: # %entry
185 ; X64-WIN32-NEXT: # kill: %EDX %EDX %RDX
185 ; X64-WIN32-NEXT: # kill: %edx %edx %rdx
186186 ; X64-WIN32-NEXT: shlq $32, %rdx
187187 ; X64-WIN32-NEXT: leaq (%rdx,%rcx), %rax
188188 ; X64-WIN32-NEXT: retq
8383 define i8 @e(i32* nocapture %a, i32 %b) nounwind {
8484 ; CHECK-LABEL: e:
8585 ; CHECK: # BB#0:
86 ; CHECK-NEXT: # kill: %ESI %ESI %RSI
86 ; CHECK-NEXT: # kill: %esi %esi %rsi
8787 ; CHECK-NEXT: movl (%rdi), %ecx
8888 ; CHECK-NEXT: leal (%rsi,%rcx), %edx
8989 ; CHECK-NEXT: addl %esi, %edx
77 ; X32-LABEL: foo:
88 ; X32: # BB#0:
99 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
10 ; X32-NEXT: # kill: %EAX %EAX %AX
10 ; X32-NEXT: # kill: %eax %eax %ax
1111 ; X32-NEXT: divb {{[0-9]+}}(%esp)
1212 ; X32-NEXT: movzbl %al, %eax
1313 ; X32-NEXT: andl $1, %eax
1616 ; X64-LABEL: foo:
1717 ; X64: # BB#0:
1818 ; X64-NEXT: movzbl %dil, %eax
19 ; X64-NEXT: # kill: %EAX %EAX %AX
19 ; X64-NEXT: # kill: %eax %eax %ax
2020 ; X64-NEXT: divb %sil
2121 ; X64-NEXT: movzbl %al, %eax
2222 ; X64-NEXT: andl $1, %eax
3434 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
3535 ; X32-NEXT: xorl %edx, %edx
3636 ; X32-NEXT: divw {{[0-9]+}}(%esp)
37 ; X32-NEXT: # kill: %AX %AX %EAX
37 ; X32-NEXT: # kill: %ax %ax %eax
3838 ; X32-NEXT: andl $1, %eax
3939 ; X32-NEXT: retl
4040 ;
4343 ; X64-NEXT: xorl %edx, %edx
4444 ; X64-NEXT: movl %edi, %eax
4545 ; X64-NEXT: divw %si
46 ; X64-NEXT: # kill: %AX %AX %EAX
46 ; X64-NEXT: # kill: %ax %ax %eax
4747 ; X64-NEXT: andl $1, %eax
4848 ; X64-NEXT: retq
4949 %q = trunc i32 %p to i16
9292 ; CHECK-NEXT: movl $1, %eax
9393 ; CHECK-NEXT: lock xaddq %rax, (%rdi)
9494 ; CHECK-NEXT: shrq $63, %rax
95 ; CHECK-NEXT: # kill: %AL %AL %RAX
95 ; CHECK-NEXT: # kill: %al %al %rax
9696 ; CHECK-NEXT: retq
9797 entry:
9898 %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
88 define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
99 ; AVX-LABEL: castA:
1010 ; AVX: ## BB#0:
11 ; AVX-NEXT: ## kill: %XMM0 %XMM0 %YMM0
11 ; AVX-NEXT: ## kill: %xmm0 %xmm0 %ymm0
1212 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1313 ; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1414 ; AVX-NEXT: retq
1919 define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
2020 ; AVX-LABEL: castB:
2121 ; AVX: ## BB#0:
22 ; AVX-NEXT: ## kill: %XMM0 %XMM0 %YMM0
22 ; AVX-NEXT: ## kill: %xmm0 %xmm0 %ymm0
2323 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2424 ; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
2525 ; AVX-NEXT: retq
3232 define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
3333 ; AVX1-LABEL: castC:
3434 ; AVX1: ## BB#0:
35 ; AVX1-NEXT: ## kill: %XMM0 %XMM0 %YMM0
35 ; AVX1-NEXT: ## kill: %xmm0 %xmm0 %ymm0
3636 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
3737 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
3838 ; AVX1-NEXT: retq
3939 ;
4040 ; AVX2-LABEL: castC:
4141 ; AVX2: ## BB#0:
42 ; AVX2-NEXT: ## kill: %XMM0 %XMM0 %YMM0
42 ; AVX2-NEXT: ## kill: %xmm0 %xmm0 %ymm0
4343 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
4444 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
4545 ; AVX2-NEXT: retq
5353 define <4 x float> @castD(<8 x float> %m) nounwind uwtable readnone ssp {
5454 ; AVX-LABEL: castD:
5555 ; AVX: ## BB#0:
56 ; AVX-NEXT: ## kill: %XMM0 %XMM0 %YMM0
56 ; AVX-NEXT: ## kill: %xmm0 %xmm0 %ymm0
5757 ; AVX-NEXT: vzeroupper
5858 ; AVX-NEXT: retq
5959 %shuffle.i = shufflevector <8 x float> %m, <8 x float> %m, <4 x i32>
6363 define <2 x i64> @castE(<4 x i64> %m) nounwind uwtable readnone ssp {
6464 ; AVX-LABEL: castE:
6565 ; AVX: ## BB#0:
66 ; AVX-NEXT: ## kill: %XMM0 %XMM0 %YMM0
66 ; AVX-NEXT: ## kill: %xmm0 %xmm0 %ymm0
6767 ; AVX-NEXT: vzeroupper
6868 ; AVX-NEXT: retq
6969 %shuffle.i = shufflevector <4 x i64> %m, <4 x i64> %m, <2 x i32>
7373 define <2 x double> @castF(<4 x double> %m) nounwind uwtable readnone ssp {
7474 ; AVX-LABEL: castF:
7575 ; AVX: ## BB#0:
76 ; AVX-NEXT: ## kill: %XMM0 %XMM0 %YMM0
76 ; AVX-NEXT: ## kill: %xmm0 %xmm0 %ymm0
7777 ; AVX-NEXT: vzeroupper
7878 ; AVX-NEXT: retq
7979 %shuffle.i = shufflevector <4 x double> %m, <4 x double> %m, <2 x i32>
196196 ; CHECK-NEXT: vcmpeqsd %xmm0, %xmm0, %xmm0
197197 ; CHECK-NEXT: vmovq %xmm0, %rax
198198 ; CHECK-NEXT: andl $1, %eax
199 ; CHECK-NEXT: # kill: %EAX %EAX %RAX
199 ; CHECK-NEXT: # kill: %eax %eax %rax
200200 ; CHECK-NEXT: retq
201201 %cmp29 = fcmp oeq double undef, 0.000000e+00
202202 %res = zext i1 %cmp29 to i32
315315 define <4 x double> @test_mm256_castpd128_pd256(<2 x double> %a0) nounwind {
316316 ; X32-LABEL: test_mm256_castpd128_pd256:
317317 ; X32: # BB#0:
318 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
318 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
319319 ; X32-NEXT: retl
320320 ;
321321 ; X64-LABEL: test_mm256_castpd128_pd256:
322322 ; X64: # BB#0:
323 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
323 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
324324 ; X64-NEXT: retq
325325 %res = shufflevector <2 x double> %a0, <2 x double> %a0, <4 x i32>
326326 ret <4 x double> %res
329329 define <2 x double> @test_mm256_castpd256_pd128(<4 x double> %a0) nounwind {
330330 ; X32-LABEL: test_mm256_castpd256_pd128:
331331 ; X32: # BB#0:
332 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
332 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
333333 ; X32-NEXT: vzeroupper
334334 ; X32-NEXT: retl
335335 ;
336336 ; X64-LABEL: test_mm256_castpd256_pd128:
337337 ; X64: # BB#0:
338 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
338 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
339339 ; X64-NEXT: vzeroupper
340340 ; X64-NEXT: retq
341341 %res = shufflevector <4 x double> %a0, <4 x double> %a0, <2 x i32>
369369 define <8 x float> @test_mm256_castps128_ps256(<4 x float> %a0) nounwind {
370370 ; X32-LABEL: test_mm256_castps128_ps256:
371371 ; X32: # BB#0:
372 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
372 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
373373 ; X32-NEXT: retl
374374 ;
375375 ; X64-LABEL: test_mm256_castps128_ps256:
376376 ; X64: # BB#0:
377 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
377 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
378378 ; X64-NEXT: retq
379379 %res = shufflevector <4 x float> %a0, <4 x float> %a0, <8 x i32>
380380 ret <8 x float> %res
383383 define <4 x float> @test_mm256_castps256_ps128(<8 x float> %a0) nounwind {
384384 ; X32-LABEL: test_mm256_castps256_ps128:
385385 ; X32: # BB#0:
386 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
386 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
387387 ; X32-NEXT: vzeroupper
388388 ; X32-NEXT: retl
389389 ;
390390 ; X64-LABEL: test_mm256_castps256_ps128:
391391 ; X64: # BB#0:
392 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
392 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
393393 ; X64-NEXT: vzeroupper
394394 ; X64-NEXT: retq
395395 %res = shufflevector <8 x float> %a0, <8 x float> %a0, <4 x i32>
399399 define <4 x i64> @test_mm256_castsi128_si256(<2 x i64> %a0) nounwind {
400400 ; X32-LABEL: test_mm256_castsi128_si256:
401401 ; X32: # BB#0:
402 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
402 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
403403 ; X32-NEXT: retl
404404 ;
405405 ; X64-LABEL: test_mm256_castsi128_si256:
406406 ; X64: # BB#0:
407 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
407 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
408408 ; X64-NEXT: retq
409409 %res = shufflevector <2 x i64> %a0, <2 x i64> %a0, <4 x i32>
410410 ret <4 x i64> %res
437437 define <2 x i64> @test_mm256_castsi256_si128(<4 x i64> %a0) nounwind {
438438 ; X32-LABEL: test_mm256_castsi256_si128:
439439 ; X32: # BB#0:
440 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
440 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
441441 ; X32-NEXT: vzeroupper
442442 ; X32-NEXT: retl
443443 ;
444444 ; X64-LABEL: test_mm256_castsi256_si128:
445445 ; X64: # BB#0:
446 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
446 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
447447 ; X64-NEXT: vzeroupper
448448 ; X64-NEXT: retq
449449 %res = shufflevector <4 x i64> %a0, <4 x i64> %a0, <2 x i32>
10421042 define <4 x double> @test_mm256_insertf128_pd(<4 x double> %a0, <2 x double> %a1) nounwind {
10431043 ; X32-LABEL: test_mm256_insertf128_pd:
10441044 ; X32: # BB#0:
1045 ; X32-NEXT: # kill: %XMM1 %XMM1 %YMM1
1045 ; X32-NEXT: # kill: %xmm1 %xmm1 %ymm1
10461046 ; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
10471047 ; X32-NEXT: retl
10481048 ;
10491049 ; X64-LABEL: test_mm256_insertf128_pd:
10501050 ; X64: # BB#0:
1051 ; X64-NEXT: # kill: %XMM1 %XMM1 %YMM1
1051 ; X64-NEXT: # kill: %xmm1 %xmm1 %ymm1
10521052 ; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
10531053 ; X64-NEXT: retq
10541054 %ext = shufflevector <2 x double> %a1, <2 x double> %a1, <4 x i32>
10741074 define <4 x i64> @test_mm256_insertf128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
10751075 ; X32-LABEL: test_mm256_insertf128_si256:
10761076 ; X32: # BB#0:
1077 ; X32-NEXT: # kill: %XMM1 %XMM1 %YMM1
1077 ; X32-NEXT: # kill: %xmm1 %xmm1 %ymm1
10781078 ; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
10791079 ; X32-NEXT: retl
10801080 ;
10811081 ; X64-LABEL: test_mm256_insertf128_si256:
10821082 ; X64: # BB#0:
1083 ; X64-NEXT: # kill: %XMM1 %XMM1 %YMM1
1083 ; X64-NEXT: # kill: %xmm1 %xmm1 %ymm1
10841084 ; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
10851085 ; X64-NEXT: retq
10861086 %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32>
21872187 define <8 x float> @test_mm256_set_m128(<4 x float> %a0, <4 x float> %a1) nounwind {
21882188 ; X32-LABEL: test_mm256_set_m128:
21892189 ; X32: # BB#0:
2190 ; X32-NEXT: # kill: %XMM1 %XMM1 %YMM1
2190 ; X32-NEXT: # kill: %xmm1 %xmm1 %ymm1
21912191 ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
21922192 ; X32-NEXT: retl
21932193 ;
21942194 ; X64-LABEL: test_mm256_set_m128:
21952195 ; X64: # BB#0:
2196 ; X64-NEXT: # kill: %XMM1 %XMM1 %YMM1
2196 ; X64-NEXT: # kill: %xmm1 %xmm1 %ymm1
21972197 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
21982198 ; X64-NEXT: retq
21992199 %res = shufflevector <4 x float> %a1, <4 x float> %a0, <8 x i32>
22032203 define <4 x double> @test_mm256_set_m128d(<2 x double> %a0, <2 x double> %a1) nounwind {
22042204 ; X32-LABEL: test_mm256_set_m128d:
22052205 ; X32: # BB#0:
2206 ; X32-NEXT: # kill: %XMM1 %XMM1 %YMM1
2206 ; X32-NEXT: # kill: %xmm1 %xmm1 %ymm1
22072207 ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
22082208 ; X32-NEXT: retl
22092209 ;
22102210 ; X64-LABEL: test_mm256_set_m128d:
22112211 ; X64: # BB#0:
2212 ; X64-NEXT: # kill: %XMM1 %XMM1 %YMM1
2212 ; X64-NEXT: # kill: %xmm1 %xmm1 %ymm1
22132213 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
22142214 ; X64-NEXT: retq
22152215 %arg0 = bitcast <2 x double> %a0 to <4 x float>
22222222 define <4 x i64> @test_mm256_set_m128i(<2 x i64> %a0, <2 x i64> %a1) nounwind {
22232223 ; X32-LABEL: test_mm256_set_m128i:
22242224 ; X32: # BB#0:
2225 ; X32-NEXT: # kill: %XMM1 %XMM1 %YMM1
2225 ; X32-NEXT: # kill: %xmm1 %xmm1 %ymm1
22262226 ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
22272227 ; X32-NEXT: retl
22282228 ;
22292229 ; X64-LABEL: test_mm256_set_m128i:
22302230 ; X64: # BB#0:
2231 ; X64-NEXT: # kill: %XMM1 %XMM1 %YMM1
2231 ; X64-NEXT: # kill: %xmm1 %xmm1 %ymm1
22322232 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
22332233 ; X64-NEXT: retq
22342234 %arg0 = bitcast <2 x i64> %a0 to <4 x float>
28242824 define <8 x float> @test_mm256_setr_m128(<4 x float> %a0, <4 x float> %a1) nounwind {
28252825 ; X32-LABEL: test_mm256_setr_m128:
28262826 ; X32: # BB#0:
2827 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
2827 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
28282828 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
28292829 ; X32-NEXT: retl
28302830 ;
28312831 ; X64-LABEL: test_mm256_setr_m128:
28322832 ; X64: # BB#0:
2833 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
2833 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
28342834 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
28352835 ; X64-NEXT: retq
28362836 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32>
28402840 define <4 x double> @test_mm256_setr_m128d(<2 x double> %a0, <2 x double> %a1) nounwind {
28412841 ; X32-LABEL: test_mm256_setr_m128d:
28422842 ; X32: # BB#0:
2843 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
2843 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
28442844 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
28452845 ; X32-NEXT: retl
28462846 ;
28472847 ; X64-LABEL: test_mm256_setr_m128d:
28482848 ; X64: # BB#0:
2849 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
2849 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
28502850 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
28512851 ; X64-NEXT: retq
28522852 %arg0 = bitcast <2 x double> %a0 to <4 x float>
28592859 define <4 x i64> @test_mm256_setr_m128i(<2 x i64> %a0, <2 x i64> %a1) nounwind {
28602860 ; X32-LABEL: test_mm256_setr_m128i:
28612861 ; X32: # BB#0:
2862 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
2862 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
28632863 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
28642864 ; X32-NEXT: retl
28652865 ;
28662866 ; X64-LABEL: test_mm256_setr_m128i:
28672867 ; X64: # BB#0:
2868 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
2868 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
28692869 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
28702870 ; X64-NEXT: retq
28712871 %arg0 = bitcast <2 x i64> %a0 to <4 x float>
3838 define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
3939 ; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
4040 ; CHECK: # BB#0:
41 ; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1
41 ; CHECK-NEXT: # kill: %xmm1 %xmm1 %ymm1
4242 ; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
4343 ; CHECK-NEXT: ret{{[l|q]}}
4444 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
8787 define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
8888 ; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
8989 ; CHECK: # BB#0:
90 ; CHECK-NEXT: # kill: %XMM0 %XMM0 %YMM0
90 ; CHECK-NEXT: # kill: %xmm0 %xmm0 %ymm0
9191 ; CHECK-NEXT: vzeroupper
9292 ; CHECK-NEXT: ret{{[l|q]}}
9393 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
8484 ; CHECK_O0-LABEL: mov00:
8585 ; CHECK_O0: # BB#0:
8686 ; CHECK_O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
87 ; CHECK_O0-NEXT: # implicit-def: %YMM1
87 ; CHECK_O0-NEXT: # implicit-def: %ymm1
8888 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
8989 ; CHECK_O0-NEXT: vxorps %xmm2, %xmm2, %xmm2
9090 ; CHECK_O0-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm2[1,2,3,4,5,6,7]
103103 ; CHECK_O0-LABEL: mov01:
104104 ; CHECK_O0: # BB#0:
105105 ; CHECK_O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
106 ; CHECK_O0-NEXT: # implicit-def: %YMM1
106 ; CHECK_O0-NEXT: # implicit-def: %ymm1
107107 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
108108 ; CHECK_O0-NEXT: vxorps %xmm2, %xmm2, %xmm2
109109 ; CHECK_O0-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm2[1,2,3]
120120 ;
121121 ; CHECK_O0-LABEL: storev16i16:
122122 ; CHECK_O0: # BB#0:
123 ; CHECK_O0-NEXT: # implicit-def: %RAX
123 ; CHECK_O0-NEXT: # implicit-def: %rax
124124 ; CHECK_O0-NEXT: vmovdqa %ymm0, (%rax)
125125 store <16 x i16> %a, <16 x i16>* undef, align 32
126126 unreachable
134134 ;
135135 ; CHECK_O0-LABEL: storev16i16_01:
136136 ; CHECK_O0: # BB#0:
137 ; CHECK_O0-NEXT: # implicit-def: %RAX
137 ; CHECK_O0-NEXT: # implicit-def: %rax
138138 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rax)
139139 store <16 x i16> %a, <16 x i16>* undef, align 4
140140 unreachable
147147 ;
148148 ; CHECK_O0-LABEL: storev32i8:
149149 ; CHECK_O0: # BB#0:
150 ; CHECK_O0-NEXT: # implicit-def: %RAX
150 ; CHECK_O0-NEXT: # implicit-def: %rax
151151 ; CHECK_O0-NEXT: vmovdqa %ymm0, (%rax)
152152 store <32 x i8> %a, <32 x i8>* undef, align 32
153153 unreachable
161161 ;
162162 ; CHECK_O0-LABEL: storev32i8_01:
163163 ; CHECK_O0: # BB#0:
164 ; CHECK_O0-NEXT: # implicit-def: %RAX
164 ; CHECK_O0-NEXT: # implicit-def: %rax
165165 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rax)
166166 store <32 x i8> %a, <32 x i8>* undef, align 4
167167 unreachable
168168 }
169169
170 ; It is faster to make two saves, if the data is already in XMM registers. For
170 ; It is faster to make two saves, if the data is already in xmm registers. For
171171 ; example, after making an integer operation.
172172 define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
173173 ; CHECK-LABEL: double_save:
178178 ;
179179 ; CHECK_O0-LABEL: double_save:
180180 ; CHECK_O0: # BB#0:
181 ; CHECK_O0-NEXT: # implicit-def: %YMM2
181 ; CHECK_O0-NEXT: # implicit-def: %ymm2
182182 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
183183 ; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
184184 ; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
210210 ;
211211 ; CHECK_O0-LABEL: f_f:
212212 ; CHECK_O0: # BB#0: # %allocas
213 ; CHECK_O0-NEXT: # implicit-def: %AL
213 ; CHECK_O0-NEXT: # implicit-def: %al
214214 ; CHECK_O0-NEXT: testb $1, %al
215215 ; CHECK_O0-NEXT: jne .LBB8_1
216216 ; CHECK_O0-NEXT: jmp .LBB8_2
217217 ; CHECK_O0-NEXT: .LBB8_1: # %cif_mask_all
218218 ; CHECK_O0-NEXT: .LBB8_2: # %cif_mask_mixed
219 ; CHECK_O0-NEXT: # implicit-def: %AL
219 ; CHECK_O0-NEXT: # implicit-def: %al
220220 ; CHECK_O0-NEXT: testb $1, %al
221221 ; CHECK_O0-NEXT: jne .LBB8_3
222222 ; CHECK_O0-NEXT: jmp .LBB8_4
224224 ; CHECK_O0-NEXT: movl $-1, %eax
225225 ; CHECK_O0-NEXT: vmovd %eax, %xmm0
226226 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
227 ; CHECK_O0-NEXT: # implicit-def: %RCX
228 ; CHECK_O0-NEXT: # implicit-def: %YMM2
227 ; CHECK_O0-NEXT: # implicit-def: %rcx
228 ; CHECK_O0-NEXT: # implicit-def: %ymm2
229229 ; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rcx)
230230 ; CHECK_O0-NEXT: .LBB8_4: # %cif_mixed_test_any_check
231231 allocas:
258258 ; CHECK_O0: # BB#0:
259259 ; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm0
260260 ; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1
261 ; CHECK_O0-NEXT: # implicit-def: %YMM2
261 ; CHECK_O0-NEXT: # implicit-def: %ymm2
262262 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
263263 ; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
264264 ; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
303303 ; CHECK_O0: # BB#0:
304304 ; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm0
305305 ; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1
306 ; CHECK_O0-NEXT: # implicit-def: %YMM2
306 ; CHECK_O0-NEXT: # implicit-def: %ymm2
307307 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
308308 ; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
309309 ; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
6060 ; CHECK: # BB#0: # %for_exit499
6161 ; CHECK-NEXT: xorl %eax, %eax
6262 ; CHECK-NEXT: testb %al, %al
63 ; CHECK-NEXT: # implicit-def: %YMM0
63 ; CHECK-NEXT: # implicit-def: %ymm0
6464 ; CHECK-NEXT: jne .LBB4_2
6565 ; CHECK-NEXT: # BB#1: # %load.i1247
6666 ; CHECK-NEXT: pushq %rbp
7474 define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
7575 ; CHECK-LABEL: insert_undef_pd:
7676 ; CHECK: # BB#0:
77 ; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1
77 ; CHECK-NEXT: # kill: %xmm1 %xmm1 %ymm1
7878 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
7979 ; CHECK-NEXT: retq
8080 %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
8585 define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
8686 ; CHECK-LABEL: insert_undef_ps:
8787 ; CHECK: # BB#0:
88 ; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1
88 ; CHECK-NEXT: # kill: %xmm1 %xmm1 %ymm1
8989 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
9090 ; CHECK-NEXT: retq
9191 %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
9696 define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
9797 ; CHECK-LABEL: insert_undef_si:
9898 ; CHECK: # BB#0:
99 ; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1
99 ; CHECK-NEXT: # kill: %xmm1 %xmm1 %ymm1
100100 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
101101 ; CHECK-NEXT: retq
102102 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
11 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX
22 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX512
3 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx,+fast-partial-ymm-or-zmm-write | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=FAST-YMM-ZMM
3 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx,+fast-partial-ymm-or-zmm-write | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=FAST-YMM-ZMM
44 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BTVER2
55
66 declare i32 @foo()
8181 ; VZ-LABEL: test02:
8282 ; VZ: # BB#0:
8383 ; VZ-NEXT: vaddps %ymm1, %ymm0, %ymm0
84 ; VZ-NEXT: # kill: %XMM0 %XMM0 %YMM0
84 ; VZ-NEXT: # kill: %xmm0 %xmm0 %ymm0
8585 ; VZ-NEXT: vzeroupper
8686 ; VZ-NEXT: jmp do_sse # TAILCALL
8787 ;
8888 ; NO-VZ-LABEL: test02:
8989 ; NO-VZ: # BB#0:
9090 ; NO-VZ-NEXT: vaddps %ymm1, %ymm0, %ymm0
91 ; NO-VZ-NEXT: # kill: %XMM0 %XMM0 %YMM0
91 ; NO-VZ-NEXT: # kill: %xmm0 %xmm0 %ymm0
9292 ; NO-VZ-NEXT: jmp do_sse # TAILCALL
9393 %add.i = fadd <8 x float> %a, %b
9494 %add.low = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %add.i, i8 0)
221221 ; VZ-LABEL: test04:
222222 ; VZ: # BB#0:
223223 ; VZ-NEXT: pushq %rax
224 ; VZ-NEXT: # kill: %XMM0 %XMM0 %YMM0
224 ; VZ-NEXT: # kill: %xmm0 %xmm0 %ymm0
225225 ; VZ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
226226 ; VZ-NEXT: callq do_avx
227 ; VZ-NEXT: # kill: %XMM0 %XMM0 %YMM0
227 ; VZ-NEXT: # kill: %xmm0 %xmm0 %ymm0
228228 ; VZ-NEXT: popq %rax
229229 ; VZ-NEXT: vzeroupper
230230 ; VZ-NEXT: retq
232232 ; NO-VZ-LABEL: test04:
233233 ; NO-VZ: # BB#0:
234234 ; NO-VZ-NEXT: pushq %rax
235 ; NO-VZ-NEXT: # kill: %XMM0 %XMM0 %YMM0
235 ; NO-VZ-NEXT: # kill: %xmm0 %xmm0 %ymm0
236236 ; NO-VZ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
237237 ; NO-VZ-NEXT: callq do_avx
238 ; NO-VZ-NEXT: # kill: %XMM0 %XMM0 %YMM0
238 ; NO-VZ-NEXT: # kill: %xmm0 %xmm0 %ymm0
239239 ; NO-VZ-NEXT: popq %rax
240240 ; NO-VZ-NEXT: retq
241241 %shuf = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32>
66 ; X32: # BB#0:
77 ; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
88 ; X32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
9 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
9 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
1010 ; X32-NEXT: vzeroupper
1111 ; X32-NEXT: retl
1212 ;
1414 ; X64: # BB#0:
1515 ; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1616 ; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
17 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
17 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
1818 ; X64-NEXT: vzeroupper
1919 ; X64-NEXT: retq
2020 %B = trunc <4 x i64> %A to <4 x i32>
2626 ; X32: # BB#0:
2727 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2828 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
29 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
29 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
3030 ; X32-NEXT: vzeroupper
3131 ; X32-NEXT: retl
3232 ;
3434 ; X64: # BB#0:
3535 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
3636 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
37 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
37 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
3838 ; X64-NEXT: vzeroupper
3939 ; X64-NEXT: retq
4040 %B = trunc <8 x i32> %A to <8 x i16>
354354 define <4 x i64> @test_mm256_broadcastsi128_si256(<2 x i64> %a0) {
355355 ; CHECK-LABEL: test_mm256_broadcastsi128_si256:
356356 ; CHECK: # BB#0:
357 ; CHECK-NEXT: # kill: %XMM0 %XMM0 %YMM0
357 ; CHECK-NEXT: # kill: %xmm0 %xmm0 %ymm0
358358 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
359359 ; CHECK-NEXT: ret{{[l|q]}}
360360 %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32>
14461446 define <4 x i64> @test0_mm256_inserti128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
14471447 ; CHECK-LABEL: test0_mm256_inserti128_si256:
14481448 ; CHECK: # BB#0:
1449 ; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1
1449 ; CHECK-NEXT: # kill: %xmm1 %xmm1 %ymm1
14501450 ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
14511451 ; CHECK-NEXT: ret{{[l|q]}}
14521452 %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32>
2929 ; NOGATHER: # BB#0: # %entry
3030 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
3131 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
32 ; NOGATHER-NEXT: # implicit-def: %XMM2
32 ; NOGATHER-NEXT: # implicit-def: %xmm2
3333 ; NOGATHER-NEXT: testb $1, %al
3434 ; NOGATHER-NEXT: je .LBB0_2
3535 ; NOGATHER-NEXT: # BB#1: # %cond.load
7979 ; NOGATHER: # BB#0: # %entry
8080 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
8181 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
82 ; NOGATHER-NEXT: # implicit-def: %XMM2
82 ; NOGATHER-NEXT: # implicit-def: %xmm2
8383 ; NOGATHER-NEXT: testb $1, %al
8484 ; NOGATHER-NEXT: je .LBB1_2
8585 ; NOGATHER-NEXT: # BB#1: # %cond.load
130130 ; NOGATHER: # BB#0: # %entry
131131 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
132132 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
133 ; NOGATHER-NEXT: # implicit-def: %XMM2
133 ; NOGATHER-NEXT: # implicit-def: %xmm2
134134 ; NOGATHER-NEXT: testb $1, %al
135135 ; NOGATHER-NEXT: je .LBB2_2
136136 ; NOGATHER-NEXT: # BB#1: # %cond.load
177177 ; NOGATHER: # BB#0: # %entry
178178 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
179179 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
180 ; NOGATHER-NEXT: # implicit-def: %XMM2
180 ; NOGATHER-NEXT: # implicit-def: %xmm2
181181 ; NOGATHER-NEXT: testb $1, %al
182182 ; NOGATHER-NEXT: je .LBB3_2
183183 ; NOGATHER-NEXT: # BB#1: # %cond.load
222222 ; NOGATHER-LABEL: masked_gather_v4i32:
223223 ; NOGATHER: # BB#0: # %entry
224224 ; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
225 ; NOGATHER-NEXT: # implicit-def: %XMM3
225 ; NOGATHER-NEXT: # implicit-def: %xmm3
226226 ; NOGATHER-NEXT: testb $1, %al
227227 ; NOGATHER-NEXT: je .LBB4_2
228228 ; NOGATHER-NEXT: # BB#1: # %cond.load
280280 ; NOGATHER-LABEL: masked_gather_v4float:
281281 ; NOGATHER: # BB#0: # %entry
282282 ; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
283 ; NOGATHER-NEXT: # implicit-def: %XMM3
283 ; NOGATHER-NEXT: # implicit-def: %xmm3
284284 ; NOGATHER-NEXT: testb $1, %al
285285 ; NOGATHER-NEXT: je .LBB5_2
286286 ; NOGATHER-NEXT: # BB#1: # %cond.load
350350 ; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4
351351 ; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3
352352 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
353 ; NOGATHER-NEXT: # implicit-def: %YMM2
353 ; NOGATHER-NEXT: # implicit-def: %ymm2
354354 ; NOGATHER-NEXT: testb $1, %al
355355 ; NOGATHER-NEXT: je .LBB6_2
356356 ; NOGATHER-NEXT: # BB#1: # %cond.load
465465 ; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4
466466 ; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3
467467 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
468 ; NOGATHER-NEXT: # implicit-def: %YMM2
468 ; NOGATHER-NEXT: # implicit-def: %ymm2
469469 ; NOGATHER-NEXT: testb $1, %al
470470 ; NOGATHER-NEXT: je .LBB7_2
471471 ; NOGATHER-NEXT: # BB#1: # %cond.load
578578 ; NOGATHER: # BB#0: # %entry
579579 ; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
580580 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
581 ; NOGATHER-NEXT: # implicit-def: %YMM2
581 ; NOGATHER-NEXT: # implicit-def: %ymm2
582582 ; NOGATHER-NEXT: testb $1, %al
583583 ; NOGATHER-NEXT: je .LBB8_2
584584 ; NOGATHER-NEXT: # BB#1: # %cond.load
655655 ; NOGATHER: # BB#0: # %entry
656656 ; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
657657 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
658 ; NOGATHER-NEXT: # implicit-def: %YMM2
658 ; NOGATHER-NEXT: # implicit-def: %ymm2
659659 ; NOGATHER-NEXT: testb $1, %al
660660 ; NOGATHER-NEXT: je .LBB9_2
661661 ; NOGATHER-NEXT: # BB#1: # %cond.load
726726 ; NOGATHER: # BB#0: # %entry
727727 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
728728 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
729 ; NOGATHER-NEXT: # implicit-def: %XMM2
729 ; NOGATHER-NEXT: # implicit-def: %xmm2
730730 ; NOGATHER-NEXT: testb $1, %al
731731 ; NOGATHER-NEXT: je .LBB10_2
732732 ; NOGATHER-NEXT: # BB#1: # %cond.load
771771 ; NOGATHER: # BB#0: # %entry
772772 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
773773 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
774 ; NOGATHER-NEXT: # implicit-def: %XMM2
774 ; NOGATHER-NEXT: # implicit-def: %xmm2
775775 ; NOGATHER-NEXT: testb $1, %al
776776 ; NOGATHER-NEXT: je .LBB11_2
777777 ; NOGATHER-NEXT: # BB#1: # %cond.load
531531 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
532532 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
533533 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
534 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
534 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
535535 ; X32-NEXT: vzeroupper
536536 ; X32-NEXT: retl
537537 ;
542542 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
543543 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
544544 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
545 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
545 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
546546 ; X64-NEXT: vzeroupper
547547 ; X64-NEXT: retq
548548 %res = shl <8 x i16> %lhs, %rhs
581581 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
582582 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
583583 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
584 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
584 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
585585 ; X32-NEXT: vzeroupper
586586 ; X32-NEXT: retl
587587 ;
592592 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
593593 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
594594 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
595 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
595 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
596596 ; X64-NEXT: vzeroupper
597597 ; X64-NEXT: retq
598598 %res = lshr <8 x i16> %lhs, %rhs
408408 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
409409 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
410410 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
411 ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
411 ; X32-NEXT: # kill: %xmm0 %xmm0 %ymm0
412412 ; X32-NEXT: vzeroupper
413413 ; X32-NEXT: retl
414414 ;
419419 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
420420 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
421421 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
422 ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
422 ; X64-NEXT: # kill: %xmm0 %xmm0 %ymm0
423423 ; X64-NEXT: vzeroupper
424424 ; X64-NEXT: retq
425425 %shl = shl <8 x i16> %r, %a