llvm.org GIT mirror llvm / d0e875c
[Power9] Part-word VSX integer scalar loads/stores and sign extend instructions. This patch corresponds to review: https://reviews.llvm.org/D23155 This patch removes the VSHRC register class (based on D20310) and adds exploitation of the Power9 sub-word integer loads into VSX registers as well as vector sign extensions. The new instructions are useful for a few purposes: (1) Int to Fp conversions of 1 or 2-byte values loaded from memory; (2) Building vectors of 1 or 2-byte integers with values loaded from memory; (3) Storing individual 1 or 2-byte elements from integer vectors. This patch implements all of those uses. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283190 91177308-0d34-0410-b5e6-96231b3b80d8 Nemanja Ivanovic 3 years ago
33 changed file(s) with 1893 addition(s) and 399 deletion(s). Raw diff Collapse all Expand all
8282 PPC::F24, PPC::F25, PPC::F26, PPC::F27,
8383 PPC::F28, PPC::F29, PPC::F30, PPC::F31
8484 };
85 static const MCPhysReg VFRegs[32] = {
86 PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
87 PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
88 PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
89 PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
90 PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
91 PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
92 PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
93 PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
94 };
8595 static const MCPhysReg VRegs[32] = {
8696 PPC::V0, PPC::V1, PPC::V2, PPC::V3,
8797 PPC::V4, PPC::V5, PPC::V6, PPC::V7,
102112 PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
103113 PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
104114
105 PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
106 PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
107 PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
108 PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
109 PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
110 PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
111 PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
112 PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
115 PPC::V0, PPC::V1, PPC::V2, PPC::V3,
116 PPC::V4, PPC::V5, PPC::V6, PPC::V7,
117 PPC::V8, PPC::V9, PPC::V10, PPC::V11,
118 PPC::V12, PPC::V13, PPC::V14, PPC::V15,
119 PPC::V16, PPC::V17, PPC::V18, PPC::V19,
120 PPC::V20, PPC::V21, PPC::V22, PPC::V23,
121 PPC::V24, PPC::V25, PPC::V26, PPC::V27,
122 PPC::V28, PPC::V29, PPC::V30, PPC::V31
113123 };
114124 static const MCPhysReg VSFRegs[64] = {
115125 PPC::F0, PPC::F1, PPC::F2, PPC::F3,
594604 void addRegF8RCOperands(MCInst &Inst, unsigned N) const {
595605 assert(N == 1 && "Invalid number of operands!");
596606 Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
607 }
608
609 void addRegVFRCOperands(MCInst &Inst, unsigned N) const {
610 assert(N == 1 && "Invalid number of operands!");
611 Inst.addOperand(MCOperand::createReg(VFRegs[getReg()]));
597612 }
598613
599614 void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
8888 PPC::F28, PPC::F29, PPC::F30, PPC::F31
8989 };
9090
91 static const unsigned VFRegs[] = {
92 PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
93 PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
94 PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
95 PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
96 PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
97 PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
98 PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
99 PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
100 };
101
91102 static const unsigned VRegs[] = {
92103 PPC::V0, PPC::V1, PPC::V2, PPC::V3,
93104 PPC::V4, PPC::V5, PPC::V6, PPC::V7,
109120 PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
110121 PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
111122
112 PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
113 PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
114 PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
115 PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
116 PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
117 PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
118 PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
119 PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
123 PPC::V0, PPC::V1, PPC::V2, PPC::V3,
124 PPC::V4, PPC::V5, PPC::V6, PPC::V7,
125 PPC::V8, PPC::V9, PPC::V10, PPC::V11,
126 PPC::V12, PPC::V13, PPC::V14, PPC::V15,
127 PPC::V16, PPC::V17, PPC::V18, PPC::V19,
128 PPC::V20, PPC::V21, PPC::V22, PPC::V23,
129 PPC::V24, PPC::V25, PPC::V26, PPC::V27,
130 PPC::V28, PPC::V29, PPC::V30, PPC::V31
120131 };
121132
122133 static const unsigned VSFRegs[] = {
239250 uint64_t Address,
240251 const void *Decoder) {
241252 return decodeRegisterClass(Inst, RegNo, FRegs);
253 }
254
255 static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
256 uint64_t Address,
257 const void *Decoder) {
258 return decodeRegisterClass(Inst, RegNo, VFRegs);
242259 }
243260
244261 static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
1111 //===----------------------------------------------------------------------===//
1212
1313 #include "PPCInstPrinter.h"
14 #include "PPCInstrInfo.h"
1415 #include "MCTargetDesc/PPCMCTargetDesc.h"
1516 #include "MCTargetDesc/PPCPredicates.h"
1617 #include "llvm/MC/MCExpr.h"
446447 /// stripRegisterPrefix - This method strips the character prefix from a
447448 /// register name so that only the number is left. Used for Linux asm.
448449 static const char *stripRegisterPrefix(const char *RegName) {
449 if (FullRegNames)
450 if (FullRegNames || ShowVSRNumsAsVR)
450451 return RegName;
451452
452453 switch (RegName[0]) {
467468 raw_ostream &O) {
468469 const MCOperand &Op = MI->getOperand(OpNo);
469470 if (Op.isReg()) {
470 const char *RegName = getRegisterName(Op.getReg());
471 if (ShowVSRNumsAsVR) {
472 unsigned RegNum = Op.getReg();
473 if (RegNum >= PPC::VSH0 && RegNum <= PPC::VSH31)
474 O << 'v' << RegNum - PPC::VSH0;
475 else
476 O << RegName;
477 return;
478 }
471 unsigned Reg = Op.getReg();
472
473 // There are VSX instructions that use VSX register numbering (vs0 - vs63)
474 // as well as those that use VMX register numbering (v0 - v31 which
475 // correspond to vs32 - vs63). If we have an instruction that uses VSX
476 // numbering, we need to convert the VMX registers to VSX registers.
477 // Namely, we print 32-63 when the instruction operates on one of the
478 // VMX registers.
479 // (Please synchronize with PPCAsmPrinter::printOperand)
480 if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) &&
481 !ShowVSRNumsAsVR) {
482 if (PPCInstrInfo::isVRRegister(Reg))
483 Reg = PPC::VSX32 + (Reg - PPC::V0);
484 else if (PPCInstrInfo::isVFRegister(Reg))
485 Reg = PPC::VSX32 + (Reg - PPC::VF0);
486 }
487
488 const char *RegName = getRegisterName(Reg);
479489 // The Linux and AIX assemblers do not take register prefixes.
480490 if (!isDarwinSyntax())
481491 RegName = stripRegisterPrefix(RegName);
1010 //
1111 //===----------------------------------------------------------------------===//
1212
13 #include "PPCInstrInfo.h"
1314 #include "MCTargetDesc/PPCMCTargetDesc.h"
1415 #include "MCTargetDesc/PPCFixupKinds.h"
1516 #include "llvm/ADT/Statistic.h"
349350 return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
350351 }
351352
352
353353 unsigned PPCMCCodeEmitter::
354354 getMachineOpValue(const MCInst &MI, const MCOperand &MO,
355355 SmallVectorImpl &Fixups,
360360 assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
361361 MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
362362 MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
363 return CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
363 unsigned Reg = MO.getReg();
364 unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg);
365
366 if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg))
367 if (PPCInstrInfo::isVRRegister(Reg))
368 Encode += 32;
369
370 return Encode;
364371 }
365372
366373 assert(MO.isImm() &&
166166
167167 switch (MO.getType()) {
168168 case MachineOperand::MO_Register: {
169 const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
169 unsigned Reg = MO.getReg();
170
171 // There are VSX instructions that use VSX register numbering (vs0 - vs63)
172 // as well as those that use VMX register numbering (v0 - v31 which
173 // correspond to vs32 - vs63). If we have an instruction that uses VSX
174 // numbering, we need to convert the VMX registers to VSX registers.
175 // Namely, we print 32-63 when the instruction operates on one of the
176 // VMX registers.
177 // (Please synchronize with PPCInstPrinter::printOperand)
178 if (MI->getDesc().TSFlags & PPCII::UseVSXReg) {
179 if (PPCInstrInfo::isVRRegister(Reg))
180 Reg = PPC::VSX32 + (Reg - PPC::V0);
181 else if (PPCInstrInfo::isVFRegister(Reg))
182 Reg = PPC::VSX32 + (Reg - PPC::VF0);
183 }
184 const char *RegName = PPCInstPrinter::getRegisterName(Reg);
185
170186 // Linux assembler (Others?) does not take register mnemonics.
171187 // FIXME - What about special registers used in mfspr/mtspr?
172188 if (!Subtarget->isDarwin())
6767
6868 // Vector types returned as "direct" go into V2 .. V9; note that only the
6969 // ELFv2 ABI fully utilizes all these registers.
70 CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
70 CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
7171 CCIfSubtarget<"hasAltivec()",
72 CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
73 CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
74 CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
72 CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
7573 ]>;
7674
7775 // No explicit register is specified for the AnyReg calling convention. The
120118 CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
121119 CCIfType<[v4f64, v4f32, v4i1],
122120 CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
123 CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
121 CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
124122 CCIfSubtarget<"hasAltivec()",
125 CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
126 CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
127 CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
123 CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
128124 ]>;
129125
130126 //===----------------------------------------------------------------------===//
192188 CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
193189
194190 // The first 12 Vector arguments are passed in AltiVec registers.
195 CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
191 CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
196192 CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
197193 V8, V9, V10, V11, V12, V13]>>>,
198 CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
199 CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
200 VSH10, VSH11, VSH12, VSH13]>>>,
201194
202195 CCDelegateTo
203196 ]>;
286279 (sequence "V%u", 0, 31))>;
287280
288281 def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
289 (sequence "VSL%u", 0, 31),
290 (sequence "VSH%u", 0, 31))>;
291
282 (sequence "VSL%u", 0, 31))>;
283
684684 }
685685
686686 if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
687 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Legal);
687 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
688688 }
689689
690690 if (Subtarget.hasQPX()) {
10741074 case PPCISD::STBRX: return "PPCISD::STBRX";
10751075 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
10761076 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1077 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1078 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1079 case PPCISD::VEXTS: return "PPCISD::VEXTS";
10771080 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
10781081 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
10791082 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
29852988 break;
29862989 case MVT::v2f64:
29872990 case MVT::v2i64:
2988 RC = &PPC::VSHRCRegClass;
2991 RC = &PPC::VRRCRegClass;
29892992 break;
29902993 case MVT::v4f64:
29912994 RC = &PPC::QFRCRegClass;
31673170 static const MCPhysReg VR[] = {
31683171 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
31693172 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3170 };
3171 static const MCPhysReg VSRH[] = {
3172 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
3173 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
31743173 };
31753174
31763175 const unsigned Num_GPR_Regs = array_lengthof(GPR);
34473446 // passed directly. The latter are used to implement ELFv2 homogeneous
34483447 // vector aggregates.
34493448 if (VR_idx != Num_VR_Regs) {
3450 unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
3451 MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
3452 MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3449 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
34533450 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
34543451 ++VR_idx;
34553452 } else {
50545051 static const MCPhysReg VR[] = {
50555052 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
50565053 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5057 };
5058 static const MCPhysReg VSRH[] = {
5059 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
5060 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
50615054 };
50625055
50635056 const unsigned NumGPRs = array_lengthof(GPR);
54855478 SDValue Load =
54865479 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
54875480 MemOpChains.push_back(Load.getValue(1));
5488
5489 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
5490 Arg.getSimpleValueType() == MVT::v2i64) ?
5491 VSRH[VR_idx] : VR[VR_idx];
5492 ++VR_idx;
5493
5494 RegsToPass.push_back(std::make_pair(VReg, Load));
5481 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
54955482 }
54965483 ArgOffset += 16;
54975484 for (unsigned i=0; i<16; i+=PtrByteSize) {
55095496
55105497 // Non-varargs Altivec params go into VRs or on the stack.
55115498 if (VR_idx != NumVRs) {
5512 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
5513 Arg.getSimpleValueType() == MVT::v2i64) ?
5514 VSRH[VR_idx] : VR[VR_idx];
5515 ++VR_idx;
5516
5517 RegsToPass.push_back(std::make_pair(VReg, Arg));
5499 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
55185500 } else {
55195501 if (CallConv == CallingConv::Fast)
55205502 ComputePtrOff();
70937075 }
70947076
70957077 static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
7096 if (BVN->getValueType(0) != Type)
7078 if (BVN->isConstant() || BVN->getValueType(0) != Type)
70977079 return false;
70987080 auto OpZero = BVN->getOperand(0);
70997081 for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
72297211 auto OpZero = BVN->getOperand(0);
72307212 bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
72317213 BVN->isOnlyUserOf(OpZero.getNode());
7232 if (Subtarget.isISA3_0() &&
7233 isNonConstSplatBV(BVN, MVT::v4i32) && !CanLoadAndSplat)
7214 if (Subtarget.isISA3_0() && !CanLoadAndSplat &&
7215 (isNonConstSplatBV(BVN, MVT::v4i32) ||
7216 isNonConstSplatBV(BVN, MVT::v2i64)))
72347217 return Op;
72357218 return SDValue();
72367219 }
1057010553 SDLoc dl(N);
1057110554 SDValue Op(N, 0);
1057210555
10556 SDValue FirstOperand(Op.getOperand(0));
10557 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
10558 (FirstOperand.getValueType() == MVT::i8 ||
10559 FirstOperand.getValueType() == MVT::i16);
10560 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
10561 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
10562 bool DstDouble = Op.getValueType() == MVT::f64;
10563 unsigned ConvOp = Signed ?
10564 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
10565 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
10566 SDValue WidthConst =
10567 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
10568 dl, false);
10569 LoadSDNode *LDN = cast(FirstOperand.getNode());
10570 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
10571 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
10572 DAG.getVTList(MVT::f64, MVT::Other),
10573 Ops, MVT::i8, LDN->getMemOperand());
10574
10575 // For signed conversion, we need to sign-extend the value in the VSR
10576 if (Signed) {
10577 SDValue ExtOps[] = { Ld, WidthConst };
10578 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
10579 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
10580 } else
10581 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
10582 }
10583
1057310584 // Don't handle ppc_fp128 here or i1 conversions.
1057410585 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
1057510586 return SDValue();
1078210793 case ISD::UINT_TO_FP:
1078310794 return combineFPToIntToFP(N, DCI);
1078410795 case ISD::STORE: {
10796 EVT Op1VT = N->getOperand(1).getValueType();
10797 bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
10798 (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
10799
1078510800 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
1078610801 if (Subtarget.hasSTFIWX() && !cast(N)->isTruncatingStore() &&
1078710802 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
10788 N->getOperand(1).getValueType() == MVT::i32 &&
10803 ValidTypeForStoreFltAsInt &&
1078910804 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
1079010805 SDValue Val = N->getOperand(1).getOperand(0);
1079110806 if (Val.getValueType() == MVT::f32) {
1079510810 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
1079610811 DCI.AddToWorklist(Val.getNode());
1079710812
10798 SDValue Ops[] = {
10799 N->getOperand(0), Val, N->getOperand(2),
10800 DAG.getValueType(N->getOperand(1).getValueType())
10801 };
10802
10803 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
10804 DAG.getVTList(MVT::Other), Ops,
10805 cast(N)->getMemoryVT(),
10806 cast(N)->getMemOperand());
10813 if (Op1VT == MVT::i32) {
10814 SDValue Ops[] = {
10815 N->getOperand(0), Val, N->getOperand(2),
10816 DAG.getValueType(N->getOperand(1).getValueType())
10817 };
10818
10819 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
10820 DAG.getVTList(MVT::Other), Ops,
10821 cast(N)->getMemoryVT(),
10822 cast(N)->getMemOperand());
10823 } else {
10824 unsigned WidthInBytes =
10825 N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
10826 SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
10827
10828 SDValue Ops[] = {
10829 N->getOperand(0), Val, N->getOperand(2), WidthConst,
10830 DAG.getValueType(N->getOperand(1).getValueType())
10831 };
10832 Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
10833 DAG.getVTList(MVT::Other), Ops,
10834 cast(N)->getMemoryVT(),
10835 cast(N)->getMemOperand());
10836 }
10837
1080710838 DCI.AddToWorklist(Val.getNode());
1080810839 return Val;
1080910840 }
4949 /// unsigned integers.
5050 FCTIDUZ, FCTIWUZ,
5151
52 /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
53 /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
54 VEXTS,
55
5256 /// Reciprocal estimate instructions (unary FP ops).
5357 FRE, FRSQRTE,
5458
363367 /// load which zero-extends from a 32-bit integer value into the
364368 /// destination 64-bit register.
365369 LFIWZX,
370
371 /// VSFRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
372 /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
373 /// This can be used for converting loaded integers to floating point.
374 LXSIZX,
375
376 /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
377 /// chain, then an f64 value to store, then an address to store it to,
378 /// followed by a byte-width for the store.
379 STXSIX,
366380
367381 /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
368382 /// Maps directly to an lxvd2x instruction that will be followed by
705705 "vspltw $vD, $vB, $UIMM", IIC_VecPerm,
706706 [(set v16i8:$vD,
707707 (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
708 let isCodeGenOnly = 1 in {
709 def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
710 "vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>;
711 def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
712 "vsplth $vD, $vB, $UIMM", IIC_VecPerm, []>;
713 }
708714
709715 def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
710716 def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
12691275 class VX_VT5_EO5_VB5 xo, bits<5> eo, string opc, list pattern>
12701276 : VXForm_RD5_XO5_RS5
12711277 !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
1278 class VX_VT5_EO5_VB5s xo, bits<5> eo, string opc, list pattern>
1279 : VXForm_RD5_XO5_RS5
1280 !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
12721281
12731282 // Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]
12741283 def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs g8rc:$rD), (ins vrrc:$vB),
12911300 def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>;
12921301 def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>;
12931302 def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>;
1303 let isCodeGenOnly = 1 in {
1304 def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
1305 def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;
1306 def VEXTSB2Ds : VX_VT5_EO5_VB5s<1538, 24, "vextsb2d", []>;
1307 def VEXTSH2Ds : VX_VT5_EO5_VB5s<1538, 25, "vextsh2d", []>;
1308 def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>;
1309 }
12941310
12951311 // Vector Integer Negate
12961312 def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw", []>;
3737 let TSFlags{2} = PPC970_Cracked;
3838 let TSFlags{5-3} = PPC970_Unit;
3939
40 /// Indicate that the VSX instruction is to use VSX numbering/encoding.
41 /// Since ISA 3.0, there are scalar instructions that use the upper
42 /// half of the VSX register set only. Rather than adding further complexity
43 /// to the register class set, the VSX registers just include the Altivec
44 /// registers and this flag decides the numbering to be used for them.
45 bits<1> UseVSXReg = 0;
46 let TSFlags{6} = UseVSXReg;
47
4048 // Fields used for relation models.
4149 string BaseName = "";
4250
6068 class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
6169 class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
6270 class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
71
72 class UseVSXReg { bits<1> UseVSXReg = 1; }
6373
6474 // Two joined instructions; used to emit two adjacent instructions as one.
6575 // The itinerary from the first instruction is used for scheduling and
858858 llvm_unreachable("nop VSX copy");
859859
860860 DestReg = SuperReg;
861 } else if (PPC::VRRCRegClass.contains(DestReg) &&
862 PPC::VSRCRegClass.contains(SrcReg)) {
863 unsigned SuperReg =
864 TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
865
866 if (VSXSelfCopyCrash && SrcReg == SuperReg)
867 llvm_unreachable("nop VSX copy");
868
869 DestReg = SuperReg;
870861 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
871862 PPC::VSRCRegClass.contains(DestReg)) {
872863 unsigned SuperReg =
873864 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
874
875 if (VSXSelfCopyCrash && DestReg == SuperReg)
876 llvm_unreachable("nop VSX copy");
877
878 SrcReg = SuperReg;
879 } else if (PPC::VRRCRegClass.contains(SrcReg) &&
880 PPC::VSRCRegClass.contains(DestReg)) {
881 unsigned SuperReg =
882 TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
883865
884866 if (VSXSelfCopyCrash && DestReg == SuperReg)
885867 llvm_unreachable("nop VSX copy");
10721054 PPCFunctionInfo *FuncInfo = MF.getInfo();
10731055 FuncInfo->setHasSpills();
10741056
1057 // We need to avoid a situation in which the value from a VRRC register is
1058 // spilled using an Altivec instruction and reloaded into a VSRC register
1059 // using a VSX instruction. The issue with this is that the VSX
1060 // load/store instructions swap the doublewords in the vector and the Altivec
1061 // ones don't. The register classes on the spill/reload may be different if
1062 // the register is defined using an Altivec instruction and is then used by a
1063 // VSX instruction.
1064 RC = updatedRC(RC);
1065
10751066 bool NonRI = false, SpillsVRS = false;
10761067 if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
10771068 NonRI, SpillsVRS))
11831174
11841175 PPCFunctionInfo *FuncInfo = MF.getInfo();
11851176 FuncInfo->setHasSpills();
1177
1178 // We need to avoid a situation in which the value from a VRRC register is
1179 // spilled using an Altivec instruction and reloaded into a VSRC register
1180 // using a VSX instruction. The issue with this is that the VSX
1181 // load/store instructions swap the doublewords in the vector and the Altivec
1182 // ones don't. The register classes on the spill/reload may be different if
1183 // the register is defined using an Altivec instruction and is then used by a
1184 // VSX instruction.
1185 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
1186 RC = &PPC::VSRCRegClass;
11861187
11871188 bool NonRI = false, SpillsVRS = false;
11881189 if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
18831884 }
18841885 return false;
18851886 }
1887
1888 const TargetRegisterClass *
1889 PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
1890 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
1891 return &PPC::VSRCRegClass;
1892 return RC;
1893 }
6060 PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
6161 PPC970_BRU = 7 << PPC970_Shift // Branch Unit
6262 };
63
64 enum {
65 /// Shift count to bypass PPC970 flags
66 NewDef_Shift = 6,
67
68 /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX
69 /// register (v0-v31).
70 UseVSXReg = 0x1 << NewDef_Shift
71 };
6372 } // end namespace PPCII
6473
6574 class PPCSubtarget;
272281
273282 // Lower pseudo instructions after register allocation.
274283 bool expandPostRAPseudo(MachineInstr &MI) const override;
284
285 static bool isVFRegister(unsigned Reg) {
286 return Reg >= PPC::VF0 && Reg <= PPC::VF31;
287 }
288 static bool isVRRegister(unsigned Reg) {
289 return Reg >= PPC::V0 && Reg <= PPC::V31;
290 }
291 const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
275292 };
276293
277294 }
2121 ]>;
2222 def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
2323 SDTCisVT<0, f64>, SDTCisPtrTy<1>
24 ]>;
25 def SDT_PPCLxsizx : SDTypeProfile<1, 2, [
26 SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
27 ]>;
28 def SDT_PPCstxsix : SDTypeProfile<0, 3, [
29 SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
30 ]>;
31 def SDT_PPCVexts : SDTypeProfile<1, 2, [
32 SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
2433 ]>;
2534
2635 def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
107116 [SDNPHasChain, SDNPMayLoad]>;
108117 def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
109118 [SDNPHasChain, SDNPMayLoad]>;
119 def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
120 [SDNPHasChain, SDNPMayLoad]>;
121 def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
122 [SDNPHasChain, SDNPMayStore]>;
123 def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
110124
111125 // Extract FPSCR (not modeled at the DAG level).
112126 def PPCmffs : SDNode<"PPCISD::MFFS",
443457 }
444458 def vrrc : RegisterOperand {
445459 let ParserMatchClass = PPCRegVRRCAsmOperand;
460 }
461 def PPCRegVFRCAsmOperand : AsmOperandClass {
462 let Name = "RegVFRC"; let PredicateMethod = "isRegNumber";
463 }
464 def vfrc : RegisterOperand {
465 let ParserMatchClass = PPCRegVFRCAsmOperand;
446466 }
447467 def PPCRegCRBITRCAsmOperand : AsmOperandClass {
448468 let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
8888 }
8989 }
9090
91 // Instruction form with a single input register for instructions such as
92 // XXPERMDI. The reason for defining this is that specifying multiple chained
93 // operands (such as loads) to an instruction will perform both chained
94 // operations rather than coalescing them into a single register - even though
95 // the source memory location is the same. This simply forces the instruction
96 // to use the same register for both inputs.
97 // For example, an output DAG such as this:
98 // (XXPERMDI (LXSIBZX xoaddr:$src), (LXSIBZX xoaddr:$src), 0)
99 // would result in two load instructions emitted and used as separate inputs
100 // to the XXPERMDI instruction.
101 class XX3Form_2s opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
102 InstrItinClass itin, list pattern>
103 : XX3Form_2 {
104 let XB = XA;
105 }
106
91107 def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
92108 def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
93109 def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
95111
96112 let Predicates = [HasVSX] in {
97113 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
114 let UseVSXReg = 1 in {
98115 let hasSideEffects = 0 in { // VSX instructions don't have side effects.
99116 let Uses = [RM] in {
100117
782799 def XXPERMDI : XX3Form_2<60, 10,
783800 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
784801 "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
802 let isCodeGenOnly = 1 in
803 def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA, u2imm:$DM),
804 "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
785805 def XXSEL : XX4Form<60, 3,
786806 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
787807 "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
796816 "xxspltw $XT, $XB, $UIM", IIC_VecPerm,
797817 [(set v4i32:$XT,
798818 (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
819 let isCodeGenOnly = 1 in
820 def XXSPLTWs : XX2Form_2<60, 164,
821 (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
822 "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
799823 } // hasSideEffects
824 } // UseVSXReg = 1
800825
801826 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
802827 // instruction selection into a branch sequence.
848873 (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
849874 def : InstAlias<"xxswapd $XT, $XB",
850875 (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
876 def : InstAlias<"xxspltd $XT, $XB, 0",
877 (XXPERMDIs vsrc:$XT, vfrc:$XB, 0)>;
878 def : InstAlias<"xxspltd $XT, $XB, 1",
879 (XXPERMDIs vsrc:$XT, vfrc:$XB, 3)>;
880 def : InstAlias<"xxswapd $XT, $XB",
881 (XXPERMDIs vsrc:$XT, vfrc:$XB, 2)>;
851882
852883 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
853884
10701101 } // AddedComplexity
10711102 } // HasVSX
10721103
1104 def ScalarLoads {
1105 dag Li8 = (i32 (extloadi8 xoaddr:$src));
1106 dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
1107 dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
1108 dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
1109 dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
1110
1111 dag Li16 = (i32 (extloadi16 xoaddr:$src));
1112 dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
1113 dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
1114 dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
1115 dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
1116
1117 dag Li32 = (i32 (load xoaddr:$src));
1118 }
1119
10731120 // The following VSX instructions were introduced in Power ISA 2.07
10741121 /* FIXME: if the operands are v2i64, these patterns will not match.
10751122 we should define new patterns or otherwise match the same patterns
10791126 def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
10801127 let Predicates = [HasP8Vector] in {
10811128 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
1082 let isCommutable = 1 in {
1129 let isCommutable = 1, UseVSXReg = 1 in {
10831130 def XXLEQV : XX3Form<60, 186,
10841131 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
10851132 "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
10891136 "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
10901137 [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
10911138 v4i32:$XB)))]>;
1092 } // isCommutable
1139 } // isCommutable, UseVSXReg
10931140
10941141 def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
10951142 (XXLEQV $A, $B)>;
10961143
1144 let UseVSXReg = 1 in {
10971145 def XXLORC : XX3Form<60, 170,
10981146 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
10991147 "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
11211169 "stxsiwx $XT, $dst", IIC_LdStSTFD,
11221170 [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
11231171 } // mayStore
1172 } // UseVSXReg = 1
11241173
11251174 def : Pat<(f64 (extloadf32 xoaddr:$src)),
11261175 (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
11481197 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
11491198 (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
11501199
1200 let UseVSXReg = 1 in {
11511201 // VSX Elementary Scalar FP arithmetic (SP)
11521202 let isCommutable = 1 in {
11531203 def XSADDSP : XX3Form<60, 0,
12721322 "xscvdpspn $XT, $XB", IIC_VecFP, []>;
12731323 def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
12741324 "xscvspdpn $XT, $XB", IIC_VecFP, []>;
1325 } // UseVSXReg = 1
12751326
12761327 let Predicates = [IsLittleEndian] in {
12771328 def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
12941345 def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
12951346 (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
12961347 }
1348 def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
1349 (v4i32 (XXSPLTWs (LXSIWAX xoaddr:$src), 1))>;
12971350 } // AddedComplexity = 400
12981351 } // HasP8Vector
12991352
1353 let UseVSXReg = 1 in {
13001354 let Predicates = [HasDirectMove] in {
13011355 // VSX direct move instructions
13021356 def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
13311385 []>, Requires<[In64BitMode]>;
13321386
13331387 } // IsISA3_0, HasDirectMove
1388 } // UseVSXReg = 1
13341389
13351390 /* Direct moves of various widths from GPR's into VSR's. Each move lines
13361391 the value up into element 0 (both BE and LE). Namely, entities smaller than
19101965 : X_RD5_XO5_RS5
19111966 !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
19121967
1968 let UseVSXReg = 1 in {
19131969 // [PO T XO B XO BX /]
19141970 class XX2_RT5_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc,
19151971 list pattern>
19281984 InstrItinClass itin, list pattern>
19291985 : XX3Form
19301986 !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
1987 } // UseVSXReg = 1
19311988
19321989 // [PO VRT VRA VRB XO /]
19331990 class X_VT5_VA5_VB5 opcode, bits<10> xo, string opc,
19962053 // DP/QP Compare Exponents
19972054 def XSCMPEXPDP : XX3Form_1<60, 59,
19982055 (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
1999 "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
2056 "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>,
2057 UseVSXReg;
20002058 def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
20012059
20022060 // DP Compare ==, >=, >, !=
20102068 IIC_FPCompare, []>;
20112069 def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc,
20122070 IIC_FPCompare, []>;
2071 let UseVSXReg = 1 in {
20132072 // Vector Compare Not Equal
20142073 def XVCMPNEDP : XX3Form_Rc<60, 123,
20152074 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
20272086 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
20282087 "xvcmpnesp. $XT, $XA, $XB", IIC_VecFPCompare, []>,
20292088 isDOT;
2089 } // UseVSXReg = 1
20302090
20312091 //===--------------------------------------------------------------------===//
20322092 // Quad-Precision Floating-Point Conversion Instructions:
20332093
20342094 // Convert DP -> QP
2035 def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>;
2095 def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, []>;
20362096
20372097 // Round & Convert QP -> DP (dword[1] is set to zero)
20382098 def XSCVQPDP : X_VT5_XO5_VB5 <63, 20, 836, "xscvqpdp" , []>;
20452105 def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
20462106
20472107 // Convert (Un)Signed DWord -> QP
2048 def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>;
2049 def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vsfrc, []>;
2108 def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
2109 def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
20502110
20512111 //===--------------------------------------------------------------------===//
20522112 // Round to Floating-Point Integer Instructions
20832143 // Insert Exponent DP/QP
20842144 // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
20852145 def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
2086 "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
2146 "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg;
20872147 // vB NOTE: only vB.dword[0] is used, that's why we don't use
20882148 // X_VT5_VA5_VB5 form
20892149 def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
20922152 // Extract Exponent/Significand DP/QP
20932153 def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
20942154 def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
2155
20952156 def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
20962157 def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
20972158
20982159 // Vector Insert Word
2160 let UseVSXReg = 1 in {
20992161 // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
21002162 def XXINSERTW :
21012163 XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
21092171 def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
21102172 (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
21112173 "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
2174 } // UseVSXReg = 1
21122175
21132176 // Vector Insert Exponent DP/SP
21142177 def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
21252188 //===--------------------------------------------------------------------===//
21262189
21272190 // Test Data Class SP/DP/QP
2191 let UseVSXReg = 1 in {
21282192 def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
21292193 (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
21302194 "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
21312195 def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
21322196 (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
21332197 "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
2198 } // UseVSXReg = 1
21342199 def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
21352200 (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
21362201 "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
21372202
21382203 // Vector Test Data Class SP/DP
2204 let UseVSXReg = 1 in {
21392205 def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
21402206 (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
21412207 "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, []>;
21422208 def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
21432209 (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
21442210 "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, []>;
2211 } // UseVSXReg = 1
21452212
21462213 //===--------------------------------------------------------------------===//
21472214
21722239
21732240 // Vector Splat Immediate Byte
21742241 def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
2175 "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
2242 "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg;
21762243
21772244 //===--------------------------------------------------------------------===//
21782245 // Vector/Scalar Load/Store Instructions
21802247 let mayLoad = 1 in {
21812248 // Load Vector
21822249 def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
2183 "lxv $XT, $src", IIC_LdStLFD, []>;
2250 "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
21842251 // Load DWord
2185 def LXSD : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src),
2252 def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
21862253 "lxsd $vD, $src", IIC_LdStLFD, []>;
21872254 // Load SP from src, convert it to DP, and place in dword[0]
2188 def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src),
2255 def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
21892256 "lxssp $vD, $src", IIC_LdStLFD, []>;
21902257
21912258 // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
21932260 class X_XT6_RA5_RB5 opcode, bits<10> xo, string opc,
21942261 RegisterOperand vtype, list pattern>
21952262 : XX1Form
2196 !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
2263 !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg;
21972264
21982265 // Load as Integer Byte/Halfword & Zero Indexed
2199 def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, []>;
2200 def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, []>;
2266 def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
2267 [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
2268 def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
2269 [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>;
22012270
22022271 // Load Vector Halfword*8/Byte*16 Indexed
22032272 def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
22132282
22142283 // Load Vector Word & Splat Indexed
22152284 def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
2216 } // end mayLoad
2285 } // mayLoad
22172286
22182287 let mayStore = 1 in {
22192288 // Store Vector
22202289 def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
2221 "stxv $XT, $dst", IIC_LdStSTFD, []>;
2290 "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
22222291 // Store DWord
2223 def STXSD : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst),
2292 def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
22242293 "stxsd $vS, $dst", IIC_LdStSTFD, []>;
22252294 // Convert DP of dword[0] to SP, and Store to dst
2226 def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst),
2295 def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
22272296 "stxssp $vS, $dst", IIC_LdStSTFD, []>;
22282297
22292298 // [PO S RA RB XO SX]
22302299 class X_XS6_RA5_RB5 opcode, bits<10> xo, string opc,
22312300 RegisterOperand vtype, list pattern>
22322301 : XX1Form
2233 !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
2302 !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg;
22342303
22352304 // Store as Integer Byte/Halfword Indexed
2236 def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, []>;
2237 def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, []>;
2305 def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
2306 [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
2307 def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
2308 [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
2309 let isCodeGenOnly = 1 in {
2310 def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vrrc, []>;
2311 def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vrrc, []>;
2312 }
22382313
22392314 // Store Vector Halfword*8/Byte*16 Indexed
22402315 def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>;
22472322 // Store Vector (Left-justified) with Length
22482323 def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>;
22492324 def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>;
2250 } // end mayStore
2325 } // mayStore
22512326
22522327 // Patterns for which instructions from ISA 3.0 are a better match
22532328 let Predicates = [IsLittleEndian, HasP9Vector] in {
23402415 (v4i32 (XXSPLTIB 255))>;
23412416 def : Pat<(v2i64 immAllOnesV),
23422417 (v2i64 (XXSPLTIB 255))>;
2418
2419 // Build vectors from i8 loads
2420 def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
2421 (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
2422 def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
2423 (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
2424 def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
2425 (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
2426 def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
2427 (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
2428 def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
2429 (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
2430 def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
2431 (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
2432
2433 // Build vectors from i16 loads
2434 def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
2435 (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
2436 def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
2437 (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
2438 def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
2439 (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
2440 def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
2441 (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
2442 def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
2443 (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
2444
2445 let Predicates = [IsBigEndian, HasP9Vector] in {
2446 // Scalar stores of i8
2447 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
2448 (STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>;
2449 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
2450 (STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
2451 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
2452 (STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>;
2453 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
2454 (STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
2455 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
2456 (STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>;
2457 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
2458 (STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
2459 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
2460 (STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>;
2461 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
2462 (STXSIBXv $S, xoaddr:$dst)>;
2463 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
2464 (STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>;
2465 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
2466 (STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
2467 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
2468 (STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>;
2469 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
2470 (STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
2471 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
2472 (STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>;
2473 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
2474 (STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
2475 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
2476 (STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>;
2477 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
2478 (STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
2479
2480 // Scalar stores of i16
2481 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
2482 (STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
2483 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
2484 (STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
2485 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
2486 (STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
2487 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
2488 (STXSIHXv $S, xoaddr:$dst)>;
2489 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
2490 (STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
2491 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
2492 (STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
2493 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
2494 (STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
2495 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
2496 (STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
2497 } // IsBigEndian, HasP9Vector
2498
2499 let Predicates = [IsLittleEndian, HasP9Vector] in {
2500 // Scalar stores of i8
2501 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
2502 (STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
2503 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
2504 (STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>;
2505 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
2506 (STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
2507 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
2508 (STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>;
2509 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
2510 (STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
2511 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
2512 (STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>;
2513 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
2514 (STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
2515 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
2516 (STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>;
2517 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
2518 (STXSIBXv $S, xoaddr:$dst)>;
2519 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
2520 (STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>;
2521 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
2522 (STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
2523 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
2524 (STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>;
2525 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
2526 (STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
2527 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
2528 (STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>;
2529 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
2530 (STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
2531 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
2532 (STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>;
2533
2534 // Scalar stores of i16
2535 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
2536 (STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
2537 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
2538 (STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
2539 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
2540 (STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
2541 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
2542 (STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
2543 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
2544 (STXSIHXv $S, xoaddr:$dst)>;
2545 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
2546 (STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
2547 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
2548 (STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
2549 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
2550 (STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
2551 } // IsLittleEndian, HasP9Vector
2552
2553 // Vector sign extensions
2554 def : Pat<(f64 (PPCVexts f64:$A, 1)),
2555 (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
2556 def : Pat<(f64 (PPCVexts f64:$A, 2)),
2557 (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
23432558 } // end HasP9Vector, AddedComplexity
23442559
23452560 let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
169169 ToErase = &MI;
170170 Simplified = true;
171171 }
172 } else if ((Immed == 0 || Immed == 3) &&
173 DefMI && DefMI->getOpcode() == PPC::XXPERMDIs) {
174 // Splat fed by another splat - switch the output of the first
175 // and remove the second.
176 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
177 ToErase = &MI;
178 Simplified = true;
179 DEBUG(dbgs() << "Removing redundant splat: ");
180 DEBUG(MI.dump());
172181 }
182 }
183 }
184 break;
185 }
186 case PPC::VSPLTB:
187 case PPC::VSPLTH:
188 case PPC::XXSPLTW: {
189 unsigned MyOpcode = MI.getOpcode();
190 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
191 unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
192 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
193 if (!DefMI)
194 break;
195 unsigned DefOpcode = DefMI->getOpcode();
196 bool SameOpcode = (MyOpcode == DefOpcode) ||
197 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
198 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
199 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs);
200 // Splat fed by another splat - switch the output of the first
201 // and remove the second.
202 if (SameOpcode) {
203 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
204 ToErase = &MI;
205 Simplified = true;
206 DEBUG(dbgs() << "Removing redundant splat: ");
207 DEBUG(MI.dump());
208 }
209 // Splat fed by a shift. Usually when we align value to splat into
210 // vector element zero.
211 if (DefOpcode == PPC::XXSLDWI) {
212 unsigned ShiftRes = DefMI->getOperand(0).getReg();
213 unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
214 unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
215 unsigned ShiftImm = DefMI->getOperand(3).getImm();
216 unsigned SplatImm = MI.getOperand(2).getImm();
217 if (ShiftOp1 == ShiftOp2) {
218 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
219 if (MRI->hasOneNonDBGUse(ShiftRes)) {
220 DEBUG(dbgs() << "Removing redundant shift: ");
221 DEBUG(DefMI->dump());
222 ToErase = DefMI;
223 }
224 Simplified = true;
225 DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
226 " to " << NewElem << " in instruction: ");
227 DEBUG(MI.dump());
228 MI.getOperand(1).setReg(ShiftOp1);
229 MI.getOperand(2).setImm(NewElem);
173230 }
174231 }
175232 break;
302302 case PPC::VRRCRegClassID:
303303 case PPC::VFRCRegClassID:
304304 case PPC::VSLRCRegClassID:
305 case PPC::VSHRCRegClassID:
306305 return 32 - DefaultSafety;
307306 case PPC::VSRCRegClassID:
308307 case PPC::VSFRCRegClassID:
1616 def sub_un : SubRegIndex<1, 3>;
1717 def sub_32 : SubRegIndex<32>;
1818 def sub_64 : SubRegIndex<64>;
19 def sub_128 : SubRegIndex<128>;
2019 }
2120
2221
7877 let SubRegIndices = [sub_64];
7978 }
8079
81 // VSRH - One of the 32 128-bit VSX registers that overlap with the vector
82 // registers.
83 class VSRH : PPCReg {
84 let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
85 let HWEncoding{5} = 1;
86 let SubRegs = [SubReg];
87 let SubRegIndices = [sub_128];
88 }
89
9080 // CR - One of the 8 4-bit condition registers
9181 class CR num, string n, list subregs> : PPCReg {
9282 let HWEncoding{2-0} = num;
115105 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
116106 }
117107
118 // Floating-point vector subregisters (for VSX)
119 foreach Index = 0-31 in {
120 def VF#Index : VF;
108 // 64-bit Floating-point subregisters of Altivec registers
109 // Note: the register names are v0-v31 or vs32-vs63 depending on the use.
110 // Custom C++ code is used to produce the correct name and encoding.
111 foreach Index = 0-31 in {
112 def VF#Index : VF,
113 DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
121114 }
122115
123116 // QPX Floating-point registers
137130 def VSL#Index : VSRL("F"#Index), "vs"#Index>,
138131 DwarfRegAlias("F"#Index)>;
139132 }
140 foreach Index = 0-31 in {
141 def VSH#Index : VSRH("V"#Index), "vs" # !add(Index, 32)>,
142 DwarfRegAlias("V"#Index)>;
133
134 // Dummy VSX registers, this defines string: "vs32"-"vs63", and is only used for
135 // asm printing.
136 foreach Index = 32-63 in {
137 def VSX#Index : PPCReg<"vs"#Index>;
143138 }
144139
145140 // The reprsentation of r0 when treated as the constant 0.
287282 (sequence "F%u", 31, 14))>;
288283 def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
289284
290 def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
285 def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64], 128,
291286 (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
292287 V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
293288 V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
297292 def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
298293 (add (sequence "VSL%u", 0, 13),
299294 (sequence "VSL%u", 31, 14))>;
300 def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
301 (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
302 VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
303 VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
304 VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
305 VSH22, VSH21, VSH20)>;
306295 def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
307 (add VSLRC, VSHRC)>;
296 (add VSLRC, VRRC)>;
308297
309298 // Register classes for the 64-bit "scalar" VSX subregisters.
310299 def VFRC : RegisterClass<"PPC", [f64], 64,
100100 // This is a copy *to* a VSX register from a non-VSX register.
101101 Changed = true;
102102
103 const TargetRegisterClass *SrcRC =
104 IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
105 &PPC::VSLRCRegClass;
103 const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass;
106104 assert((IsF8Reg(SrcMO.getReg(), MRI) ||
107 IsVRReg(SrcMO.getReg(), MRI) ||
108105 IsVSSReg(SrcMO.getReg(), MRI) ||
109106 IsVSFReg(SrcMO.getReg(), MRI)) &&
110107 "Unknown source for a VSX copy");
115112 .addImm(1) // add 1, not 0, because there is no implicit clearing
116113 // of the high bits.
117114 .addOperand(SrcMO)
118 .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128
119 : PPC::sub_64);
115 .addImm(PPC::sub_64);
120116
121117 // The source of the original copy is now the new virtual register.
122118 SrcMO.setReg(NewVReg);
125121 // This is a copy *from* a VSX register to a non-VSX register.
126122 Changed = true;
127123
128 const TargetRegisterClass *DstRC =
129 IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
130 &PPC::VSLRCRegClass;
124 const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass;
131125 assert((IsF8Reg(DstMO.getReg(), MRI) ||
132126 IsVSFReg(DstMO.getReg(), MRI) ||
133 IsVSSReg(DstMO.getReg(), MRI) ||
134 IsVRReg(DstMO.getReg(), MRI)) &&
127 IsVSSReg(DstMO.getReg(), MRI)) &&
135128 "Unknown destination for a VSX copy");
136129
137130 // Copy the VSX value into a new VSX register of the correct subclass.
142135
143136 // Transform the original copy into a subregister extraction copy.
144137 SrcMO.setReg(NewVReg);
145 SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
146 PPC::sub_64);
138 SrcMO.setSubReg(PPC::sub_64);
147139 }
148140 }
149141
66 ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type
77 tail call void asm sideeffect "nop", "{vsl1}"(i32 %x) nounwind
88
9 ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type
10 tail call void asm sideeffect "nop", "{vsh1}"(i32 %x) nounwind
11
129 ret void
1310 }
0 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
11 ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
22 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \
3 ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s \
4 ; RUN: --check-prefix=CHECK-BE
3 ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
54
65 define <16 x i8> @test(i32* %s, i32* %t) {
76 entry:
109 %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32>
1110 ret <16 x i8> %2
1211 ; CHECK-LABEL: test
13 ; CHECK: lwz [[GPR:[0-9]+]], 0(3)
14 ; CHECK: mtvsrd [[VSR:[0-9]+]], [[GPR]]
15 ; CHECK: xxswapd [[SWP:[0-9]+]], [[VSR]]
16 ; CHECK: xxspltw 34, [[SWP]], 3
17 ; CHECK-BE-LABEL: test
18 ; CHECK-BE: lwz [[GPR:[0-9]+]], 0(3)
19 ; CHECK-BE: sldi [[SHL:[0-9]+]], [[GPR]], 32
20 ; CHECK-BE: mtvsrd [[VSR:[0-9]+]], [[SHL]]
21 ; CHECK-BE: xxspltw 34, [[VSR]], 0
12 ; CHECK: lxsiwax 34, 0, 3
13 ; CHECK: xxspltw 34, 34, 1
2214 }
9797 ; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
9898 ; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
9999 ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
100 ; CHECK-PWR: # kill
101100 ; CHECK-NEXT: blr
102101
103102 %t0 = fadd <4 x float> %x0, %x1
115114 ; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
116115 ; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
117116 ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
118 ; CHECK-PWR: # kill
119117 ; CHECK-NEXT: blr
120118
121119 %t0 = fadd <4 x float> %x0, %x1
133131 ; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
134132 ; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
135133 ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
136 ; CHECK-PWR: # kill
137134 ; CHECK-NEXT: blr
138135
139136 %t0 = fadd <4 x float> %x0, %x1
151148 ; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
152149 ; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
153150 ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
154 ; CHECK-PWR: # kill
155151 ; CHECK-NEXT: blr
156152
157153 %t0 = fadd <4 x float> %x0, %x1
6262 ret <2 x i64> %splat.splat
6363 ; CHECK: mtvsrd {{[0-9]+}}, 3
6464 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
65 ; CHECK-LE: xxspltd [[REG1]], [[REG1]], 0
65 ; CHECK-LE: xxspltd 34, [[REG1]], 0
6666 }
6767
6868 ; Function Attrs: nounwind
7474 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
7575 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
7676 ret <4 x float> %splat.splat
77 ; CHECK: xscvdpspn {{[0-9]+}}, 1
77 ; CHECK: xscvdpspn [[REG1:[0-9]+]], 1
78 ; CHECK: xxspltw 34, [[REG1]]
7879 ; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1
79 ; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1
80 ; CHECK-LE: xxspltw 34, [[REG1]]
8081 }
8182
8283 ; The optimization to remove stack operations from PPCDAGToDAGISel::Select
66
77 define <2 x i64> @test1(i64 %a, i64 %b) {
88 entry:
9 ; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
10 ; which will happen in a subsequent patch.
911 ; CHECK-LABEL: test1
10 ; CHECK: mtvsrdd 34, 4, 3
12 ; FIXME: mtvsrdd 34, 4, 3
13 ; CHECK: mtvsrd {{[0-9]+}}, 3
14 ; CHECK: mtvsrd {{[0-9]+}}, 4
15 ; CHECK: xxmrgld
1116 ; CHECK-BE-LABEL: test1
12 ; CHECK-BE: mtvsrdd 34, 3, 4
17 ; FIXME-BE: mtvsrdd 34, 3, 4
18 ; CHECK-BE: mtvsrd {{[0-9]+}}, 4
19 ; CHECK-BE: mtvsrd {{[0-9]+}}, 3
20 ; CHECK-BE: xxmrghd
1321 %vecins = insertelement <2 x i64> undef, i64 %a, i32 0
1422 %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
1523 ret <2 x i64> %vecins1
5454 ; CHECK-LE: blr
5555
5656 ; CHECK-P9-LABEL: @v1i128_increment_by_one
57 ; CHECK-P9-DAG: li [[R1:r[0-9]+]], 1
58 ; CHECK-P9-DAG: li [[R2:r[0-9]+]], 0
59 ; CHECK-P9: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
57 ; The below FIXME is due to the lowering for BUILD_VECTOR that will be fixed
58 ; in a subsequent patch.
59 ; FIXME: li [[R1:r[0-9]+]], 1
60 ; FIXME: li [[R2:r[0-9]+]], 0
61 ; FIXME: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
62 ; CHECK-P9: lxvx [[V1:v[0-9]+]]
6063 ; CHECK-P9: vadduqm v2, v2, [[V1]]
6164 ; CHECK-P9: blr
6265
713713 %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
714714 ret <4 x float> %cond
715715
716 ; FIXME: This test (and the other v4f32 tests) should use the same bclr
717 ; technique as the v2f64 tests below.
718
719716 ; CHECK-LABEL: @testv4floatslt
720717 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
721718 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
722 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
723 ; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
724 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
725 ; CHECK: xxlor [[REG2]], 35, 35
726 ; CHECK: .LBB[[BB]]:
727 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
719 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
720 ; CHECK: bclr 12, [[REG1]], 0
721 ; CHECK: vor 2, 3, 3
728722 ; CHECK: blr
729723 }
730724
739733 ; CHECK-LABEL: @testv4floatult
740734 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
741735 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
742 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
743 ; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
744 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
745 ; CHECK: xxlor [[REG2]], 35, 35
746 ; CHECK: .LBB[[BB]]:
747 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
736 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
737 ; CHECK: bclr 12, [[REG1]], 0
738 ; CHECK: vor 2, 3, 3
748739 ; CHECK: blr
749740 }
750741
759750 ; CHECK-LABEL: @testv4floatsle
760751 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
761752 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
762 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
763 ; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
764 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
765 ; CHECK: xxlor [[REG2]], 35, 35
766 ; CHECK: .LBB[[BB]]:
767 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
753 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
754 ; CHECK: bclr 12, [[REG1]], 0
755 ; CHECK: vor 2, 3, 3
768756 ; CHECK: blr
769757 }
770758
779767 ; CHECK-LABEL: @testv4floatule
780768 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
781769 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
782 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
783 ; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
784 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
785 ; CHECK: xxlor [[REG2]], 35, 35
786 ; CHECK: .LBB[[BB]]:
787 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
770 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
771 ; CHECK: bclr 12, [[REG1]], 0
772 ; CHECK: vor 2, 3, 3
788773 ; CHECK: blr
789774 }
790775
799784 ; CHECK-LABEL: @testv4floateq
800785 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
801786 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
802 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 35, 35
803 ; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
804 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
805 ; CHECK: xxlor [[REG2]], 34, 34
806 ; CHECK: .LBB[[BB]]:
807 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
787 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
788 ; CHECK: bc 12, [[REG1]], .LBB[[BB1:[0-9_]+]]
789 ; CHECK: vor 3, 2, 2
790 ; CHECK: .LBB[[BB1]]
791 ; CHECK: vor 2, 3, 3
808792 ; CHECK: blr
809793 }
810794
819803 ; CHECK-LABEL: @testv4floatsge
820804 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
821805 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
822 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
823 ; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
824 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
825 ; CHECK: xxlor [[REG2]], 35, 35
826 ; CHECK: .LBB[[BB]]:
827 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
806 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
807 ; CHECK: bclr 12, [[REG1]], 0
808 ; CHECK: vor 2, 3, 3
828809 ; CHECK: blr
829810 }
830811
839820 ; CHECK-LABEL: @testv4floatuge
840821 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
841822 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
842 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
843 ; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
844 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
845 ; CHECK: xxlor [[REG2]], 35, 35
846 ; CHECK: .LBB[[BB]]:
847 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
823 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
824 ; CHECK: bclr 12, [[REG1]], 0
825 ; CHECK: vor 2, 3, 3
848826 ; CHECK: blr
849827 }
850828
859837 ; CHECK-LABEL: @testv4floatsgt
860838 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
861839 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
862 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
863 ; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
864 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
865 ; CHECK: xxlor [[REG2]], 35, 35
866 ; CHECK: .LBB[[BB]]:
867 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
840 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
841 ; CHECK: bclr 12, [[REG1]], 0
842 ; CHECK: vor 2, 3, 3
868843 ; CHECK: blr
869844 }
870845
879854 ; CHECK-LABEL: @testv4floatugt
880855 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
881856 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
882 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
883 ; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
884 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
885 ; CHECK: xxlor [[REG2]], 35, 35
886 ; CHECK: .LBB[[BB]]:
887 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
857 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
858 ; CHECK: bclr 12, [[REG1]], 0
859 ; CHECK: vor 2, 3, 3
888860 ; CHECK: blr
889861 }
890862
899871 ; CHECK-LABEL: @testv4floatne
900872 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
901873 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
902 ; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
903 ; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
904 ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
905 ; CHECK: xxlor [[REG2]], 35, 35
906 ; CHECK: .LBB[[BB]]:
907 ; CHECK: xxlor 34, [[REG2]], [[REG2]]
874 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
875 ; CHECK: bclr 12, [[REG1]], 0
876 ; CHECK: vor 2, 3, 3
908877 ; CHECK: blr
909878 }
910879
1022991 ; CHECK: bc 12, [[REG1]], .LBB[[BB55:[0-9_]+]]
1023992 ; CHECK: vor 3, 2, 2
1024993 ; CHECK: .LBB[[BB55]]
1025 ; CHECK: xxlor 34, 35, 35
994 ; CHECK: vor 2, 3, 3
1026995 ; CHECK: blr
1027996 }
1028997
6565 ; CHECK-NOT: mfspr
6666
6767 ; CHECK-DAG: stfd
68 ; CHECK-DAG: stvx
68 ; CHECK-DAG: stxvd2x
6969
7070 ; CHECK-DAG: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
7171 ; CHECK-DAG: std 31, env_sigill@toc@l([[REG]])
8181 ; CHECK: .LBB1_4:
8282
8383 ; CHECK: lfd
84 ; CHECK: lvx
84 ; CHECK: lxvd2x
8585 ; CHECK: ld
8686 ; CHECK: blr
8787
9292 ; CHECK: li 3, 0
9393
9494 ; CHECK-NOAV: @main
95 ; CHECK-NOAV-NOT: stvx
95 ; CHECK-NOAV-NOT: stxvd2x
9696 ; CHECK-NOAV: bcl
9797 ; CHECK-NOAV: mflr
9898 ; CHECK-NOAV: bl foo
99 ; CHECK-NOAV-NOT: lvx
99 ; CHECK-NOAV-NOT: lxvd2x
100100 ; CHECK-NOAV: blr
101101 }
102102
0 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
1 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck %s
1 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | \
2 ; RUN: FileCheck -check-prefix=CHECK-FISL %s
23 target datalayout = "E-m:e-i64:64-n32:64"
34 target triple = "powerpc64-unknown-linux-gnu"
45
1213
1314 ; CHECK-LABEL: @main
1415 ; CHECK-DAG: vor [[V:[0-9]+]], 2, 2
15 ; CHECK-DAG: xxlor 34, 35, 35
16 ; CHECK-DAG: xxlor 35, 36, 36
16 ; CHECK-DAG: vor 2, 3, 3
17 ; CHECK-DAG: vor 3, 4, 4
1718 ; CHECK-DAG: vor 4, [[V]], [[V]]
18 ; CHECK-DAG: bl sv
19 ; CHECK-DAG: lxvd2x [[VC:[0-9]+]],
19 ; CHECK: bl sv
20 ; CHECK: lxvd2x [[VC:[0-9]+]],
2021 ; CHECK: xvadddp 34, 34, [[VC]]
2122 ; CHECK: blr
23
24 ; CHECK-FISL-LABEL: @main
25 ; CHECK-FISL: stxvd2x 34
26 ; CHECK-FISL: vor 2, 3, 3
27 ; CHECK-FISL: vor 3, 4, 4
28 ; CHECK-FISL: lxvd2x 36
29 ; CHECK-FISL: bl sv
30 ; CHECK-FISL: lxvd2x [[VC:[0-9]+]],
31 ; CHECK-FISL: xvadddp 34, 34, [[VC]]
32 ; CHECK-FISL: blr
2233 }
2334
2435 attributes #0 = { noinline nounwind readnone }
1010 br label %vector.body
1111
1212 ; CHECK-LABEL: @_Z8example9Pj
13 ; CHECK: xxlor
13 ; CHECK: vor
14 ; CHECK: vor
15 ; CHECK: vor
16 ; CHECK: vor
17 ; CHECK: vor
18 ; CHECK: vor
19 ; CHECK: vor
20 ; CHECK: vor
21 ; CHECK: vor
1422
1523 vector.body: ; preds = %vector.body, %entry
1624 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3333 ; CHECK-REG: blr
3434
3535 ; CHECK-FISL-LABEL: @test32u
36 ; CHECK-FISL: lxvw4x 0, 0, 3
37 ; CHECK-FISL: xxlor 34, 0, 0
36 ; CHECK-FISL: lxvw4x 34, 0, 3
3837 ; CHECK-FISL: blr
3938 }
4039
4746 ; CHECK-REG: blr
4847
4948 ; CHECK-FISL-LABEL: @test33u
50 ; CHECK-FISL: vor 3, 2, 2
51 ; CHECK-FISL: stxvw4x 35, 0, 3
49 ; CHECK-FISL: stxvw4x 34, 0, 3
5250 ; CHECK-FISL: blr
5351 }
5452
0 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
1 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
2 ; RUN: --check-prefix=CHECK-BE
3 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, inserts it at lane 0 of a <16 x i8> and broadcasts it with an all-zero shuffle mask.
; Both check prefixes expect lxsibzx immediately followed by vspltb.
4 define <16 x i8> @vecucuc(i8* nocapture readonly %ptr) {
5 entry:
6 %0 = load i8, i8* %ptr, align 1
7 %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
8 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
9 ret <16 x i8> %splat.splat
10 ; CHECK-LABEL: vecucuc
11 ; CHECK: lxsibzx 34, 0, 3
12 ; CHECK-NEXT: vspltb 2, 2, 7
13 ; CHECK-BE-LABEL: vecucuc
14 ; CHECK-BE: lxsibzx 34, 0, 3
15 ; CHECK-BE-NEXT: vspltb 2, 2, 7
16 }
17
18 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, zero-extends it to i16 and broadcasts it across an <8 x i16>.
; Both check prefixes expect lxsibzx immediately followed by vsplth.
19 define <8 x i16> @vecusuc(i8* nocapture readonly %ptr) {
20 entry:
21 %0 = load i8, i8* %ptr, align 1
22 %conv = zext i8 %0 to i16
23 %splat.splatinsert = insertelement <8 x i16> undef, i16 %conv, i32 0
24 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
25 ret <8 x i16> %splat.splat
26 ; CHECK-LABEL: vecusuc
27 ; CHECK: lxsibzx 34, 0, 3
28 ; CHECK-NEXT: vsplth 2, 2, 3
29 ; CHECK-BE-LABEL: vecusuc
30 ; CHECK-BE: lxsibzx 34, 0, 3
31 ; CHECK-BE-NEXT: vsplth 2, 2, 3
32 }
33
34 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, zero-extends it to i32 and broadcasts it across a <4 x i32>.
; Both check prefixes expect lxsibzx immediately followed by xxspltw.
35 define <4 x i32> @vecuiuc(i8* nocapture readonly %ptr) {
36 entry:
37 %0 = load i8, i8* %ptr, align 1
38 %conv = zext i8 %0 to i32
39 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
40 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
41 ret <4 x i32> %splat.splat
42 ; CHECK-LABEL: vecuiuc
43 ; CHECK: lxsibzx 34, 0, 3
44 ; CHECK-NEXT: xxspltw 34, 34, 1
45 ; CHECK-BE-LABEL: vecuiuc
46 ; CHECK-BE: lxsibzx 34, 0, 3
47 ; CHECK-BE-NEXT: xxspltw 34, 34, 1
48 }
49
50 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, zero-extends it to i64 and broadcasts it across a <2 x i64>.
; Both check prefixes expect lxsibzx immediately followed by xxspltd.
51 define <2 x i64> @veculuc(i8* nocapture readonly %ptr) {
52 entry:
53 %0 = load i8, i8* %ptr, align 1
54 %conv = zext i8 %0 to i64
55 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
56 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
57 ret <2 x i64> %splat.splat
58 ; CHECK-LABEL: veculuc
59 ; CHECK: lxsibzx 34, 0, 3
60 ; CHECK-NEXT: xxspltd 34, 34, 0
61 ; CHECK-BE-LABEL: veculuc
62 ; CHECK-BE: lxsibzx 34, 0, 3
63 ; CHECK-BE-NEXT: xxspltd 34, 34, 0
64 }
65
66 ; Function Attrs: norecurse nounwind readonly
; Loads one i8 and broadcasts it, without any extension, across a <16 x i8>.
; Both check prefixes expect lxsibzx immediately followed by vspltb.
67 define <16 x i8> @vecscuc(i8* nocapture readonly %ptr) {
68 entry:
69 %0 = load i8, i8* %ptr, align 1
70 %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
71 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
72 ret <16 x i8> %splat.splat
73 ; CHECK-LABEL: vecscuc
74 ; CHECK: lxsibzx 34, 0, 3
75 ; CHECK-NEXT: vspltb 2, 2, 7
76 ; CHECK-BE-LABEL: vecscuc
77 ; CHECK-BE: lxsibzx 34, 0, 3
78 ; CHECK-BE-NEXT: vspltb 2, 2, 7
79 }
80
81 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, zero-extends it to i16 and broadcasts it across an <8 x i16>.
; Both check prefixes expect lxsibzx immediately followed by vsplth.
82 define <8 x i16> @vecssuc(i8* nocapture readonly %ptr) {
83 entry:
84 %0 = load i8, i8* %ptr, align 1
85 %conv = zext i8 %0 to i16
86 %splat.splatinsert = insertelement <8 x i16> undef, i16 %conv, i32 0
87 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
88 ret <8 x i16> %splat.splat
89 ; CHECK-LABEL: vecssuc
90 ; CHECK: lxsibzx 34, 0, 3
91 ; CHECK-NEXT: vsplth 2, 2, 3
92 ; CHECK-BE-LABEL: vecssuc
93 ; CHECK-BE: lxsibzx 34, 0, 3
94 ; CHECK-BE-NEXT: vsplth 2, 2, 3
95 }
96
97 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, zero-extends it to i32 and broadcasts it across a <4 x i32>.
; Both check prefixes expect lxsibzx immediately followed by xxspltw.
98 define <4 x i32> @vecsiuc(i8* nocapture readonly %ptr) {
99 entry:
100 %0 = load i8, i8* %ptr, align 1
101 %conv = zext i8 %0 to i32
102 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
103 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
104 ret <4 x i32> %splat.splat
105 ; CHECK-LABEL: vecsiuc
106 ; CHECK: lxsibzx 34, 0, 3
107 ; CHECK-NEXT: xxspltw 34, 34, 1
108 ; CHECK-BE-LABEL: vecsiuc
109 ; CHECK-BE: lxsibzx 34, 0, 3
110 ; CHECK-BE-NEXT: xxspltw 34, 34, 1
111 }
112
113 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, zero-extends it to i64 and broadcasts it across a <2 x i64>.
; Both check prefixes expect lxsibzx immediately followed by xxspltd.
114 define <2 x i64> @vecsluc(i8* nocapture readonly %ptr) {
115 entry:
116 %0 = load i8, i8* %ptr, align 1
117 %conv = zext i8 %0 to i64
118 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
119 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
120 ret <2 x i64> %splat.splat
121 ; CHECK-LABEL: vecsluc
122 ; CHECK: lxsibzx 34, 0, 3
123 ; CHECK-NEXT: xxspltd 34, 34, 0
124 ; CHECK-BE-LABEL: vecsluc
125 ; CHECK-BE: lxsibzx 34, 0, 3
126 ; CHECK-BE-NEXT: xxspltd 34, 34, 0
127 }
128
129 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, converts it to float with uitofp and broadcasts it across a <4 x float>.
; Both check prefixes expect the chain lxsibzx -> xscvuxdsp -> xscvdpspn -> xxspltw,
; with each instruction consuming the register captured from the previous one.
130 define <4 x float> @vecfuc(i8* nocapture readonly %ptr) {
131 entry:
132 %0 = load i8, i8* %ptr, align 1
133 %conv = uitofp i8 %0 to float
134 %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
135 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
136 ret <4 x float> %splat.splat
137 ; CHECK-LABEL: vecfuc
138 ; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3
139 ; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
140 ; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
141 ; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
142 ; CHECK-BE-LABEL: vecfuc
143 ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
144 ; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
145 ; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
146 ; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
147 }
148
149 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, converts it to double with uitofp and broadcasts it across a <2 x double>.
; Both check prefixes expect the chain lxsibzx -> xscvuxddp -> xxspltd.
150 define <2 x double> @vecduc(i8* nocapture readonly %ptr) {
151 entry:
152 %0 = load i8, i8* %ptr, align 1
153 %conv = uitofp i8 %0 to double
154 %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0
155 %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
156 ret <2 x double> %splat.splat
157 ; CHECK-LABEL: vecduc
158 ; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3
159 ; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
160 ; CHECK-NEXT: xxspltd 34, [[CONVD]], 0
161 ; CHECK-BE-LABEL: vecduc
162 ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
163 ; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
164 ; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0
165 }
166
167 ; Function Attrs: norecurse nounwind readonly
; Loads one i8 and broadcasts it, without any extension, across a <16 x i8>.
; Both check prefixes expect lxsibzx immediately followed by vspltb.
168 define <16 x i8> @vecucsc(i8* nocapture readonly %ptr) {
169 entry:
170 %0 = load i8, i8* %ptr, align 1
171 %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
172 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
173 ret <16 x i8> %splat.splat
174 ; CHECK-LABEL: vecucsc
175 ; CHECK: lxsibzx 34, 0, 3
176 ; CHECK-NEXT: vspltb 2, 2, 7
177 ; CHECK-BE-LABEL: vecucsc
178 ; CHECK-BE: lxsibzx 34, 0, 3
179 ; CHECK-BE-NEXT: vspltb 2, 2, 7
180 }
181
182 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, sign-extends it to i32 and broadcasts it across a <4 x i32>.
; Both check prefixes expect lxsibzx, then vextsb2w for the sign extension, then xxspltw.
183 define <4 x i32> @vecuisc(i8* nocapture readonly %ptr) {
184 entry:
185 %0 = load i8, i8* %ptr, align 1
186 %conv = sext i8 %0 to i32
187 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
188 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
189 ret <4 x i32> %splat.splat
190 ; CHECK-LABEL: vecuisc
191 ; CHECK: lxsibzx 34, 0, 3
192 ; CHECK-NEXT: vextsb2w 2, 2
193 ; CHECK-NEXT: xxspltw 34, 34, 1
194 ; CHECK-BE-LABEL: vecuisc
195 ; CHECK-BE: lxsibzx 34, 0, 3
196 ; CHECK-BE-NEXT: vextsb2w 2, 2
197 ; CHECK-BE-NEXT: xxspltw 34, 34, 1
198 }
199
200 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, sign-extends it to i64 and broadcasts it across a <2 x i64>.
; Both check prefixes expect lxsibzx, then vextsb2d for the sign extension, then xxspltd.
201 define <2 x i64> @veculsc(i8* nocapture readonly %ptr) {
202 entry:
203 %0 = load i8, i8* %ptr, align 1
204 %conv = sext i8 %0 to i64
205 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
206 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
207 ret <2 x i64> %splat.splat
208 ; CHECK-LABEL: veculsc
209 ; CHECK: lxsibzx 34, 0, 3
210 ; CHECK-NEXT: vextsb2d 2, 2
211 ; CHECK-NEXT: xxspltd 34, 34, 0
212 ; CHECK-BE-LABEL: veculsc
213 ; CHECK-BE: lxsibzx 34, 0, 3
214 ; CHECK-BE-NEXT: vextsb2d 2, 2
215 ; CHECK-BE-NEXT: xxspltd 34, 34, 0
216 }
217
218 ; Function Attrs: norecurse nounwind readonly
; Loads one i8 and broadcasts it, without any extension, across a <16 x i8>.
; Both check prefixes expect lxsibzx immediately followed by vspltb.
219 define <16 x i8> @vecscsc(i8* nocapture readonly %ptr) {
220 entry:
221 %0 = load i8, i8* %ptr, align 1
222 %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
223 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
224 ret <16 x i8> %splat.splat
225 ; CHECK-LABEL: vecscsc
226 ; CHECK: lxsibzx 34, 0, 3
227 ; CHECK-NEXT: vspltb 2, 2, 7
228 ; CHECK-BE-LABEL: vecscsc
229 ; CHECK-BE: lxsibzx 34, 0, 3
230 ; CHECK-BE-NEXT: vspltb 2, 2, 7
231 }
232
233 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, sign-extends it to i32 and broadcasts it across a <4 x i32>.
; Both check prefixes expect lxsibzx, then vextsb2w for the sign extension, then xxspltw.
234 define <4 x i32> @vecsisc(i8* nocapture readonly %ptr) {
235 entry:
236 %0 = load i8, i8* %ptr, align 1
237 %conv = sext i8 %0 to i32
238 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
239 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
240 ret <4 x i32> %splat.splat
241 ; CHECK-LABEL: vecsisc
242 ; CHECK: lxsibzx 34, 0, 3
243 ; CHECK-NEXT: vextsb2w 2, 2
244 ; CHECK-NEXT: xxspltw 34, 34, 1
245 ; CHECK-BE-LABEL: vecsisc
246 ; CHECK-BE: lxsibzx 34, 0, 3
247 ; CHECK-BE-NEXT: vextsb2w 2, 2
248 ; CHECK-BE-NEXT: xxspltw 34, 34, 1
249 }
250
251 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, sign-extends it to i64 and broadcasts it across a <2 x i64>.
; Both check prefixes expect lxsibzx, then vextsb2d for the sign extension, then xxspltd.
252 define <2 x i64> @vecslsc(i8* nocapture readonly %ptr) {
253 entry:
254 %0 = load i8, i8* %ptr, align 1
255 %conv = sext i8 %0 to i64
256 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
257 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
258 ret <2 x i64> %splat.splat
259 ; CHECK-LABEL: vecslsc
260 ; CHECK: lxsibzx 34, 0, 3
261 ; CHECK-NEXT: vextsb2d 2, 2
262 ; CHECK-NEXT: xxspltd 34, 34, 0
263 ; CHECK-BE-LABEL: vecslsc
264 ; CHECK-BE: lxsibzx 34, 0, 3
265 ; CHECK-BE-NEXT: vextsb2d 2, 2
266 ; CHECK-BE-NEXT: xxspltd 34, 34, 0
267 }
268
269 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, converts it to float with sitofp and broadcasts it across a <4 x float>.
; Both check prefixes expect lxsibzx, vextsb2d, xscvsxdsp, xscvdpspn and xxspltw on consecutive lines.
; Only the last three are tied together through captured registers; the operands of the
; load and of the sign extension are not constrained against each other.
270 define <4 x float> @vecfsc(i8* nocapture readonly %ptr) {
271 entry:
272 %0 = load i8, i8* %ptr, align 1
273 %conv = sitofp i8 %0 to float
274 %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
275 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
276 ret <4 x float> %splat.splat
277 ; CHECK-LABEL: vecfsc
278 ; CHECK: lxsibzx
279 ; CHECK-NEXT: vextsb2d
280 ; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
281 ; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
282 ; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
283 ; CHECK-BE-LABEL: vecfsc
284 ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
285 ; CHECK-BE-NEXT: vextsb2d
286 ; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
287 ; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
288 ; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
289 }
290
291 ; Function Attrs: norecurse nounwind readonly
; Loads one i8, converts it to double with sitofp and broadcasts it across a <2 x double>.
; Both check prefixes expect lxsibzx, vextsb2d, xscvsxddp and xxspltd on consecutive lines;
; only the conversion result is tracked through a captured register.
292 define <2 x double> @vecdsc(i8* nocapture readonly %ptr) {
293 entry:
294 %0 = load i8, i8* %ptr, align 1
295 %conv = sitofp i8 %0 to double
296 %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0
297 %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
298 ret <2 x double> %splat.splat
299 ; CHECK-LABEL: vecdsc
300 ; CHECK: lxsibzx
301 ; CHECK-NEXT: vextsb2d
302 ; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]],
303 ; CHECK-NEXT: xxspltd 34, [[CONVD]], 0
304 ; CHECK-BE-LABEL: vecdsc
305 ; CHECK-BE: lxsibzx
306 ; CHECK-BE-NEXT: vextsb2d
307 ; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]],
308 ; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0
309 }
310
311 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, truncates it to i8 and broadcasts the byte across a <16 x i8>.
; The default prefix expects a byte load straight from the pointer (lxsibzx 34, 0, 3);
; the big-endian prefix expects li 1 to feed the lxsibzx index, i.e. the byte at offset 1.
; Both then expect vspltb.
312 define <16 x i8> @vecucus(i16* nocapture readonly %ptr) {
313 entry:
314 %0 = load i16, i16* %ptr, align 2
315 %conv = trunc i16 %0 to i8
316 %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0
317 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
318 ret <16 x i8> %splat.splat
319 ; CHECK-LABEL: vecucus
320 ; CHECK: lxsibzx 34, 0, 3
321 ; CHECK-NEXT: vspltb 2, 2, 7
322 ; CHECK-BE-LABEL: vecucus
323 ; CHECK-BE: li [[OFFSET:[0-9]+]], 1
324 ; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]]
325 ; CHECK-BE-NEXT: vspltb 2, 2, 7
326 }
327
328 ; Function Attrs: norecurse nounwind readonly
; Loads one i16 and broadcasts it, without any extension, across an <8 x i16>.
; Both check prefixes expect lxsihzx immediately followed by vsplth.
329 define <8 x i16> @vecusus(i16* nocapture readonly %ptr) {
330 entry:
331 %0 = load i16, i16* %ptr, align 2
332 %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0
333 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
334 ret <8 x i16> %splat.splat
335 ; CHECK-LABEL: vecusus
336 ; CHECK: lxsihzx 34, 0, 3
337 ; CHECK-NEXT: vsplth 2, 2, 3
338 ; CHECK-BE-LABEL: vecusus
339 ; CHECK-BE: lxsihzx 34, 0, 3
340 ; CHECK-BE-NEXT: vsplth 2, 2, 3
341 }
342
343 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, zero-extends it to i32 and broadcasts it across a <4 x i32>.
; Both check prefixes expect lxsihzx immediately followed by xxspltw.
344 define <4 x i32> @vecuius(i16* nocapture readonly %ptr) {
345 entry:
346 %0 = load i16, i16* %ptr, align 2
347 %conv = zext i16 %0 to i32
348 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
349 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
350 ret <4 x i32> %splat.splat
351 ; CHECK-LABEL: vecuius
352 ; CHECK: lxsihzx 34, 0, 3
353 ; CHECK-NEXT: xxspltw 34, 34, 1
354 ; CHECK-BE-LABEL: vecuius
355 ; CHECK-BE: lxsihzx 34, 0, 3
356 ; CHECK-BE-NEXT: xxspltw 34, 34, 1
357 }
358
359 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, zero-extends it to i64 and broadcasts it across a <2 x i64>.
; Both check prefixes expect lxsihzx immediately followed by xxspltd.
360 define <2 x i64> @veculus(i16* nocapture readonly %ptr) {
361 entry:
362 %0 = load i16, i16* %ptr, align 2
363 %conv = zext i16 %0 to i64
364 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
365 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
366 ret <2 x i64> %splat.splat
367 ; CHECK-LABEL: veculus
368 ; CHECK: lxsihzx 34, 0, 3
369 ; CHECK-NEXT: xxspltd 34, 34, 0
370 ; CHECK-BE-LABEL: veculus
371 ; CHECK-BE: lxsihzx 34, 0, 3
372 ; CHECK-BE-NEXT: xxspltd 34, 34, 0
373 }
374
375 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, truncates it to i8 and broadcasts the byte across a <16 x i8>.
; The default prefix expects a byte load straight from the pointer (lxsibzx 34, 0, 3);
; the big-endian prefix expects li 1 to feed the lxsibzx index, i.e. the byte at offset 1.
; Both then expect vspltb.
376 define <16 x i8> @vecscus(i16* nocapture readonly %ptr) {
377 entry:
378 %0 = load i16, i16* %ptr, align 2
379 %conv = trunc i16 %0 to i8
380 %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0
381 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
382 ret <16 x i8> %splat.splat
383 ; CHECK-LABEL: vecscus
384 ; CHECK: lxsibzx 34, 0, 3
385 ; CHECK-NEXT: vspltb 2, 2, 7
386 ; CHECK-BE-LABEL: vecscus
387 ; CHECK-BE: li [[OFFSET:[0-9]+]], 1
388 ; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]]
389 ; CHECK-BE-NEXT: vspltb 2, 2, 7
390 }
391
392 ; Function Attrs: norecurse nounwind readonly
; Loads one i16 and broadcasts it, without any extension, across an <8 x i16>.
; Both check prefixes expect lxsihzx immediately followed by vsplth.
393 define <8 x i16> @vecssus(i16* nocapture readonly %ptr) {
394 entry:
395 %0 = load i16, i16* %ptr, align 2
396 %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0
397 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
398 ret <8 x i16> %splat.splat
399 ; CHECK-LABEL: vecssus
400 ; CHECK: lxsihzx 34, 0, 3
401 ; CHECK-NEXT: vsplth 2, 2, 3
402 ; CHECK-BE-LABEL: vecssus
403 ; CHECK-BE: lxsihzx 34, 0, 3
404 ; CHECK-BE-NEXT: vsplth 2, 2, 3
405 }
406
407 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, zero-extends it to i32 and broadcasts it across a <4 x i32>.
; Both check prefixes expect lxsihzx immediately followed by xxspltw.
408 define <4 x i32> @vecsius(i16* nocapture readonly %ptr) {
409 entry:
410 %0 = load i16, i16* %ptr, align 2
411 %conv = zext i16 %0 to i32
412 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
413 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
414 ret <4 x i32> %splat.splat
415 ; CHECK-LABEL: vecsius
416 ; CHECK: lxsihzx 34, 0, 3
417 ; CHECK-NEXT: xxspltw 34, 34, 1
418 ; CHECK-BE-LABEL: vecsius
419 ; CHECK-BE: lxsihzx 34, 0, 3
420 ; CHECK-BE-NEXT: xxspltw 34, 34, 1
421 }
422
423 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, zero-extends it to i64 and broadcasts it across a <2 x i64>.
; Both check prefixes expect lxsihzx immediately followed by xxspltd.
424 define <2 x i64> @vecslus(i16* nocapture readonly %ptr) {
425 entry:
426 %0 = load i16, i16* %ptr, align 2
427 %conv = zext i16 %0 to i64
428 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
429 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
430 ret <2 x i64> %splat.splat
431 ; CHECK-LABEL: vecslus
432 ; CHECK: lxsihzx 34, 0, 3
433 ; CHECK-NEXT: xxspltd 34, 34, 0
434 ; CHECK-BE-LABEL: vecslus
435 ; CHECK-BE: lxsihzx 34, 0, 3
436 ; CHECK-BE-NEXT: xxspltd 34, 34, 0
437 }
438
439 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, converts it to float with uitofp and broadcasts it across a <4 x float>.
; Both check prefixes expect the chain lxsihzx -> xscvuxdsp -> xscvdpspn -> xxspltw,
; with each instruction consuming the register captured from the previous one.
440 define <4 x float> @vecfus(i16* nocapture readonly %ptr) {
441 entry:
442 %0 = load i16, i16* %ptr, align 2
443 %conv = uitofp i16 %0 to float
444 %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
445 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
446 ret <4 x float> %splat.splat
447 ; CHECK-LABEL: vecfus
448 ; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3
449 ; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
450 ; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
451 ; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
452 ; CHECK-BE-LABEL: vecfus
453 ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
454 ; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
455 ; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
456 ; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
457 }
458
459 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, converts it to double with uitofp and broadcasts it across a <2 x double>.
; Both check prefixes expect the chain lxsihzx -> xscvuxddp -> xxspltd.
460 define <2 x double> @vecdus(i16* nocapture readonly %ptr) {
461 entry:
462 %0 = load i16, i16* %ptr, align 2
463 %conv = uitofp i16 %0 to double
464 %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0
465 %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
466 ret <2 x double> %splat.splat
467 ; CHECK-LABEL: vecdus
468 ; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3
469 ; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
470 ; CHECK-NEXT: xxspltd 34, [[CONVD]], 0
471 ; CHECK-BE-LABEL: vecdus
472 ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
473 ; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
474 ; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0
475 }
476
477 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, truncates it to i8 and broadcasts the byte across a <16 x i8>.
; The default prefix expects a byte load straight from the pointer (lxsibzx 34, 0, 3);
; the big-endian prefix expects li 1 to feed the lxsibzx index, i.e. the byte at offset 1.
; Both then expect vspltb.
478 define <16 x i8> @vecucss(i16* nocapture readonly %ptr) {
479 entry:
480 %0 = load i16, i16* %ptr, align 2
481 %conv = trunc i16 %0 to i8
482 %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0
483 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
484 ret <16 x i8> %splat.splat
485 ; CHECK-LABEL: vecucss
486 ; CHECK: lxsibzx 34, 0, 3
487 ; CHECK-NEXT: vspltb 2, 2, 7
488 ; CHECK-BE-LABEL: vecucss
489 ; CHECK-BE: li [[OFFSET:[0-9]+]], 1
490 ; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]]
491 ; CHECK-BE-NEXT: vspltb 2, 2, 7
492 }
493
494 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, sign-extends it to i32 and broadcasts it across a <4 x i32>.
; Both check prefixes expect lxsihzx, then vextsh2w for the sign extension, then xxspltw.
495 define <4 x i32> @vecuiss(i16* nocapture readonly %ptr) {
496 entry:
497 %0 = load i16, i16* %ptr, align 2
498 %conv = sext i16 %0 to i32
499 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
500 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
501 ret <4 x i32> %splat.splat
502 ; CHECK-LABEL: vecuiss
503 ; CHECK: lxsihzx 34, 0, 3
504 ; CHECK-NEXT: vextsh2w 2, 2
505 ; CHECK-NEXT: xxspltw 34, 34, 1
506 ; CHECK-BE-LABEL: vecuiss
507 ; CHECK-BE: lxsihzx 34, 0, 3
508 ; CHECK-BE-NEXT: vextsh2w 2, 2
509 ; CHECK-BE-NEXT: xxspltw 34, 34, 1
510 }
511
512 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, sign-extends it to i64 and broadcasts it across a <2 x i64>.
; Both check prefixes expect lxsihzx, then vextsh2d for the sign extension, then xxspltd.
513 define <2 x i64> @veculss(i16* nocapture readonly %ptr) {
514 entry:
515 %0 = load i16, i16* %ptr, align 2
516 %conv = sext i16 %0 to i64
517 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
518 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
519 ret <2 x i64> %splat.splat
520 ; CHECK-LABEL: veculss
521 ; CHECK: lxsihzx 34, 0, 3
522 ; CHECK-NEXT: vextsh2d 2, 2
523 ; CHECK-NEXT: xxspltd 34, 34, 0
524 ; CHECK-BE-LABEL: veculss
525 ; CHECK-BE: lxsihzx 34, 0, 3
526 ; CHECK-BE-NEXT: vextsh2d 2, 2
527 ; CHECK-BE-NEXT: xxspltd 34, 34, 0
528 }
529
530 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, truncates it to i8 and broadcasts the byte across a <16 x i8>.
; The default prefix expects a byte load straight from the pointer (lxsibzx 34, 0, 3);
; the big-endian prefix expects li 1 to feed the lxsibzx index, i.e. the byte at offset 1.
; Both then expect vspltb.
531 define <16 x i8> @vecscss(i16* nocapture readonly %ptr) {
532 entry:
533 %0 = load i16, i16* %ptr, align 2
534 %conv = trunc i16 %0 to i8
535 %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0
536 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
537 ret <16 x i8> %splat.splat
538 ; CHECK-LABEL: vecscss
539 ; CHECK: lxsibzx 34, 0, 3
540 ; CHECK-NEXT: vspltb 2, 2, 7
541 ; CHECK-BE-LABEL: vecscss
542 ; CHECK-BE: li [[OFFSET:[0-9]+]], 1
543 ; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]]
544 ; CHECK-BE-NEXT: vspltb 2, 2, 7
545 }
546
547 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, sign-extends it to i32 and broadcasts it across a <4 x i32>.
; Both check prefixes expect lxsihzx, then vextsh2w for the sign extension, then xxspltw.
548 define <4 x i32> @vecsiss(i16* nocapture readonly %ptr) {
549 entry:
550 %0 = load i16, i16* %ptr, align 2
551 %conv = sext i16 %0 to i32
552 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
553 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
554 ret <4 x i32> %splat.splat
555 ; CHECK-LABEL: vecsiss
556 ; CHECK: lxsihzx 34, 0, 3
557 ; CHECK-NEXT: vextsh2w 2, 2
558 ; CHECK-NEXT: xxspltw 34, 34, 1
559 ; CHECK-BE-LABEL: vecsiss
560 ; CHECK-BE: lxsihzx 34, 0, 3
561 ; CHECK-BE-NEXT: vextsh2w 2, 2
562 ; CHECK-BE-NEXT: xxspltw 34, 34, 1
563 }
564
565 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, sign-extends it to i64 and broadcasts it across a <2 x i64>.
; Both check prefixes expect lxsihzx, then vextsh2d for the sign extension, then xxspltd.
566 define <2 x i64> @vecslss(i16* nocapture readonly %ptr) {
567 entry:
568 %0 = load i16, i16* %ptr, align 2
569 %conv = sext i16 %0 to i64
570 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
571 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
572 ret <2 x i64> %splat.splat
573 ; CHECK-LABEL: vecslss
574 ; CHECK: lxsihzx 34, 0, 3
575 ; CHECK-NEXT: vextsh2d 2, 2
576 ; CHECK-NEXT: xxspltd 34, 34, 0
577 ; CHECK-BE-LABEL: vecslss
578 ; CHECK-BE: lxsihzx 34, 0, 3
579 ; CHECK-BE-NEXT: vextsh2d 2, 2
580 ; CHECK-BE-NEXT: xxspltd 34, 34, 0
581 }
582
583 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, converts it to float with sitofp and broadcasts it across a <4 x float>.
; Both check prefixes expect lxsihzx, vextsh2d, xscvsxdsp, xscvdpspn and xxspltw on consecutive lines.
; Only the last three are tied together through captured registers; the operands of the
; load and of the sign extension are not constrained against each other.
584 define <4 x float> @vecfss(i16* nocapture readonly %ptr) {
585 entry:
586 %0 = load i16, i16* %ptr, align 2
587 %conv = sitofp i16 %0 to float
588 %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
589 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
590 ret <4 x float> %splat.splat
591 ; CHECK-LABEL: vecfss
592 ; CHECK: lxsihzx
593 ; CHECK-NEXT: vextsh2d
594 ; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
595 ; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
596 ; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
597 ; CHECK-BE-LABEL: vecfss
598 ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
599 ; CHECK-BE-NEXT: vextsh2d
600 ; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
601 ; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
602 ; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
603 }
604
605 ; Function Attrs: norecurse nounwind readonly
; Loads one i16, converts it to double with sitofp and broadcasts it across a <2 x double>.
; Both check prefixes expect lxsihzx, vextsh2d, xscvsxddp and xxspltd on consecutive lines;
; only the conversion result is tracked through a captured register.
606 define <2 x double> @vecdss(i16* nocapture readonly %ptr) {
607 entry:
608 %0 = load i16, i16* %ptr, align 2
609 %conv = sitofp i16 %0 to double
610 %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0
611 %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
612 ret <2 x double> %splat.splat
613 ; CHECK-LABEL: vecdss
614 ; CHECK: lxsihzx
615 ; CHECK-NEXT: vextsh2d
616 ; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]],
617 ; CHECK-NEXT: xxspltd 34, [[CONVD]], 0
618 ; CHECK-BE-LABEL: vecdss
619 ; CHECK-BE: lxsihzx
620 ; CHECK-BE-NEXT: vextsh2d
621 ; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]],
622 ; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0
623 }
624
625 ; Function Attrs: norecurse nounwind
; Converts the float argument to i8 with fptosi and stores the byte through %ptr.
; Both check prefixes expect xscvdpsxws followed (not necessarily adjacently) by stxsibx.
626 define void @storefsc(float %f, i8* nocapture %ptr) {
627 entry:
628 %conv = fptosi float %f to i8
629 store i8 %conv, i8* %ptr, align 1
630 ret void
631 ; CHECK-LABEL: storefsc
632 ; CHECK: xscvdpsxws 0, 1
633 ; CHECK: stxsibx 0, 0, 4
634 ; CHECK-BE-LABEL: storefsc
635 ; CHECK-BE: xscvdpsxws 0, 1
636 ; CHECK-BE: stxsibx 0, 0, 4
637 }
638
639 ; Function Attrs: norecurse nounwind
; Converts the double argument to i8 with fptosi and stores the byte through %ptr.
; Both check prefixes expect xscvdpsxws followed (not necessarily adjacently) by stxsibx.
640 define void @storedsc(double %d, i8* nocapture %ptr) {
641 entry:
642 %conv = fptosi double %d to i8
643 store i8 %conv, i8* %ptr, align 1
644 ret void
645 ; CHECK-LABEL: storedsc
646 ; CHECK: xscvdpsxws 0, 1
647 ; CHECK: stxsibx 0, 0, 4
648 ; CHECK-BE-LABEL: storedsc
649 ; CHECK-BE: xscvdpsxws 0, 1
650 ; CHECK-BE: stxsibx 0, 0, 4
651 }
652
653 ; Function Attrs: norecurse nounwind
654 define void @storevcsc0(<16 x i8> %v, i8* nocapture %ptr) {
655 entry:
656 %vecext = extractelement <16 x i8> %v, i32 0
657 store i8 %vecext, i8* %ptr, align 1
658 ret void
659 ; CHECK-LABEL: storevcsc0
660 ; CHECK: vsldoi 2, 2, 2, 8
661 ; CHECK-NEXT: stxsibx 34, 0, 5
662 ; CHECK-BE-LABEL: storevcsc0
663 ; CHECK-BE: vsldoi 2, 2, 2, 9
664 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
665 }
666
667 ; Function Attrs: norecurse nounwind
668 define void @storevcsc1(<16 x i8> %v, i8* nocapture %ptr) {
669 entry:
670 %vecext = extractelement <16 x i8> %v, i32 1
671 store i8 %vecext, i8* %ptr, align 1
672 ret void
673 ; CHECK-LABEL: storevcsc1
674 ; CHECK: vsldoi 2, 2, 2, 7
675 ; CHECK-NEXT: stxsibx 34, 0, 5
676 ; CHECK-BE-LABEL: storevcsc1
677 ; CHECK-BE: vsldoi 2, 2, 2, 10
678 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
679 }
680
681 ; Function Attrs: norecurse nounwind
682 define void @storevcsc2(<16 x i8> %v, i8* nocapture %ptr) {
683 entry:
684 %vecext = extractelement <16 x i8> %v, i32 2
685 store i8 %vecext, i8* %ptr, align 1
686 ret void
687 ; CHECK-LABEL: storevcsc2
688 ; CHECK: vsldoi 2, 2, 2, 6
689 ; CHECK-NEXT: stxsibx 34, 0, 5
690 ; CHECK-BE-LABEL: storevcsc2
691 ; CHECK-BE: vsldoi 2, 2, 2, 11
692 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
693 }
694
695 ; Function Attrs: norecurse nounwind
696 define void @storevcsc3(<16 x i8> %v, i8* nocapture %ptr) {
697 entry:
698 %vecext = extractelement <16 x i8> %v, i32 3
699 store i8 %vecext, i8* %ptr, align 1
700 ret void
701 ; CHECK-LABEL: storevcsc3
702 ; CHECK: vsldoi 2, 2, 2, 5
703 ; CHECK-NEXT: stxsibx 34, 0, 5
704 ; CHECK-BE-LABEL: storevcsc3
705 ; CHECK-BE: vsldoi 2, 2, 2, 12
706 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
707 }
708
709 ; Function Attrs: norecurse nounwind
710 define void @storevcsc4(<16 x i8> %v, i8* nocapture %ptr) {
711 entry:
712 %vecext = extractelement <16 x i8> %v, i32 4
713 store i8 %vecext, i8* %ptr, align 1
714 ret void
715 ; CHECK-LABEL: storevcsc4
716 ; CHECK: vsldoi 2, 2, 2, 4
717 ; CHECK-NEXT: stxsibx 34, 0, 5
718 ; CHECK-BE-LABEL: storevcsc4
719 ; CHECK-BE: vsldoi 2, 2, 2, 13
720 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
721 }
722
723 ; Function Attrs: norecurse nounwind
724 define void @storevcsc5(<16 x i8> %v, i8* nocapture %ptr) {
725 entry:
726 %vecext = extractelement <16 x i8> %v, i32 5
727 store i8 %vecext, i8* %ptr, align 1
728 ret void
729 ; CHECK-LABEL: storevcsc5
730 ; CHECK: vsldoi 2, 2, 2, 3
731 ; CHECK-NEXT: stxsibx 34, 0, 5
732 ; CHECK-BE-LABEL: storevcsc5
733 ; CHECK-BE: vsldoi 2, 2, 2, 14
734 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
735 }
736
737 ; Function Attrs: norecurse nounwind
738 define void @storevcsc6(<16 x i8> %v, i8* nocapture %ptr) {
739 entry:
740 %vecext = extractelement <16 x i8> %v, i32 6
741 store i8 %vecext, i8* %ptr, align 1
742 ret void
743 ; CHECK-LABEL: storevcsc6
744 ; CHECK: vsldoi 2, 2, 2, 2
745 ; CHECK-NEXT: stxsibx 34, 0, 5
746 ; CHECK-BE-LABEL: storevcsc6
747 ; CHECK-BE: vsldoi 2, 2, 2, 15
748 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
749 }
750
751 ; Function Attrs: norecurse nounwind
752 define void @storevcsc7(<16 x i8> %v, i8* nocapture %ptr) {
753 entry:
754 %vecext = extractelement <16 x i8> %v, i32 7
755 store i8 %vecext, i8* %ptr, align 1
756 ret void
757 ; CHECK-LABEL: storevcsc7
758 ; CHECK: vsldoi 2, 2, 2, 1
759 ; CHECK-NEXT: stxsibx 34, 0, 5
760 ; CHECK-BE-LABEL: storevcsc7
761 ; CHECK-BE: stxsibx 34, 0, 5
762 }
763
764 ; Function Attrs: norecurse nounwind
765 define void @storevcsc8(<16 x i8> %v, i8* nocapture %ptr) {
766 entry:
767 %vecext = extractelement <16 x i8> %v, i32 8
768 store i8 %vecext, i8* %ptr, align 1
769 ret void
770 ; CHECK-LABEL: storevcsc8
771 ; CHECK: stxsibx 34, 0, 5
772 ; CHECK-BE-LABEL: storevcsc8
773 ; CHECK-BE: vsldoi 2, 2, 2, 1
774 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
775 }
776
777 ; Function Attrs: norecurse nounwind
778 define void @storevcsc9(<16 x i8> %v, i8* nocapture %ptr) {
779 entry:
780 %vecext = extractelement <16 x i8> %v, i32 9
781 store i8 %vecext, i8* %ptr, align 1
782 ret void
783 ; CHECK-LABEL: storevcsc9
784 ; CHECK: vsldoi 2, 2, 2, 15
785 ; CHECK-NEXT: stxsibx 34, 0, 5
786 ; CHECK-BE-LABEL: storevcsc9
787 ; CHECK-BE: vsldoi 2, 2, 2, 2
788 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
789 }
790
791 ; Function Attrs: norecurse nounwind
792 define void @storevcsc10(<16 x i8> %v, i8* nocapture %ptr) {
793 entry:
794 %vecext = extractelement <16 x i8> %v, i32 10
795 store i8 %vecext, i8* %ptr, align 1
796 ret void
797 ; CHECK-LABEL: storevcsc10
798 ; CHECK: vsldoi 2, 2, 2, 14
799 ; CHECK-NEXT: stxsibx 34, 0, 5
800 ; CHECK-BE-LABEL: storevcsc10
801 ; CHECK-BE: vsldoi 2, 2, 2, 3
802 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
803 }
804
805 ; Function Attrs: norecurse nounwind
806 define void @storevcsc11(<16 x i8> %v, i8* nocapture %ptr) {
807 entry:
808 %vecext = extractelement <16 x i8> %v, i32 11
809 store i8 %vecext, i8* %ptr, align 1
810 ret void
811 ; CHECK-LABEL: storevcsc11
812 ; CHECK: vsldoi 2, 2, 2, 13
813 ; CHECK-NEXT: stxsibx 34, 0, 5
814 ; CHECK-BE-LABEL: storevcsc11
815 ; CHECK-BE: vsldoi 2, 2, 2, 4
816 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
817 }
818
819 ; Function Attrs: norecurse nounwind
820 define void @storevcsc12(<16 x i8> %v, i8* nocapture %ptr) {
821 entry:
822 %vecext = extractelement <16 x i8> %v, i32 12
823 store i8 %vecext, i8* %ptr, align 1
824 ret void
825 ; CHECK-LABEL: storevcsc12
826 ; CHECK: vsldoi 2, 2, 2, 12
827 ; CHECK-NEXT: stxsibx 34, 0, 5
828 ; CHECK-BE-LABEL: storevcsc12
829 ; CHECK-BE: vsldoi 2, 2, 2, 5
830 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
831 }
832
833 ; Function Attrs: norecurse nounwind
834 define void @storevcsc13(<16 x i8> %v, i8* nocapture %ptr) {
835 entry:
836 %vecext = extractelement <16 x i8> %v, i32 13
837 store i8 %vecext, i8* %ptr, align 1
838 ret void
839 ; CHECK-LABEL: storevcsc13
840 ; CHECK: vsldoi 2, 2, 2, 11
841 ; CHECK-NEXT: stxsibx 34, 0, 5
842 ; CHECK-BE-LABEL: storevcsc13
843 ; CHECK-BE: vsldoi 2, 2, 2, 6
844 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
845 }
846
847 ; Function Attrs: norecurse nounwind
848 define void @storevcsc14(<16 x i8> %v, i8* nocapture %ptr) {
849 entry:
850 %vecext = extractelement <16 x i8> %v, i32 14
851 store i8 %vecext, i8* %ptr, align 1
852 ret void
853 ; CHECK-LABEL: storevcsc14
854 ; CHECK: vsldoi 2, 2, 2, 10
855 ; CHECK-NEXT: stxsibx 34, 0, 5
856 ; CHECK-BE-LABEL: storevcsc14
857 ; CHECK-BE: vsldoi 2, 2, 2, 7
858 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
859 }
860
861 ; Function Attrs: norecurse nounwind
862 define void @storevcsc15(<16 x i8> %v, i8* nocapture %ptr) {
863 entry:
864 %vecext = extractelement <16 x i8> %v, i32 15
865 store i8 %vecext, i8* %ptr, align 1
866 ret void
867 ; CHECK-LABEL: storevcsc15
868 ; CHECK: vsldoi 2, 2, 2, 9
869 ; CHECK-NEXT: stxsibx 34, 0, 5
870 ; CHECK-BE-LABEL: storevcsc15
871 ; CHECK-BE: vsldoi 2, 2, 2, 8
872 ; CHECK-BE-NEXT: stxsibx 34, 0, 5
873 }
874
875 ; Function Attrs: norecurse nounwind
876 define void @storefss(float %f, i16* nocapture %ptr) {
877 entry:
878 %conv = fptosi float %f to i16
879 store i16 %conv, i16* %ptr, align 2
880 ret void
881 ; CHECK-LABEL: storefss
882 ; CHECK: xscvdpsxws 0, 1
883 ; CHECK: stxsihx 0, 0, 4
884 ; CHECK-BE-LABEL: storefss
885 ; CHECK-BE: xscvdpsxws 0, 1
886 ; CHECK-BE: stxsihx 0, 0, 4
887 }
888
889 ; Function Attrs: norecurse nounwind
890 define void @storedss(double %d, i16* nocapture %ptr) {
891 entry:
892 %conv = fptosi double %d to i16
893 store i16 %conv, i16* %ptr, align 2
894 ret void
895 ; CHECK-LABEL: storedss
896 ; CHECK: xscvdpsxws 0, 1
897 ; CHECK: stxsihx 0, 0, 4
898 ; CHECK-BE-LABEL: storedss
899 ; CHECK-BE: xscvdpsxws 0, 1
900 ; CHECK-BE: stxsihx 0, 0, 4
901 }
902
903 ; Function Attrs: norecurse nounwind
904 define void @storevsss0(<8 x i16> %v, i16* nocapture %ptr) {
905 entry:
906 %vecext = extractelement <8 x i16> %v, i32 0
907 store i16 %vecext, i16* %ptr, align 2
908 ret void
909 ; CHECK-LABEL: storevsss0
910 ; CHECK: vsldoi 2, 2, 2, 8
911 ; CHECK-NEXT: stxsihx 34, 0, 5
912 ; CHECK-BE-LABEL: storevsss0
913 ; CHECK-BE: vsldoi 2, 2, 2, 10
914 ; CHECK-BE-NEXT: stxsihx 34, 0, 5
915 }
916
917 ; Function Attrs: norecurse nounwind
918 define void @storevsss1(<8 x i16> %v, i16* nocapture %ptr) {
919 entry:
920 %vecext = extractelement <8 x i16> %v, i32 1
921 store i16 %vecext, i16* %ptr, align 2
922 ret void
923 ; CHECK-LABEL: storevsss1
924 ; CHECK: vsldoi 2, 2, 2, 6
925 ; CHECK-NEXT: stxsihx 34, 0, 5
926 ; CHECK-BE-LABEL: storevsss1
927 ; CHECK-BE: vsldoi 2, 2, 2, 12
928 ; CHECK-BE-NEXT: stxsihx 34, 0, 5
929 }
930
931 ; Function Attrs: norecurse nounwind
932 define void @storevsss2(<8 x i16> %v, i16* nocapture %ptr) {
933 entry:
934 %vecext = extractelement <8 x i16> %v, i32 2
935 store i16 %vecext, i16* %ptr, align 2
936 ret void
937 ; CHECK-LABEL: storevsss2
938 ; CHECK: vsldoi 2, 2, 2, 4
939 ; CHECK-NEXT: stxsihx 34, 0, 5
940 ; CHECK-BE-LABEL: storevsss2
941 ; CHECK-BE: vsldoi 2, 2, 2, 14
942 ; CHECK-BE-NEXT: stxsihx 34, 0, 5
943 }
944
945 ; Function Attrs: norecurse nounwind
946 define void @storevsss3(<8 x i16> %v, i16* nocapture %ptr) {
947 entry:
948 %vecext = extractelement <8 x i16> %v, i32 3
949 store i16 %vecext, i16* %ptr, align 2
950 ret void
951 ; CHECK-LABEL: storevsss3
952 ; CHECK: vsldoi 2, 2, 2, 2
953 ; CHECK-NEXT: stxsihx 34, 0, 5
954 ; CHECK-BE-LABEL: storevsss3
955 ; CHECK-BE: stxsihx 34, 0, 5
956 }
957
958 ; Function Attrs: norecurse nounwind
959 define void @storevsss4(<8 x i16> %v, i16* nocapture %ptr) {
960 entry:
961 %vecext = extractelement <8 x i16> %v, i32 4
962 store i16 %vecext, i16* %ptr, align 2
963 ret void
964 ; CHECK-LABEL: storevsss4
965 ; CHECK: stxsihx 34, 0, 5
966 ; CHECK-BE-LABEL: storevsss4
967 ; CHECK-BE: vsldoi 2, 2, 2, 2
968 ; CHECK-BE-NEXT: stxsihx 34, 0, 5
969 }
970
971 ; Function Attrs: norecurse nounwind
972 define void @storevsss5(<8 x i16> %v, i16* nocapture %ptr) {
973 entry:
974 %vecext = extractelement <8 x i16> %v, i32 5
975 store i16 %vecext, i16* %ptr, align 2
976 ret void
977 ; CHECK-LABEL: storevsss5
978 ; CHECK: vsldoi 2, 2, 2, 14
979 ; CHECK-NEXT: stxsihx 34, 0, 5
980 ; CHECK-BE-LABEL: storevsss5
981 ; CHECK-BE: vsldoi 2, 2, 2, 4
982 ; CHECK-BE-NEXT: stxsihx 34, 0, 5
983 }
984
985 ; Function Attrs: norecurse nounwind
986 define void @storevsss6(<8 x i16> %v, i16* nocapture %ptr) {
987 entry:
988 %vecext = extractelement <8 x i16> %v, i32 6
989 store i16 %vecext, i16* %ptr, align 2
990 ret void
991 ; CHECK-LABEL: storevsss6
992 ; CHECK: vsldoi 2, 2, 2, 12
993 ; CHECK-NEXT: stxsihx 34, 0, 5
994 ; CHECK-BE-LABEL: storevsss6
995 ; CHECK-BE: vsldoi 2, 2, 2, 6
996 ; CHECK-BE-NEXT: stxsihx 34, 0, 5
997 }
998
999 ; Function Attrs: norecurse nounwind
1000 define void @storevsss7(<8 x i16> %v, i16* nocapture %ptr) {
1001 entry:
1002 %vecext = extractelement <8 x i16> %v, i32 7
1003 store i16 %vecext, i16* %ptr, align 2
1004 ret void
1005 ; CHECK-LABEL: storevsss7
1006 ; CHECK: vsldoi 2, 2, 2, 10
1007 ; CHECK-NEXT: stxsihx 34, 0, 5
1008 ; CHECK-BE-LABEL: storevsss7
1009 ; CHECK-BE: vsldoi 2, 2, 2, 8
1010 ; CHECK-BE-NEXT: stxsihx 34, 0, 5
1011 }
1012
1013 ; Function Attrs: norecurse nounwind readonly
1014 define float @convscf(i8* nocapture readonly %ptr) {
1015 entry:
1016 %0 = load i8, i8* %ptr, align 1
1017 %conv = sitofp i8 %0 to float
1018 ret float %conv
1019 ; CHECK-LABEL: convscf
1020 ; CHECK: lxsibzx 34, 0, 3
1021 ; CHECK-NEXT: vextsb2d 2, 2
1022 ; CHECK-NEXT: xscvsxdsp 1, 34
1023 ; CHECK-BE-LABEL: convscf
1024 ; CHECK-BE: lxsibzx 34, 0, 3
1025 ; CHECK-BE-NEXT: vextsb2d 2, 2
1026 ; CHECK-BE-NEXT: xscvsxdsp 1, 34
1027 }
1028
1029 ; Function Attrs: norecurse nounwind readonly
1030 define float @convucf(i8* nocapture readonly %ptr) {
1031 entry:
1032 %0 = load i8, i8* %ptr, align 1
1033 %conv = uitofp i8 %0 to float
1034 ret float %conv
1035 ; CHECK-LABEL: convucf
1036 ; CHECK: lxsibzx 0, 0, 3
1037 ; CHECK-NEXT: xscvuxdsp 1, 0
1038 ; CHECK-BE-LABEL: convucf
1039 ; CHECK-BE: lxsibzx 0, 0, 3
1040 ; CHECK-BE-NEXT: xscvuxdsp 1, 0
1041 }
1042
1043 ; Function Attrs: norecurse nounwind readonly
1044 define double @convscd(i8* nocapture readonly %ptr) {
1045 entry:
1046 %0 = load i8, i8* %ptr, align 1
1047 %conv = sitofp i8 %0 to double
1048 ; CHECK-LABEL: convscd
1049 ; CHECK: lxsibzx 34, 0, 3
1050 ; CHECK-NEXT: vextsb2d 2, 2
1051 ; CHECK-NEXT: xscvsxddp 1, 34
1052 ; CHECK-BE-LABEL: convscd
1053 ; CHECK-BE: lxsibzx 34, 0, 3
1054 ; CHECK-BE-NEXT: vextsb2d 2, 2
1055 ; CHECK-BE-NEXT: xscvsxddp 1, 34
1056 ret double %conv
1057 }
1058
1059 ; Function Attrs: norecurse nounwind readonly
1060 define double @convucd(i8* nocapture readonly %ptr) {
1061 entry:
1062 %0 = load i8, i8* %ptr, align 1
1063 %conv = uitofp i8 %0 to double
1064 ret double %conv
1065 ; CHECK-LABEL: convucd
1066 ; CHECK: lxsibzx 0, 0, 3
1067 ; CHECK-NEXT: xscvuxddp 1, 0
1068 ; CHECK-BE-LABEL: convucd
1069 ; CHECK-BE: lxsibzx 0, 0, 3
1070 ; CHECK-BE-NEXT: xscvuxddp 1, 0
1071 }
1072
1073 ; Function Attrs: norecurse nounwind readonly
1074 define float @convssf(i16* nocapture readonly %ptr) {
1075 entry:
1076 %0 = load i16, i16* %ptr, align 2
1077 %conv = sitofp i16 %0 to float
1078 ret float %conv
1079 ; CHECK-LABEL: convssf
1080 ; CHECK: lxsihzx 34, 0, 3
1081 ; CHECK-NEXT: vextsh2d 2, 2
1082 ; CHECK-NEXT: xscvsxdsp 1, 34
1083 ; CHECK-BE-LABEL: convssf
1084 ; CHECK-BE: lxsihzx 34, 0, 3
1085 ; CHECK-BE-NEXT: vextsh2d 2, 2
1086 ; CHECK-BE-NEXT: xscvsxdsp 1, 34
1087 }
1088
1089 ; Function Attrs: norecurse nounwind readonly
1090 define float @convusf(i16* nocapture readonly %ptr) {
1091 entry:
1092 %0 = load i16, i16* %ptr, align 2
1093 %conv = uitofp i16 %0 to float
1094 ret float %conv
1095 ; CHECK-LABEL: convusf
1096 ; CHECK: lxsihzx 0, 0, 3
1097 ; CHECK-NEXT: xscvuxdsp 1, 0
1098 ; CHECK-BE-LABEL: convusf
1099 ; CHECK-BE: lxsihzx 0, 0, 3
1100 ; CHECK-BE-NEXT: xscvuxdsp 1, 0
1101 }
1102
1103 ; Function Attrs: norecurse nounwind readonly
1104 define double @convssd(i16* nocapture readonly %ptr) {
1105 entry:
1106 %0 = load i16, i16* %ptr, align 2
1107 %conv = sitofp i16 %0 to double
1108 ret double %conv
1109 ; CHECK-LABEL: convssd
1110 ; CHECK: lxsihzx 34, 0, 3
1111 ; CHECK-NEXT: vextsh2d 2, 2
1112 ; CHECK-NEXT: xscvsxddp 1, 34
1113 ; CHECK-BE-LABEL: convssd
1114 ; CHECK-BE: lxsihzx 34, 0, 3
1115 ; CHECK-BE-NEXT: vextsh2d 2, 2
1116 ; CHECK-BE-NEXT: xscvsxddp 1, 34
1117 }
1118
1119 ; Function Attrs: norecurse nounwind readonly
1120 define double @convusd(i16* nocapture readonly %ptr) {
1121 entry:
1122 %0 = load i16, i16* %ptr, align 2
1123 %conv = uitofp i16 %0 to double
1124 ret double %conv
1125 ; CHECK-LABEL: convusd
1126 ; CHECK: lxsihzx 0, 0, 3
1127 ; CHECK-NEXT: xscvuxddp 1, 0
1128 ; CHECK-BE-LABEL: convusd
1129 ; CHECK-BE: lxsihzx 0, 0, 3
1130 ; CHECK-BE-NEXT: xscvuxddp 1, 0
1131 }
33
44 @.str1 = external unnamed_addr constant [5 x i8], align 1
55 @.str10 = external unnamed_addr constant [9 x i8], align 1
6 @.v2f64 = external unnamed_addr constant <2 x double>, align 16
67
78 ; Function Attrs: nounwind
89 define void @main() #0 {
1112 ; CHECK: stxvd2x
1213
1314 entry:
15 %val = load <2 x double>, <2 x double>* @.v2f64, align 16
1416 %0 = tail call <8 x i16> @llvm.ppc.altivec.vupkhsb(<16 x i8> ) #0
1517 %1 = tail call <8 x i16> @llvm.ppc.altivec.vupklsb(<16 x i8> ) #0
1618 br i1 false, label %if.then.i68.i, label %check.exit69.i
2224 br i1 undef, label %if.then.i63.i, label %check.exit64.i
2325
2426 if.then.i63.i: ; preds = %check.exit69.i
25 tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0)) #0
27 tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0), <2 x double> %val) #0
2628 br label %check.exit64.i
2729
2830 check.exit64.i: ; preds = %if.then.i63.i, %check.exit69.i
0 ; RUN: llc < %s -march=ppc64 -mattr=+vsx -verify-machineinstrs | \
1 ; RUN: FileCheck %s --check-prefix=VSX
2 ; RUN: llc < %s -march=ppc64 -mattr=-vsx -verify-machineinstrs | \
3 ; RUN: FileCheck %s --check-prefix=NOVSX
4
5 define <2 x double> @interleaving_VSX_VMX(
6 <2 x double> %a, <2 x double> %b, <2 x double> %c,
7 <2 x double> %d, <2 x double> %e, <2 x double> %f) {
8 entry:
9 tail call void asm sideeffect "# clobbers",
10 "~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind
11 tail call void @goo(<2 x double> %a) nounwind
12 %add = fadd <2 x double> %a, %b
13 %sub = fsub <2 x double> %a, %b
14 %mul = fmul <2 x double> %add, %sub
15 %add1 = fadd <2 x double> %c, %d
16 %sub2 = fsub <2 x double> %c, %d
17 %mul3 = fmul <2 x double> %add1, %sub2
18 %add4 = fadd <2 x double> %mul, %mul3
19 %add5 = fadd <2 x double> %e, %f
20 %sub6 = fsub <2 x double> %e, %f
21 %mul7 = fmul <2 x double> %add5, %sub6
22 %add8 = fadd <2 x double> %add4, %mul7
23 ret <2 x double> %add8
24 ; VSX-LABEL: interleaving_VSX_VMX
25 ; VSX-NOT: stvx
26 ; VSX-NOT: lvx
27
28 ; NOVSX-LABEL: interleaving_VSX_VMX
29 ; NOVSX-NOT: stxvd2x
30 ; NOVSX-NOT: lxvd2x
31 }
32
33 declare void @goo(<2 x double>)
6969 ; CHECK-REG: blr
7070
7171 ; CHECK-FISL-LABEL: @test5
72 ; CHECK-FISL: vor
73 ; CHECK-FISL: vor
74 ; CHECK-FISL: xxlxor
75 ; CHECK-FISL: vor 2
72 ; CHECK-FISL: xxlxor 34, 34, 35
7673 ; CHECK-FISL: blr
7774
7875 ; CHECK-LE-LABEL: @test5
9087 ; CHECK-REG: blr
9188
9289 ; CHECK-FISL-LABEL: @test6
93 ; CHECK-FISL: vor 4, 2, 2
94 ; CHECK-FISL: vor 5, 3, 3
95 ; CHECK-FISL: xxlxor 36, 36, 37
96 ; CHECK-FISL: vor 2, 4, 4
90 ; CHECK-FISL: xxlxor 34, 34, 35
9791 ; CHECK-FISL: blr
9892
9993 ; CHECK-LE-LABEL: @test6
111105 ; CHECK-REG: blr
112106
113107 ; CHECK-FISL-LABEL: @test7
114 ; CHECK-FISL: vor 4, 2, 2
115 ; CHECK-FISL: vor 5, 3, 3
116 ; CHECK-FISL: xxlxor 36, 36, 37
117 ; CHECK-FISL: vor 2, 4, 4
108 ; CHECK-FISL: xxlxor 34, 34, 35
118109 ; CHECK-FISL: blr
119110
120111 ; CHECK-LE-LABEL: @test7
132123 ; CHECK-REG: blr
133124
134125 ; CHECK-FISL-LABEL: @test8
135 ; CHECK-FISL: vor
136 ; CHECK-FISL: vor
137 ; CHECK-FISL: xxlor
138 ; CHECK-FISL: vor 2
126 ; CHECK-FISL: xxlor 34, 34, 35
139127 ; CHECK-FISL: blr
140128
141129 ; CHECK-LE-LABEL: @test8
153141 ; CHECK-REG: blr
154142
155143 ; CHECK-FISL-LABEL: @test9
156 ; CHECK-FISL: vor 4, 2, 2
157 ; CHECK-FISL: vor 5, 3, 3
158 ; CHECK-FISL: xxlor 36, 36, 37
159 ; CHECK-FISL: vor 2, 4, 4
144 ; CHECK-FISL: xxlor 34, 34, 35
160145 ; CHECK-FISL: blr
161146
162147 ; CHECK-LE-LABEL: @test9
174159 ; CHECK-REG: blr
175160
176161 ; CHECK-FISL-LABEL: @test10
177 ; CHECK-FISL: vor 4, 2, 2
178 ; CHECK-FISL: vor 5, 3, 3
179 ; CHECK-FISL: xxlor 36, 36, 37
180 ; CHECK-FISL: vor 2, 4, 4
162 ; CHECK-FISL: xxlor 34, 34, 35
181163 ; CHECK-FISL: blr
182164
183165 ; CHECK-LE-LABEL: @test10
195177 ; CHECK-REG: blr
196178
197179 ; CHECK-FISL-LABEL: @test11
198 ; CHECK-FISL: vor
199 ; CHECK-FISL: vor
200 ; CHECK-FISL: xxland
201 ; CHECK-FISL: vor 2
180 ; CHECK-FISL: xxland 34, 34, 35
202181 ; CHECK-FISL: blr
203182
204183 ; CHECK-LE-LABEL: @test11
216195 ; CHECK-REG: blr
217196
218197 ; CHECK-FISL-LABEL: @test12
219 ; CHECK-FISL: vor 4, 2, 2
220 ; CHECK-FISL: vor 5, 3, 3
221 ; CHECK-FISL: xxland 36, 36, 37
222 ; CHECK-FISL: vor 2, 4, 4
198 ; CHECK-FISL: xxland 34, 34, 35
223199 ; CHECK-FISL: blr
224200
225201 ; CHECK-LE-LABEL: @test12
237213 ; CHECK-REG: blr
238214
239215 ; CHECK-FISL-LABEL: @test13
240 ; CHECK-FISL: vor 4, 2, 2
241 ; CHECK-FISL: vor 5, 3, 3
242 ; CHECK-FISL: xxland 36, 36, 37
243 ; CHECK-FISL: vor 2, 4, 4
216 ; CHECK-FISL: xxland 34, 34, 35
244217 ; CHECK-FISL: blr
245218
246219 ; CHECK-LE-LABEL: @test13
259232 ; CHECK-REG: blr
260233
261234 ; CHECK-FISL-LABEL: @test14
262 ; CHECK-FISL: vor 4, 3, 3
263 ; CHECK-FISL: vor 5, 2, 2
264 ; CHECK-FISL: xxlor 0, 37, 36
265 ; CHECK-FISL: xxlnor 36, 37, 36
266 ; CHECK-FISL: vor 2, 4, 4
235 ; CHECK-FISL: xxlor 0, 34, 35
236 ; CHECK-FISL: xxlnor 34, 34, 35
267237 ; CHECK-FISL: lis 0, -1
268238 ; CHECK-FISL: ori 0, 0, 65520
269239 ; CHECK-FISL: stxvd2x 0, 1, 0
285255 ; CHECK-REG: blr
286256
287257 ; CHECK-FISL-LABEL: @test15
288 ; CHECK-FISL: vor 4, 2, 2
289 ; CHECK-FISL: vor 5, 3, 3
290 ; CHECK-FISL: xxlor 36, 36, 37
291 ; CHECK-FISL: vor 0, 4, 4
292 ; CHECK-FISL: vor 4, 2, 2
293 ; CHECK-FISL: vor 5, 3, 3
294 ; CHECK-FISL: xxlnor 36, 36, 37
295 ; CHECK-FISL: vor 2, 4, 4
258 ; CHECK-FISL: xxlor 0, 34, 35
259 ; CHECK-FISL: xxlor 36, 0, 0
260 ; CHECK-FISL: xxlnor 0, 34, 35
261 ; CHECK-FISL: xxlor 34, 0, 0
296262 ; CHECK-FISL: lis 0, -1
297263 ; CHECK-FISL: ori 0, 0, 65520
298 ; CHECK-FISL: stvx 0, 1, 0
264 ; CHECK-FISL: stxvd2x 36, 1, 0
299265 ; CHECK-FISL: blr
300266
301267 ; CHECK-LE-LABEL: @test15
314280 ; CHECK-REG: blr
315281
316282 ; CHECK-FISL-LABEL: @test16
317 ; CHECK-FISL: vor 4, 2, 2
318 ; CHECK-FISL: vor 5, 3, 3
319 ; CHECK-FISL: xxlor 36, 36, 37
320 ; CHECK-FISL: vor 0, 4, 4
321 ; CHECK-FISL: vor 4, 2, 2
322 ; CHECK-FISL: vor 5, 3, 3
323 ; CHECK-FISL: xxlnor 36, 36, 37
324 ; CHECK-FISL: vor 2, 4, 4
283 ; CHECK-FISL: xxlor 0, 34, 35
284 ; CHECK-FISL: xxlor 36, 0, 0
285 ; CHECK-FISL: xxlnor 0, 34, 35
286 ; CHECK-FISL: xxlor 34, 0, 0
325287 ; CHECK-FISL: lis 0, -1
326288 ; CHECK-FISL: ori 0, 0, 65520
327 ; CHECK-FISL: stvx 0, 1, 0
289 ; CHECK-FISL: stxvd2x 36, 1, 0
328290 ; CHECK-FISL: blr
329291
330292 ; CHECK-LE-LABEL: @test16
343305 ; CHECK-REG: blr
344306
345307 ; CHECK-FISL-LABEL: @test17
346 ; CHECK-FISL: vor 4, 3, 3
347 ; CHECK-FISL: vor 5, 2, 2
348 ; CHECK-FISL: xxlnor 36, 36, 36
349 ; CHECK-FISL: xxland 36, 37, 36
350 ; CHECK-FISL: vor 2, 4, 4
308 ; CHECK-FISL: xxlnor 35, 35, 35
309 ; CHECK-FISL: xxland 34, 34, 35
351310 ; CHECK-FISL: blr
352311
353312 ; CHECK-LE-LABEL: @test17
366325 ; CHECK-REG: blr
367326
368327 ; CHECK-FISL-LABEL: @test18
369 ; CHECK-FISL: vor 4, 3, 3
370 ; CHECK-FISL: vor 5, 3, 3
371 ; CHECK-FISL: xxlnor 36, 36, 37
372 ; CHECK-FISL: vor 0, 4, 4
373 ; CHECK-FISL: vor 4, 2, 2
374 ; CHECK-FISL: vor 5, 3, 3
375 ; CHECK-FISL: xxlandc 36, 36, 37
376 ; CHECK-FISL: vor 2, 4, 4
328 ; CHECK-FISL: xxlnor 0, 35, 35
329 ; CHECK-FISL: xxlor 36, 0, 0
330 ; CHECK-FISL: xxlandc 0, 34, 35
331 ; CHECK-FISL: xxlor 34, 0, 0
377332 ; CHECK-FISL: lis 0, -1
378333 ; CHECK-FISL: ori 0, 0, 65520
379 ; CHECK-FISL: stvx 0, 1, 0
334 ; CHECK-FISL: stxvd2x 36, 1, 0
380335 ; CHECK-FISL: blr
381336
382337 ; CHECK-LE-LABEL: @test18
395350 ; CHECK-REG: blr
396351
397352 ; CHECK-FISL-LABEL: @test19
398 ; CHECK-FISL: vor 4, 3, 3
399 ; CHECK-FISL: vor 5, 3, 3
400 ; CHECK-FISL: xxlnor 36, 36, 37
401 ; CHECK-FISL: vor 0, 4, 4
402 ; CHECK-FISL: vor 4, 2, 2
403 ; CHECK-FISL: vor 5, 3, 3
404 ; CHECK-FISL: xxlandc 36, 36, 37
405 ; CHECK-FISL: vor 2, 4, 4
353 ; CHECK-FISL: xxlnor 0, 35, 35
354 ; CHECK-FISL: xxlor 36, 0, 0
355 ; CHECK-FISL: xxlandc 0, 34, 35
356 ; CHECK-FISL: xxlor 34, 0, 0
406357 ; CHECK-FISL: lis 0, -1
407358 ; CHECK-FISL: ori 0, 0, 65520
408 ; CHECK-FISL: stvx 0, 1, 0
359 ; CHECK-FISL: stxvd2x 36, 1, 0
409360 ; CHECK-FISL: blr
410361
411362 ; CHECK-LE-LABEL: @test19
424375 ; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}}
425376 ; CHECK-REG: blr
426377
427 ; FIXME: The fast-isel code is pretty miserable for this one.
428
429378 ; CHECK-FISL-LABEL: @test20
430 ; CHECK-FISL: vor 0, 5, 5
431 ; CHECK-FISL: vor 1, 4, 4
432 ; CHECK-FISL: vor 6, 3, 3
433 ; CHECK-FISL: vor 7, 2, 2
434 ; CHECK-FISL: vor 2, 1, 1
435 ; CHECK-FISL: vor 3, 0, 0
436 ; CHECK-FISL: vcmpequw 2, 2, 3
437 ; CHECK-FISL: vor 0, 2, 2
438 ; CHECK-FISL: xxsel 32, 38, 39, 32
439 ; CHECK-FISL: vor 2, 0, 0
379 ; CHECK-FISL: vcmpequw {{[0-9]+}}, 4, 5
380 ; CHECK-FISL: xxsel 34, 35, 34, {{[0-9]+}}
440381 ; CHECK-FISL: blr
441382
442383 ; CHECK-LE-LABEL: @test20
457398 ; CHECK-REG: blr
458399
459400 ; CHECK-FISL-LABEL: @test21
460 ; CHECK-FISL: vor 0, 5, 5
461 ; CHECK-FISL: vor 1, 4, 4
462 ; CHECK-FISL: vor 6, 3, 3
463 ; CHECK-FISL: vor 7, 2, 2
464 ; CHECK-FISL: xvcmpeqsp 32, 33, 32
465 ; CHECK-FISL: xxsel 32, 38, 39, 32
466 ; CHECK-FISL: vor 2, 0, 0
401 ; CHECK-FISL: xvcmpeqsp [[V1:[0-9]+]], 36, 37
402 ; CHECK-FISL: xxsel 34, 35, 34, [[V1]]
467403 ; CHECK-FISL: blr
468404
469405 ; CHECK-LE-LABEL: @test21
490426 ; CHECK-REG: blr
491427
492428 ; CHECK-FISL-LABEL: @test22
493 ; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 32
494 ; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 32, 32
495 ; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 33
429 ; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37
430 ; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36
431 ; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37
496432 ; CHECK-FISL-DAG: xxlnor
497433 ; CHECK-FISL-DAG: xxlnor
498434 ; CHECK-FISL-DAG: xxlor
499435 ; CHECK-FISL-DAG: xxlor
500 ; CHECK-FISL: xxsel 0, 38, 39, {{[0-9]+}}
436 ; CHECK-FISL: xxsel 34, 35, 34, {{[0-9]+}}
501437 ; CHECK-FISL: blr
502438
503439 ; CHECK-LE-LABEL: @test22
525461
526462 ; CHECK-FISL-LABEL: @test23
527463 ; CHECK-FISL: vcmpequh 4, 4, 5
528 ; CHECK-FISL: vor 0, 3, 3
529 ; CHECK-FISL: vor 1, 2, 2
530 ; CHECK-FISL: vor 6, 4, 4
531 ; CHECK-FISL: xxsel 32, 32, 33, 38
532 ; CHECK-FISL: vor 2, 0,
464 ; CHECK-FISL: xxsel 34, 35, 34, 36
533465 ; CHECK-FISL: blr
534466
535467 ; CHECK-LE-LABEL: @test23
551483
552484 ; CHECK-FISL-LABEL: @test24
553485 ; CHECK-FISL: vcmpequb 4, 4, 5
554 ; CHECK-FISL: vor 0, 3, 3
555 ; CHECK-FISL: vor 1, 2, 2
556 ; CHECK-FISL: vor 6, 4, 4
557 ; CHECK-FISL: xxsel 32, 32, 33, 38
558 ; CHECK-FISL: vor 2, 0, 0
486 ; CHECK-FISL: xxsel 34, 35, 34, 36
559487 ; CHECK-FISL: blr
560488
561489 ; CHECK-LE-LABEL: @test24
681609 ; CHECK-FISL-LABEL: @test30
682610 ; CHECK-FISL: lxvd2x 0, 0, 3
683611 ; CHECK-FISL: xxlor 34, 0, 0
684 ; CHECK-FISL: vor 3, 2, 2
685 ; CHECK-FISL: vor 2, 3, 3
686612 ; CHECK-FISL: blr
687613
688614 ; CHECK-LE-LABEL: @test30
714640 ; CHECK-REG: blr
715641
716642 ; CHECK-FISL-LABEL: @test32
717 ; CHECK-FISL: lxvw4x 0, 0, 3
718 ; CHECK-FISL: xxlor 34, 0, 0
643 ; CHECK-FISL: lxvw4x 34, 0, 3
719644 ; CHECK-FISL: blr
720645
721646 ; CHECK-LE-LABEL: @test32
733658 ; CHECK-REG: blr
734659
735660 ; CHECK-FISL-LABEL: @test33
736 ; CHECK-FISL: vor 3, 2, 2
737 ; CHECK-FISL: stxvw4x 35, 0, 3
661 ; CHECK-FISL: stxvw4x 34, 0, 3
738662 ; CHECK-FISL: blr
739663
740664 ; CHECK-LE-LABEL: @test33
769693 ; CHECK-REG: blr
770694
771695 ; CHECK-FISL-LABEL: @test33u
772 ; CHECK-FISL: vor 3, 2, 2
773 ; CHECK-FISL: stxvw4x 35, 0, 3
696 ; CHECK-FISL: stxvw4x 34, 0, 3
774697 ; CHECK-FISL: blr
775698
776699 ; CHECK-LE-LABEL: @test33u
788711 ; CHECK-REG: blr
789712
790713 ; CHECK-FISL-LABEL: @test34
791 ; CHECK-FISL: lxvw4x 0, 0, 3
792 ; CHECK-FISL: xxlor 34, 0, 0
714 ; CHECK-FISL: lxvw4x 34, 0, 3
793715 ; CHECK-FISL: blr
794716
795717 ; CHECK-LE-LABEL: @test34
807729 ; CHECK-REG: blr
808730
809731 ; CHECK-FISL-LABEL: @test35
810 ; CHECK-FISL: vor 3, 2, 2
811 ; CHECK-FISL: stxvw4x 35, 0, 3
732 ; CHECK-FISL: stxvw4x 34, 0, 3
812733 ; CHECK-FISL: blr
813734
814735 ;