llvm.org GIT mirror llvm / ffc5401
[AMDGPU] gfx1010 allows VOP3 to have a literal Differential Revision: https://reviews.llvm.org/D61413 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359756 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 1 year, 5 months ago
18 changed file(s) with 629 addition(s) and 120 deletion(s). Raw diff Collapse all Expand all
6262
6363 def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
6464 [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
65 >;
66
67 def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
68 [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
6965 >;
7066
7167 //===----------------------------------------------------------------------===//
201197
202198 // out = (src1 > src0) ? 1 : 0
203199 def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
204
205 // TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own
206 // nodes in TargetSelectionDAG.td.
207 def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>;
208
209 def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>;
210200
211201 def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
212202 SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
248238
249239 // Special case divide FMA with scale and flags (src0 = Quotient,
250240 // src1 = Denominator, src2 = Numerator).
251 def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
241 def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
242 [SDNPOptInGlue]>;
252243
253244 // Single or double precision division fixup.
254245 // Special case divide fixup and flags(src0 = Quotient, src1 =
235235 }
236236
237237 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
238 return isRegClass(RCID) || isInlinableImm(type);
238 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
239239 }
240240
241241 bool isRegOrImmWithInt16InputMods() const {
460460 }
461461
462462 bool isVSrcB32() const {
463 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
463 return isVCSrcF32() || isLiteralImm(MVT::i32);
464464 }
465465
466466 bool isVSrcB64() const {
472472 }
473473
474474 bool isVSrcV2B16() const {
475 llvm_unreachable("cannot happen");
476 return isVSrcB16();
475 return isVSrcB16() || isLiteralImm(MVT::v2i16);
477476 }
478477
479478 bool isVSrcF32() const {
480 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
479 return isVCSrcF32() || isLiteralImm(MVT::f32);
481480 }
482481
483482 bool isVSrcF64() const {
489488 }
490489
491490 bool isVSrcV2F16() const {
492 llvm_unreachable("cannot happen");
493 return isVSrcF16();
491 return isVSrcF16() || isLiteralImm(MVT::v2f16);
494492 }
495493
496494 bool isKImmFP32() const {
11441142 bool validateMIMGD16(const MCInst &Inst);
11451143 bool validateMIMGDim(const MCInst &Inst);
11461144 bool validateLdsDirect(const MCInst &Inst);
1145 bool validateVOP3Literal(const MCInst &Inst) const;
11471146 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
11481147 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
11491148 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
12861285 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
12871286 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
12881287 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1288 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1289 case AMDGPU::OPERAND_REG_IMM_V2FP16:
12891290 return &APFloat::IEEEhalf();
12901291 default:
12911292 llvm_unreachable("unsupported fp type");
14181419 return false;
14191420 }
14201421
1422 // We allow fp literals with f16x2 operands assuming that the specified
1423 // literal goes into the lower half and the upper half is zero. We also
1424 // require that the literal may be losslessly converted to f16.
1425 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1426 (type == MVT::v2i16)? MVT::i16 : type;
1427
14211428 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1422 return canLosslesslyConvertToFPType(FPLiteral, type);
1429 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
14231430 }
14241431
14251432 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
15341541 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
15351542 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
15361543 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1537 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1544 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1545 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1546 case AMDGPU::OPERAND_REG_IMM_V2FP16: {
15381547 bool lost;
15391548 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
15401549 // Convert literal to single precision
15611570 case AMDGPU::OPERAND_REG_IMM_FP32:
15621571 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
15631572 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1573 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1574 case AMDGPU::OPERAND_REG_IMM_V2FP16:
15641575 if (isSafeTruncation(Val, 32) &&
15651576 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
15661577 AsmParser->hasInv2PiInlineImm())) {
24182429 case 2: {
24192430 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
24202431 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2421 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2432 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2433 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2434 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
24222435 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
24232436 } else {
24242437 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
29182931 return NumLiterals <= 1;
29192932 }
29202933
2934 // VOP3 literal is only allowed in GFX10+ and only one can be used
2935 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
2936 unsigned Opcode = Inst.getOpcode();
2937 const MCInstrDesc &Desc = MII.get(Opcode);
2938 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
2939 return true;
2940
2941 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2942 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2943 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2944
2945 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2946
2947 unsigned NumLiterals = 0;
2948 uint32_t LiteralValue;
2949
2950 for (int OpIdx : OpIndices) {
2951 if (OpIdx == -1) break;
2952
2953 const MCOperand &MO = Inst.getOperand(OpIdx);
2954 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
2955 continue;
2956
2957 if (!isInlineConstant(Inst, OpIdx)) {
2958 uint32_t Value = static_cast<uint32_t>(MO.getImm());
2959 if (NumLiterals == 0 || LiteralValue != Value) {
2960 LiteralValue = Value;
2961 ++NumLiterals;
2962 }
2963 }
2964 }
2965
2966 return !NumLiterals ||
2967 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
2968 }
2969
29212970 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
29222971 const SMLoc &IDLoc) {
29232972 if (!validateLdsDirect(Inst)) {
29282977 if (!validateSOPLiteral(Inst)) {
29292978 Error(IDLoc,
29302979 "only one literal operand is allowed");
2980 return false;
2981 }
2982 if (!validateVOP3Literal(Inst)) {
2983 Error(IDLoc,
2984 "invalid literal operand");
29312985 return false;
29322986 }
29332987 if (!validateConstantBusLimitations(Inst)) {
617617 case AMDGPU::OPERAND_REG_IMM_FP16:
618618 printImmediate16(Op.getImm(), STI, O);
619619 break;
620 case AMDGPU::OPERAND_REG_IMM_V2INT16:
621 case AMDGPU::OPERAND_REG_IMM_V2FP16:
622 if (!isUInt<16>(Op.getImm()) &&
623 STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
624 printImmediate32(Op.getImm(), STI, O);
625 break;
626 }
627 LLVM_FALLTHROUGH;
620628 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
621629 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
622630 printImmediateV216(Op.getImm(), STI, O);
248248 // which does not have f16 support?
249249 return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
250250
251 case AMDGPU::OPERAND_REG_IMM_V2INT16:
252 case AMDGPU::OPERAND_REG_IMM_V2FP16:
253 if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
254 return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
255 LLVM_FALLTHROUGH;
251256 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
252257 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
253258 uint16_t Lo16 = static_cast<uint16_t>(Imm);
164164
165165 static bool updateOperand(FoldCandidate &Fold,
166166 const SIInstrInfo &TII,
167 const TargetRegisterInfo &TRI) {
167 const TargetRegisterInfo &TRI,
168 const GCNSubtarget &ST) {
168169 MachineInstr *MI = Fold.UseMI;
169170 MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
170171 assert(Old.isReg());
171172
172173 if (Fold.isImm()) {
173 if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
174 if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
175 AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold),
176 ST.hasInv2PiInlineImm())) {
174177 // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
175178 // already set.
176179 unsigned Opcode = MI->getOpcode();
191194 // Only apply the following transformation if that operand requires
192195 // a packed immediate.
193196 switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
197 case AMDGPU::OPERAND_REG_IMM_V2FP16:
198 case AMDGPU::OPERAND_REG_IMM_V2INT16:
194199 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
195200 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
196201 // If upper part is all zero we do not need op_sel_hi.
202207 return true;
203208 }
204209 Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
210 Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
211 return true;
205212 }
206213 break;
207214 default:
890897 Copy->addImplicitDefUseOperands(*MF);
891898
892899 for (FoldCandidate &Fold : FoldList) {
893 if (updateOperand(Fold, *TII, *TRI)) {
900 if (updateOperand(Fold, *TII, *TRI, *ST)) {
894901 // Clear kill flags.
895902 if (Fold.isReg()) {
896903 assert(Fold.OpToFold && Fold.OpToFold->isReg());
25482548
25492549 return false;
25502550 }
2551 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2552 case AMDGPU::OPERAND_REG_IMM_V2FP16:
25512553 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
25522554 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
2553 if (isUInt<16>(Imm)) {
2554 int16_t Trunc = static_cast<int16_t>(Imm);
2555 return ST.has16BitInsts() &&
2556 AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
2557 }
2558 if (!(Imm & 0xffff)) {
2559 return ST.has16BitInsts() &&
2560 AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm());
2561 }
25622555 uint32_t Trunc = static_cast<uint32_t>(Imm);
2563 return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
2556 return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
25642557 }
25652558 default:
25662559 llvm_unreachable("invalid bitwidth");
26022595
26032596 bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
26042597 const MachineOperand &MO) const {
2605 const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
2598 const MCInstrDesc &InstDesc = MI.getDesc();
2599 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
26062600
26072601 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
26082602
26152609 if (MO.isImm() && isInlineConstant(MO, OpInfo))
26162610 return RI.opCanUseInlineConstant(OpInfo.OperandType);
26172611
2618 return RI.opCanUseLiteralConstant(OpInfo.OperandType);
2612 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
2613 return false;
2614
2615 if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
2616 return true;
2617
2618 const MachineFunction *MF = MI.getParent()->getParent();
2619 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2620 return ST.hasVOP3Literal();
26192621 }
26202622
26212623 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
35993601 MachineOperand &Src1 = MI.getOperand(Src1Idx);
36003602
36013603 // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
3602 // we need to only have one constant bus use.
3603 //
3604 // Note we do not need to worry about literal constants here. They are
3605 // disabled for the operand type for instructions because they will always
3606 // violate the one constant bus use rule.
3604 // we need to only have one constant bus use before GFX10.
36073605 bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
36083606 if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1) {
36093607 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
36103608 MachineOperand &Src0 = MI.getOperand(Src0Idx);
36113609
3612 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
3610 if (Src0.isReg() && (RI.isSGPRReg(MRI, Src0.getReg()) ||
3611 isLiteralConstantLike(Src0, InstrDesc.OpInfo[Src0Idx])))
36133612 legalizeOpWithMove(MI, Src0Idx);
36143613 }
36153614
37013700 Src1.setSubReg(Src0SubReg);
37023701 }
37033702
3704 // Legalize VOP3 operands. Because all operand types are supported for any
3705 // operand, and since literal constants are not allowed and should never be
3706 // seen, we only need to worry about inserting copies if we use multiple SGPR
3707 // operands.
3703 // Legalize VOP3 operands. All operand types are supported for any operand
3704 // but only one literal constant and only starting from GFX10.
37083705 void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
37093706 MachineInstr &MI) const {
37103707 unsigned Opc = MI.getOpcode();
57315728 SIEncodingFamily Gen = subtargetEncodingFamily(ST);
57325729
57335730 if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
5734 ST.getGeneration() >= AMDGPUSubtarget::GFX9)
5731 ST.getGeneration() == AMDGPUSubtarget::GFX9)
57355732 Gen = SIEncodingFamily::GFX9;
57365733
5737 if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
5738 Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
5739 : SIEncodingFamily::SDWA;
57405734 // Adjust the encoding family to GFX80 for D16 buffer instructions when the
57415735 // subtarget has UnpackedD16VMem feature.
57425736 // TODO: remove this when we discard GFX80 encoding.
57435737 if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
57445738 Gen = SIEncodingFamily::GFX80;
5739
5740 if (get(Opcode).TSFlags & SIInstrFlags::SDWA) {
5741 switch (ST.getGeneration()) {
5742 default:
5743 Gen = SIEncodingFamily::SDWA;
5744 break;
5745 case AMDGPUSubtarget::GFX9:
5746 Gen = SIEncodingFamily::SDWA9;
5747 break;
5748 case AMDGPUSubtarget::GFX10:
5749 Gen = SIEncodingFamily::SDWA10;
5750 break;
5751 }
5752 }
57455753
57465754 int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
57475755
12081208 !if(!eq(VT.Value, f16.Value),
12091209 VSrc_f16,
12101210 !if(!eq(VT.Value, v2f16.Value),
1211 VCSrc_v2f16,
1211 VSrc_v2f16,
12121212 VSrc_f32
12131213 )
12141214 )
12181218 !if(!eq(VT.Value, i16.Value),
12191219 VSrc_b16,
12201220 !if(!eq(VT.Value, v2i16.Value),
1221 VCSrc_v2b16,
1221 VSrc_v2b16,
12221222 VSrc_b32
12231223 )
12241224 )
12541254 VSrc_128,
12551255 !if(!eq(VT.Size, 64),
12561256 !if(isFP,
1257 VCSrc_f64,
1258 VCSrc_b64),
1257 VSrc_f64,
1258 VSrc_b64),
12591259 !if(!eq(VT.Value, i1.Value),
12601260 SCSrc_i1,
12611261 !if(isFP,
12621262 !if(!eq(VT.Value, f16.Value),
1263 VCSrc_f16,
1263 VSrc_f16,
12641264 !if(!eq(VT.Value, v2f16.Value),
1265 VCSrc_v2f16,
1266 VCSrc_f32
1265 VSrc_v2f16,
1266 VSrc_f32
12671267 )
12681268 ),
12691269 !if(!eq(VT.Value, i16.Value),
1270 VCSrc_b16,
1270 VSrc_b16,
12711271 !if(!eq(VT.Value, v2i16.Value),
1272 VCSrc_v2b16,
1273 VCSrc_b32
1272 VSrc_v2b16,
1273 VSrc_b32
12741274 )
12751275 )
12761276 )
None ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX9 %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX10 %s
12
23 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_1:
3 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
4 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
45 define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
56 bb:
67 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
1314 }
1415
1516 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_0:
16 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
17 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
1718 define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
1819 bb:
1920 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
2627 }
2728
2829 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_1:
29 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
30 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
3031 define amdgpu_kernel void @test_pk_max_f16_literal_1_1(<2 x half> addrspace(1)* nocapture %arg) {
3132 bb:
3233 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
3940 }
4041
4142 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1:
42 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
43 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
4344 define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) {
4445 bb:
4546 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
5253 }
5354
5455 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_0:
55 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
56 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
5657 define amdgpu_kernel void @test_pk_max_f16_literal_m1_0(<2 x half> addrspace(1)* nocapture %arg) {
5758 bb:
5859 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
6566 }
6667
6768 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_m1:
68 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
69 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
6970 define amdgpu_kernel void @test_pk_max_f16_literal_m1_m1(<2 x half> addrspace(1)* nocapture %arg) {
7071 bb:
7172 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
7879 }
7980
8081 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_0:
81 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
82 ; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
8283 define amdgpu_kernel void @test_pk_max_f16_literal_0_0(<2 x half> addrspace(1)* nocapture %arg) {
8384 bb:
8485 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
9091 ret void
9192 }
9293
94 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8:
95 ; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c80000
96 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
97 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
98 define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(<2 x half> addrspace(1)* nocapture %arg) {
99 bb:
100 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
101 %tmp1 = zext i32 %tmp to i64
102 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
103 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
104 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH41C8>)
105 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
106 ret void
107 }
108
109 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_41c8_0:
110 ; GFX9: s_movk_i32 [[C:s[0-9]+]], 0x41c8
111 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
112 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}}{{$}}
113 define amdgpu_kernel void @test_pk_max_f16_literal_41c8_0(<2 x half> addrspace(1)* nocapture %arg) {
114 bb:
115 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
116 %tmp1 = zext i32 %tmp to i64
117 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
118 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
119 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH41C8, half 0xH0000>)
120 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
121 ret void
122 }
123
124 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_42ca_41c8:
125 ; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c842ca
126 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
127 ; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c842ca, v{{[0-9]+}}{{$}}
128 define amdgpu_kernel void @test_pk_max_f16_literal_42ca_41c8(<2 x half> addrspace(1)* nocapture %arg) {
129 bb:
130 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
131 %tmp1 = zext i32 %tmp to i64
132 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
133 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
134 %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH42CA, half 0xH41C8>)
135 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
136 ret void
137 }
138
93139 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
94140 declare i32 @llvm.amdgcn.workitem.id.x()
4040 s_mov_b32 s0, foo+2
4141 // VI: s_mov_b32 s0, 514 ; encoding: [0xff,0x00,0x80,0xbe,0x02,0x02,0x00,0x00]
4242
43 v_mul_f32 v0, foo+2, v2
44 // VI: v_mul_f32_e32 v0, 514, v2 ; encoding: [0xff,0x04,0x00,0x0a,0x02,0x02,0x00,0x00]
45
4643 BB1:
4744 v_nop_e64
4845 BB2:
7976
8077 v=1
8178 v_sin_f32 v0, -v
82 // VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e]
79 // NOVI: error: invalid operand for instruction
8380
81 v=1
8482 v_sin_f32 v0, -v[0]
8583 // VI: v_sin_f32_e64 v0, -v0 ; encoding: [0x00,0x00,0x69,0xd1,0x00,0x01,0x00,0x20]
8684
8785 s=1
88 v_sin_f32 v0, -s
89 // VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e]
86 s_not_b32 s0, -s
87 // VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe]
9088
9189 s0=1
92 v_sin_f32 v0, -s0
93 // VI: v_sin_f32_e64 v0, -s0 ; encoding: [0x00,0x00,0x69,0xd1,0x00,0x00,0x00,0x20]
90 s_not_b32 s0, -s0
91 // VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe]
9492
9593 ttmp=1
96 v_sin_f32 v0, -ttmp
97 // VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e]
94 s_not_b32 s0, -ttmp
95 // VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe]
9896
9997 ttmp0=1
100 v_sin_f32 v0, -[ttmp0]
101 // VI: v_sin_f32_e64 v0, -ttmp0 ; encoding: [0x00,0x00,0x69,0xd1,0x70,0x00,0x00,0x20]
98 s_not_b32 s0, -[ttmp0]
99 // VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe]
0 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s
1 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s
2
3 //-----------------------------------------------------------------------------------------
4 // On GFX10 we can use two scalar operands
5
6 v_add_f32 v0, s0, s1
7 // GFX10: v_add_f32_e64 v0, s0, s1 ; encoding: [0x00,0x00,0x03,0xd5,0x00,0x02,0x00,0x00]
8
9 v_madak_f32 v0, s0, v1, 42.42
10 // GFX10: v_madak_f32 v0, s0, v1, 0x4229ae14 ; encoding: [0x00,0x02,0x00,0x42,0x14,0xae,0x29,0x42]
11
12 v_med3_f32 v0, s0, s0, s1
13 // GFX10: v_med3_f32 v0, s0, s0, s1 ; encoding: [0x00,0x00,0x57,0xd5,0x00,0x00,0x04,0x00]
14
15 //-----------------------------------------------------------------------------------------
16 // v_div_fmas implicitly reads VCC, so only one scalar operand is possible
17
18 v_div_fmas_f32 v5, s3, s3, s3
19 // GFX10: v_div_fmas_f32 v5, s3, s3, s3 ; encoding: [0x05,0x00,0x6f,0xd5,0x03,0x06,0x0c,0x00]
20
21 v_div_fmas_f32 v5, s3, s3, s2
22 // GFX10-ERR: error: invalid operand (violates constant bus restrictions)
23
24 v_div_fmas_f32 v5, s3, 0x123, v3
25 // GFX10-ERR: error: invalid operand (violates constant bus restrictions)
26
27 v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678
28 // GFX10: v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678 ; encoding: [0x05,0x00,0x70,0xd5,0xff,0xfe,0xfd,0x03,0x78,0x56,0x34,0x12]
29
30 v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4]
31 // GFX10: v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4] ; encoding: [0x05,0x00,0x70,0xd5,0x01,0x05,0x0c,0x04]
32
33 v_div_fmas_f64 v[5:6], v[1:2], s[2:3], 0x123456
34 // GFX10-ERR: error: invalid operand (violates constant bus restrictions)
281281 // GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00]
282282 v_fract_f64_e32 v[0:1], 1234
283283
284 // NOSICI: error: invalid operand for instruction
285 // NOGFX89: error: invalid operand for instruction
284 // NOSICI: error: invalid literal operand
285 // NOGFX89: error: invalid literal operand
286286 v_trunc_f32_e64 v0, 1234
287287
288 // NOSICI: error: invalid operand for instruction
289 // NOGFX89: error: invalid operand for instruction
288 // NOSICI: error: invalid literal operand
289 // NOGFX89: error: invalid literal operand
290290 v_fract_f64_e64 v[0:1], 1234
291291
292292 // SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
377377 // GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00]
378378 v_and_b32_e32 v0, 1234, v1
379379
380 // NOSICI: error: invalid operand for instruction
381 // NOGFX89: error: invalid operand for instruction
380 // NOSICI: error: invalid literal operand
381 // NOGFX89: error: invalid literal operand
382382 v_and_b32_e64 v0, 1234, v1
383383
384384 // SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff]
449449 // GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00]
450450 v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882
451451
452 // NOSICI: error: invalid operand for instruction
452 // NOSICI: error: invalid literal operand
453453 // GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00]
454454 v_trunc_f32_e64 v0, 0x3e22f983
455455
456 // NOSICI: error: invalid operand for instruction
457 // NOGFX89: error: invalid operand for instruction
456 // NOSICI: error: invalid literal operand
457 // NOGFX89: error: invalid literal operand
458458 v_fract_f64_e64 v[0:1], 0x3e22f983
459459
460460 // NOSICI: error: invalid operand for instruction
465465 // GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26]
466466 v_and_b32_e32 v0, 0.159154943091895317852646485335, v1
467467
468 // NOSICI: error: invalid operand for instruction
468 // NOSICI: error: invalid literal operand
469469 // GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00]
470470 v_and_b32_e64 v0, 0.159154943091895317852646485335, v1
471471
0 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s
11
22 v_pk_add_f16 v1, -17, v2
3 // GFX9: error: invalid operand for instruction
3 // GFX9: error: invalid literal operand
44
55 v_pk_add_f16 v1, 65, v2
6 // GFX9: error: invalid operand for instruction
6 // GFX9: error: invalid literal operand
77
88 v_pk_add_f16 v1, 64.0, v2
9 // GFX9: error: invalid operand for instruction
9 // GFX9: error: invalid literal operand
1010
1111 v_pk_add_f16 v1, -0.15915494, v2
12 // GFX9: error: invalid operand for instruction
12 // GFX9: error: invalid literal operand
1313
1414 v_pk_add_f16 v1, -0.0, v2
15 // GFX9: error: invalid operand for instruction
15 // GFX9: error: invalid literal operand
1616
1717 v_pk_add_f16 v1, -32768, v2
18 // GFX9: error: invalid operand for instruction
18 // GFX9: error: invalid literal operand
1919
2020 v_pk_add_f16 v1, 32767, v2
21 // GFX9: error: invalid operand for instruction
21 // GFX9: error: invalid literal operand
2222
2323 v_pk_add_f16 v1, 0xffffffffffff000f, v2
24 // GFX9: error: invalid operand for instruction
24 // GFX9: error: invalid literal operand
2525
2626 v_pk_add_f16 v1, 0x1000ffff, v2
27 // GFX9: error: invalid operand for instruction
27 // GFX9: error: invalid literal operand
None // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s
0 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9
1 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
2
3 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX9
4 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX10
5
6 //===----------------------------------------------------------------------===//
7 // Inline constants
8 //===----------------------------------------------------------------------===//
19
210 v_pk_add_f16 v1, 0, v2
311 // GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]
12 // GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18]
413
514 v_pk_add_f16 v1, 0.0, v2
615 // GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]
16 // GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18]
717
818 v_pk_add_f16 v1, v2, 0
919 // GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]
20 // GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18]
1021
1122 v_pk_add_f16 v1, v2, 0.0
1223 // GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]
24 // GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18]
1325
1426 v_pk_add_f16 v1, 1.0, v2
1527 // GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]
28 // GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18]
1629
1730 v_pk_add_f16 v1, -1.0, v2
1831 // GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]
32 // GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18]
1933
2034 v_pk_add_f16 v1, -0.5, v2
2135 // GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]
36 // GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18]
2237
2338 v_pk_add_f16 v1, 0.5, v2
2439 // GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]
40 // GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18]
2541
2642 v_pk_add_f16 v1, 2.0, v2
2743 // GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]
44 // GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18]
2845
2946 v_pk_add_f16 v1, -2.0, v2
3047 // GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]
48 // GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18]
3149
3250 v_pk_add_f16 v1, 4.0, v2
3351 // GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]
52 // GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18]
3453
3554 v_pk_add_f16 v1, -4.0, v2
3655 // GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]
56 // GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18]
3757
3858 v_pk_add_f16 v1, 0.15915494, v2
3959 // GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]
60 // GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18]
4061
4162 v_pk_add_f16 v1, -1, v2
4263 // GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
64 // GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18]
4365
4466 v_pk_add_f16 v1, -2, v2
4567 // GFX9: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc2,0x04,0x02,0x18]
68 // GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18]
4669
4770 v_pk_add_f16 v1, -3, v2
4871 // GFX9: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc3,0x04,0x02,0x18]
72 // GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18]
4973
5074 v_pk_add_f16 v1, -16, v2
5175 // GFX9: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xd0,0x04,0x02,0x18]
76 // GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18]
5277
5378 v_pk_add_f16 v1, 1, v2
5479 // GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]
80 // GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18]
5581
5682 v_pk_add_f16 v1, 2, v2
5783 // GFX9: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x82,0x04,0x02,0x18]
84 // GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18]
5885
5986 v_pk_add_f16 v1, 3, v2
6087 // GFX9: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x83,0x04,0x02,0x18]
88 // GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18]
6189
6290 v_pk_add_f16 v1, 4, v2
6391 // GFX9: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x84,0x04,0x02,0x18]
92 // GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18]
6493
6594 v_pk_add_f16 v1, 15, v2
6695 // GFX9: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x8f,0x04,0x02,0x18]
96 // GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18]
6797
6898 v_pk_add_f16 v1, 16, v2
6999 // GFX9: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x90,0x04,0x02,0x18]
100 // GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18]
70101
71102 v_pk_add_f16 v1, 63, v2
72103 // GFX9: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xbf,0x04,0x02,0x18]
104 // GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18]
73105
74106 v_pk_add_f16 v1, 64, v2
75107 // GFX9: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc0,0x04,0x02,0x18]
108 // GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18]
76109
77110 v_pk_add_f16 v1, 0x0001, v2
78111 // GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]
112 // GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18]
79113
80114 v_pk_add_f16 v1, 0xffff, v2
81115 // GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
116 // GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18]
82117
83118 v_pk_add_f16 v1, 0x3c00, v2
84119 // GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]
120 // GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18]
85121
86122 v_pk_add_f16 v1, 0xbc00, v2
87123 // GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]
124 // GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18]
88125
89126 v_pk_add_f16 v1, 0x3800, v2
90127 // GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]
128 // GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18]
91129
92130 v_pk_add_f16 v1, 0xb800, v2
93131 // GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]
132 // GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18]
94133
95134 v_pk_add_f16 v1, 0x4000, v2
96135 // GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]
136 // GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18]
97137
98138 v_pk_add_f16 v1, 0xc000, v2
99139 // GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]
140 // GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18]
100141
101142 v_pk_add_f16 v1, 0x4400, v2
102143 // GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]
144 // GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18]
103145
104146 v_pk_add_f16 v1, 0xc400, v2
105147 // GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]
148 // GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18]
106149
107150 v_pk_add_f16 v1, 0x3118, v2
108151 // GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]
152 // GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18]
109153
110154 v_pk_add_f16 v1, 65535, v2
111155 // GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
156 // GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18]
157
158 //===----------------------------------------------------------------------===//
159 // Integer literals
160 //===----------------------------------------------------------------------===//
161
162 v_pk_add_f16 v5, v1, 0x12345678
163 // NOGFX9: error: invalid literal operand
164 // GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
165
166 v_pk_add_f16 v5, 0x12345678, v2
167 // NOGFX9: error: invalid literal operand
168 // GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
169
170 v_pk_add_f16 v5, -256, v2
171 // NOGFX9: error: invalid literal operand
172 // GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff]
173
174 v_pk_add_f16 v5, v1, 256
175 // NOGFX9: error: invalid literal operand
176 // GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00]
177
178 v_pk_add_u16 v5, v1, 0x12345678
179 // NOGFX9: error: invalid literal operand
180 // GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
181
182 v_pk_add_u16 v5, 0x12345678, v2
183 // NOGFX9: error: invalid literal operand
184 // GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
185
186 v_pk_add_u16 v5, -256, v2
187 // NOGFX9: error: invalid literal operand
188 // GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff]
189
190 v_pk_add_u16 v5, v1, 256
191 // NOGFX9: error: invalid literal operand
192 // GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00]
193
194 v_pk_add_f16 v5, v1, 0x123456780
195 // NOGFX9: error: invalid operand for instruction
196 // NOGFX10: error: invalid operand for instruction
197
198 v_pk_add_u16 v5, v1, 0x123456780
199 // NOGFX9: error: invalid operand for instruction
200 // NOGFX10: error: invalid operand for instruction
201
202 v_pk_fma_f16 v5, 0xaf123456, v2, v3
203 // NOGFX9: error: invalid literal operand
204 // GFX10: v_pk_fma_f16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf]
205
206 v_pk_fma_f16 v5, v1, 0xaf123456, v3
207 // NOGFX9: error: invalid literal operand
208 // GFX10: v_pk_fma_f16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf]
209
210 v_pk_fma_f16 v5, v1, v2, 0xaf123456
211 // NOGFX9: error: invalid literal operand
212 // GFX10: v_pk_fma_f16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf]
213
214 v_pk_mad_i16 v5, 0xaf123456, v2, v3
215 // NOGFX9: error: invalid literal operand
216 // GFX10: v_pk_mad_i16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf]
217
218 v_pk_mad_i16 v5, v1, 0xaf123456, v3
219 // NOGFX9: error: invalid literal operand
220 // GFX10: v_pk_mad_i16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf]
221
222 v_pk_mad_i16 v5, v1, v2, 0xaf123456
223 // NOGFX9: error: invalid literal operand
224 // GFX10: v_pk_mad_i16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf]
225
226 v_pk_ashrrev_i16 v5, 0x12345678, v2
227 // NOGFX9: error: invalid literal operand
228 // GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
229
230 v_pk_ashrrev_i16 v5, v1, 0x12345678
231 // NOGFX9: error: invalid literal operand
232 // GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
233
234 //===----------------------------------------------------------------------===//
235 // Floating-point literals (allowed if lossless conversion to f16 is possible)
236 //===----------------------------------------------------------------------===//
237
238 v_pk_add_f16 v5, v1, 0.1234
239 // NOGFX9: error: invalid literal operand
240 // GFX10: v_pk_add_f16 v5, v1, 0x2fe6 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00]
241
242 v_pk_add_u16 v5, v1, 0.1234
243 // NOGFX9: error: invalid literal operand
244 // GFX10: v_pk_add_u16 v5, v1, 0x2fe6 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00]
245
246 v_pk_fma_f16 v5, 0.1234, v2, v3
247 // NOGFX9: error: invalid literal operand
248 // GFX10: v_pk_fma_f16 v5, 0x2fe6, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0xe6,0x2f,0x00,0x00]
249
250 v_pk_fma_f16 v5, v1, 0.1234, v3
251 // NOGFX9: error: invalid literal operand
252 // GFX10: v_pk_fma_f16 v5, v1, 0x2fe6, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0xe6,0x2f,0x00,0x00]
253
254 v_pk_fma_f16 v5, v1, v2, 0.1234
255 // NOGFX9: error: invalid literal operand
256 // GFX10: v_pk_fma_f16 v5, v1, v2, 0x2fe6 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0xe6,0x2f,0x00,0x00]
257
258 v_pk_mad_i16 v5, 0.1234, v2, v3
259 // NOGFX9: error: invalid literal operand
260 // GFX10: v_pk_mad_i16 v5, 0x2fe6, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0xe6,0x2f,0x00,0x00]
261
262 v_pk_mad_i16 v5, v1, 0.1234, v3
263 // NOGFX9: error: invalid literal operand
264 // GFX10: v_pk_mad_i16 v5, v1, 0x2fe6, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0xe6,0x2f,0x00,0x00]
265
266 v_pk_mad_i16 v5, v1, v2, 0.1234
267 // NOGFX9: error: invalid literal operand
268 // GFX10: v_pk_mad_i16 v5, v1, v2, 0x2fe6 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0xe6,0x2f,0x00,0x00]
269
270 v_pk_add_f16 v5, v1, 123456.0
271 // NOGFX9: error: invalid operand for instruction
272 // NOGFX10: error: invalid operand for instruction
273
274 v_pk_add_u16 v5, v1, 123456.0
275 // NOGFX9: error: invalid operand for instruction
276 // NOGFX10: error: invalid operand for instruction
277
278 //===----------------------------------------------------------------------===//
279 // Packed VOP2
280 //===----------------------------------------------------------------------===//
281
282 // FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid
283 v_pk_fmac_f16 v5, 0x12345678, v2
284 // NOGFX9: error: instruction not supported on this GPU
285 // GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12]
None // RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu=kaveri -show-encoding %s | llvm-readobj -r | FileCheck %s
0 // RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu=kaveri -show-encoding %s | llvm-readobj -relocations | FileCheck %s
11
22 // CHECK: Relocations [
33 // CHECK: .rel.text {
4 // CHECK: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0 0x0
5 // CHECK: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1 0x0
6 // CHECK: R_AMDGPU_GOTPCREL global_var0 0x0
7 // CHECK: R_AMDGPU_GOTPCREL32_LO global_var1 0x0
8 // CHECK: R_AMDGPU_GOTPCREL32_HI global_var2 0x0
9 // CHECK: R_AMDGPU_REL32_LO global_var3 0x0
10 // CHECK: R_AMDGPU_REL32_HI global_var4 0x0
114 // CHECK: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0 0x0
125 // CHECK: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1 0x0
136 // CHECK: R_AMDGPU_GOTPCREL global_var0 0x0
3225 s_mov_b32 s5, global_var3@rel32@lo
3326 s_mov_b32 s6, global_var4@rel32@hi
3427
35 v_mov_b32 v0, SCRATCH_RSRC_DWORD0
36 v_mov_b32 v1, SCRATCH_RSRC_DWORD1
37 v_mov_b32 v2, global_var0@GOTPCREL
38 v_mov_b32 v3, global_var1@gotpcrel32@lo
39 v_mov_b32 v4, global_var2@gotpcrel32@hi
40 v_mov_b32 v5, global_var3@rel32@lo
41 v_mov_b32 v6, global_var4@rel32@hi
42
4328 .globl global_var0
4429 .globl global_var1
4530 .globl global_var2
55 //===----------------------------------------------------------------------===//
66
77 v_mul_i32_i24 v1, v2, 100
8 // CHECK: error: invalid operand for instruction
8 // CHECK: error: invalid literal operand
99
1010 //===----------------------------------------------------------------------===//
1111 // _e32 checks
2828
2929 // Immediate src0
3030 v_mul_i32_i24_e64 v1, 100, v3
31 // CHECK: error: invalid operand for instruction
31 // CHECK: error: invalid literal operand
3232
3333 // Immediate src1
3434 v_mul_i32_i24_e64 v1, v2, 100
35 // CHECK: error: invalid operand for instruction
35 // CHECK: error: invalid literal operand
3636
3737 v_add_i32_e32 v1, s[0:1], v2, v3
3838 // CHECK: error: invalid operand for instruction
66 // GCN: error: too few operands for instruction
77
88 v_div_scale_f32 v24, vcc, v22, 1.1, v22
9 // GCN: error: invalid operand for instruction
9 // GCN: error: invalid literal operand
1010
1111 v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3]
1212 // GFX67: error: instruction not supported on this GPU
0 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding %s | FileCheck -check-prefix=GFX10 %s
1
2 #===----------------------------------------------------------------------===//
3 # Inline constants
4 #===----------------------------------------------------------------------===//
5
6 # GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18]
7 0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18
8
9 # GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18]
10 0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18
11
12 # GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18]
13 0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18
14
15 # GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18]
16 0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18
17
18 # GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18]
19 0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18
20
21 # GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18]
22 0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18
23
24 # GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18]
25 0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18
26
27 # GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18]
28 0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18
29
30 # GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18]
31 0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18
32
33 # GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18]
34 0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18
35
36 # GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18]
37 0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18
38
39 # GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18]
40 0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18
41
42 # GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18]
43 0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18
44
45 # GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18]
46 0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18
47
48 # GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18]
49 0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18
50
51 # GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18]
52 0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18
53
54 # GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18]
55 0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18
56
57 # GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18]
58 0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18
59
60 # GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18]
61 0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18
62
63 # GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18]
64 0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18
65
66 # GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18]
67 0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18
68
69 # GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18]
70 0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18
71
72 # GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18]
73 0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18
74
75 # GFX10: v_pk_fma_f16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x0e,0xcc,0xf2,0xe8,0xd9,0x1b]
76 0x05,0x40,0x0e,0xcc,0xf2,0xe8,0xd9,0x1b
77
78 # GFX10: v_pk_fma_f16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b]
79 0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b
80
81 # GFX10: v_pk_mad_i16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b]
82 0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b
83
84 # GFX10: v_pk_mad_u16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b]
85 0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b
86
87 # GFX10: v_pk_ashrrev_i16 v5, 1, 16 ; encoding: [0x05,0x00,0x06,0xcc,0x81,0x20,0x01,0x18]
88 0x05,0x00,0x06,0xcc,0x81,0x20,0x01,0x18
89
90 #===----------------------------------------------------------------------===//
91 # 32-bit literals
92 #===----------------------------------------------------------------------===//
93
94 # GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
95 0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12
96
97 # GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
98 0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12
99
100 # GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff]
101 0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff
102
103 # GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00]
104 0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00
105
106 # GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
107 0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12
108
109 # GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
110 0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12
111
112 # GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff]
113 0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff
114
115 # GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00]
116 0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00
117
118 # GFX10: v_pk_fma_f16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf]
119 0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf
120
121 # GFX10: v_pk_fma_f16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf]
122 0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf
123
124 # GFX10: v_pk_fma_f16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf]
125 0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf
126
127 # GFX10: v_pk_mad_i16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf]
128 0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf
129
130 # GFX10: v_pk_mad_i16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf]
131 0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf
132
133 # GFX10: v_pk_mad_i16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf]
134 0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf
135
136 # GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
137 0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12
138
139 # GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
140 0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12
141
142 #===----------------------------------------------------------------------===//
143 # Packed VOP2
144 #===----------------------------------------------------------------------===//
145
146 # FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid
147 # GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12]
148 0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12
0 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX10 %s
1
2 # GFX10: v_bfe_u32 v0, 0x3039, v1, s1 ; encoding: [0x00,0x00,0x48,0xd5,0xff,0x02,0x06,0x00,0x39,0x30,0x00,0x00]
3 0x00,0x00,0x48,0xd5,0xff,0x02,0x06,0x00,0x39,0x30,0x00,0x00
4
5 # GFX10: v_bfe_u32 v0, v1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xff,0x05,0x00,0x39,0x30,0x00,0x00]
6 0x00,0x00,0x48,0xd5,0x01,0xff,0x05,0x00,0x39,0x30,0x00,0x00
7
8 # GFX10: v_bfe_u32 v0, v1, s1, 0x3039 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0x03,0xfc,0x03,0x39,0x30,0x00,0x00]
9 0x00,0x00,0x48,0xd5,0x01,0x03,0xfc,0x03,0x39,0x30,0x00,0x00
10
11 # GFX10: v_bfe_u32 v0, 0x3039, v1, v2 ; encoding: [0x00,0x00,0x48,0xd5,0xff,0x02,0x0a,0x04,0x39,0x30,0x00,0x00]
12 0x00,0x00,0x48,0xd5,0xff,0x02,0x0a,0x04,0x39,0x30,0x00,0x00
13
14 # GFX10: v_bfe_u32 v0, s1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00]
15 0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00
16
17 # GFX10: v_bfm_b32_e64 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00]
18 0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00
19
20 # GFX10: v_bfm_b32_e64 v0, 0x3039, v1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00]
21 0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00
22
23 # GFX10: v_pk_add_f16 v1, 0x4e40, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00]
24 0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00
25
26 # GFX10: v_pk_add_f16 v1, 0x1e240, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00]
27 0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00
28
29 # GFX10: v_pk_add_f16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff]
30 0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff
31
32 # GFX10: v_pk_add_u16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff]
33 0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff
34
35 # GFX10: v_pk_add_u16 v1, 64, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18]
36 0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18
37
38 # GFX10: v_pk_add_u16 v1, 0x41, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00]
39 0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00
40
41 # GFX10: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18]
42 0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18
43
44 # GFX10: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18]
45 0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18
46
47 # GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff]
48 0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff