llvm.org GIT mirror llvm / 5c3ee71
[AMDGPU] gfx1010 base changes for wave32 Differential Revision: https://reviews.llvm.org/D63293 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363299 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 4 months ago
12 changed file(s) with 211 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
6868 Predicate SubtargetPredicate = TruePredicate;
6969 list AssemblerPredicates = [];
7070 Predicate AssemblerPredicate = TruePredicate;
71 Predicate WaveSizePredicate = TruePredicate;
7172 list OtherPredicates = [];
7273 list Predicates = !listconcat([SubtargetPredicate,
73 AssemblerPredicate],
74 AssemblerPredicate,
75 WaveSizePredicate],
7476 AssemblerPredicates,
7577 OtherPredicates);
7678 }
9292 }
9393
9494 FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
95
96 // Disable mutually exclusive bits.
97 if (FS.find_lower("+wavefrontsize") != StringRef::npos) {
98 if (FS.find_lower("wavefrontsize16") == StringRef::npos)
99 FullFS += "-wavefrontsize16,";
100 if (FS.find_lower("wavefrontsize32") == StringRef::npos)
101 FullFS += "-wavefrontsize32,";
102 if (FS.find_lower("wavefrontsize64") == StringRef::npos)
103 FullFS += "-wavefrontsize64,";
104 }
95105
96106 FullFS += FS;
97107
374374 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
375375 }
376376
377 bool isBoolReg() const;
378
377379 bool isSCSrcF16() const {
378380 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
379381 }
614616 }
615617
616618 void addRegOperands(MCInst &Inst, unsigned N) const;
619
// Adds a boolean-register operand to Inst. No special handling is applied:
// the call is forwarded unchanged to addRegOperands(Inst, N).
620 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
621 addRegOperands(Inst, N);
622 }
617623
618624 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
619625 if (isRegKind())
880886 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
881887 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
882888 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
889 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
890 /// descriptor field, if valid.
883891 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
884892 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
885893 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
888896 /// \param SGPRBlocks [out] Result SGPR block count.
889897 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
890898 bool FlatScrUsed, bool XNACKUsed,
891 unsigned NextFreeVGPR, SMRange VGPRRange,
892 unsigned NextFreeSGPR, SMRange SGPRRange,
893 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
899 Optional EnableWavefrontSize32, unsigned NextFreeVGPR,
900 SMRange VGPRRange, unsigned NextFreeSGPR,
901 SMRange SGPRRange, unsigned &VGPRBlocks,
902 unsigned &SGPRBlocks);
894903 bool ParseDirectiveAMDGCNTarget();
895904 bool ParseDirectiveAMDHSAKernel();
896905 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
11581167 bool validateMIMGDim(const MCInst &Inst);
11591168 bool validateLdsDirect(const MCInst &Inst);
11601169 bool validateOpSel(const MCInst &Inst);
1170 bool validateVccOperand(unsigned Reg) const;
11611171 bool validateVOP3Literal(const MCInst &Inst) const;
11621172 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
11631173 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
11891199 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
11901200 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
11911201 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1202 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
11921203
11931204 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
11941205 const unsigned MinVal,
14761487
14771488 bool AMDGPUOperand::isSDWAInt32Operand() const {
14781489 return isSDWAOperand(MVT::i32);
1490 }
1491
// Boolean-register operand check. The answer depends on the parser's feature
// bits: when FeatureWavefrontSize64 is set the operand must satisfy
// isSCSrcB64(), otherwise it must satisfy isSCSrcB32().
1492 bool AMDGPUOperand::isBoolReg() const {
1493 return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1494 isSCSrcB64() : isSCSrcB32();
14791495 }
14801496
14811497 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
30293045 return true;
30303046 }
30313047
3048 // Check if VCC register matches wavefront size
// Returns true only in two cases: Reg is VCC while FeatureWavefrontSize64 is
// set, or Reg is VCC_LO while FeatureWavefrontSize32 is set. Every other
// register / feature combination yields false.
3049 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3050 auto FB = getFeatureBits();
3051 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3052 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3053 }
3054
30323055 // VOP3 literal is only allowed in GFX10+ and only one can be used
30333056 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
30343057 unsigned Opcode = Inst.getOpcode();
32663289
32673290 bool AMDGPUAsmParser::calculateGPRBlocks(
32683291 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3269 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
3270 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
3271 unsigned &SGPRBlocks) {
3292 bool XNACKUsed, Optional EnableWavefrontSize32, unsigned NextFreeVGPR,
3293 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3294 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
32723295 // TODO(scott.linder): These calculations are duplicated from
32733296 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
32743297 IsaVersion Version = getIsaVersion(getSTI().getCPU());
32973320 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
32983321 }
32993322
3300 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
3323 VGPRBlocks =
3324 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
33013325 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
33023326
33033327 return false;
33283352 bool ReserveVCC = true;
33293353 bool ReserveFlatScr = true;
33303354 bool ReserveXNACK = hasXNACK();
3355 Optional EnableWavefrontSize32;
33313356
33323357 while (true) {
33333358 while (getLexer().is(AsmToken::EndOfStatement))
35463571 unsigned VGPRBlocks;
35473572 unsigned SGPRBlocks;
35483573 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3549 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3550 SGPRRange, VGPRBlocks, SGPRBlocks))
3574 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3575 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3576 SGPRBlocks))
35513577 return true;
35523578
35533579 if (!isUInt(
53835409 }
53845410
53855411 //===----------------------------------------------------------------------===//
5412 // Boolean holding registers
5413 //===----------------------------------------------------------------------===//
5414
// Parses a boolean-holding register operand. This is a thin wrapper: the work
// and the returned match result both come from parseReg(Operands).
5415 OperandMatchResultTy
5416 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5417 return parseReg(Operands);
5418 }
5419
5420 //===----------------------------------------------------------------------===//
53865421 // mubuf
53875422 //===----------------------------------------------------------------------===//
53885423
62936328 }
62946329 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
62956330 // Add the register arguments
6296 if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
6331 if (Op.isReg() && validateVccOperand(Op.getReg())) {
62976332 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
62986333 // Skip it.
62996334 continue;
64366471
64376472 for (unsigned E = Operands.size(); I != E; ++I) {
64386473 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6439 if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
6474 if (skipVcc && !skippedVcc && Op.isReg() &&
6475 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
64406476 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
64416477 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
64426478 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
441441
442442 printOperand(MI, OpNo, STI, O);
443443
444 // Print default vcc/vcc_lo operand.
444445 switch (MI->getOpcode()) {
445446 default: break;
446447
588589 raw_ostream &O) {
589590 if (OpNo > 0)
590591 O << ", ";
591 printRegOperand(AMDGPU::VCC, O, MRI);
592 printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
593 AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI);
592594 if (OpNo == 0)
593595 O << ", ";
594596 }
596598 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
597599 const MCSubtargetInfo &STI,
598600 raw_ostream &O) {
601 // Print default vcc/vcc_lo operand of VOPC.
599602 const MCInstrDesc &Desc = MII.get(MI->getOpcode());
600603 if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
601604 (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
679682 O << "/*INV_OP*/";
680683 }
681684
685 // Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
682686 switch (MI->getOpcode()) {
683687 default: break;
684688
748752 if (InputModifiers & SISrcMods::SEXT)
749753 O << ')';
750754
755 // Print default vcc/vcc_lo operand of VOP2b.
751756 switch (MI->getOpcode()) {
752757 default: break;
753758
388388 const MCOperand &MO = MI.getOperand(OpNo);
389389
390390 unsigned Reg = MO.getReg();
391 if (Reg != AMDGPU::VCC) {
391 if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
392392 RegEnc |= MRI.getEncodingValue(Reg);
393393 RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
394394 RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
44 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55 //
66 //===----------------------------------------------------------------------===//
7
// Wave-size predicates. Each pairs a Predicate string that compares
// Subtarget->getWavefrontSize() against 32 or 64 with an AssemblerPredicate
// on the matching FeatureWavefrontSize32 / FeatureWavefrontSize64 feature.
8 def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
9 AssemblerPredicate <"FeatureWavefrontSize32">;
10 def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
11 AssemblerPredicate <"FeatureWavefrontSize64">;
712
813 def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
914
187187 let CodeSize = base_inst.CodeSize;
188188 }
189189
// The *_B64_term defs, each a WrapTerminatorInst, are gated on the isWave64
// wave-size predicate.
190 let WaveSizePredicate = isWave64 in {
190191 def S_MOV_B64_term : WrapTerminatorInst;
191192 def S_XOR_B64_term : WrapTerminatorInst;
192193 def S_ANDN2_B64_term : WrapTerminatorInst;
194 }
195
// The *_B32_term defs, each a WrapTerminatorInst, are gated on the isWave32
// wave-size predicate.
196 let WaveSizePredicate = isWave32 in {
197 def S_MOV_B32_term : WrapTerminatorInst;
198 def S_XOR_B32_term : WrapTerminatorInst;
199 def S_OR_B32_term : WrapTerminatorInst;
200 def S_ANDN2_B32_term : WrapTerminatorInst;
201 }
193202
194203 def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
195204 [(int_amdgcn_wave_barrier)]> {
342351 let Defs = [EXEC];
343352 let usesCustomInserter = 1;
344353 let isAsCheapAsAMove = 1;
354 let WaveSizePredicate = isWave64;
355 }
356
// Pseudo instruction with no outputs, a single i32 immediate input ($src) and
// an empty pattern list. It is modeled as defining EXEC_LO, is expanded via a
// custom inserter, is flagged as cheap as a move, and is gated on isWave32.
357 def SI_INIT_EXEC_LO : SPseudoInstSI <
358 (outs), (ins i32imm:$src), []> {
359 let Defs = [EXEC_LO];
360 let usesCustomInserter = 1;
361 let isAsCheapAsAMove = 1;
362 let WaveSizePredicate = isWave32;
345363 }
346364
347365 def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
274274 } // End SubtargetPredicate = isGFX9Plus
275275
276276 let SubtargetPredicate = isGFX10Plus in {
// _B32 variants of the saveexec / wrexec SOP1 instructions. Every def in this
// group is marked as having side effects and is modeled as defining EXEC and
// SCC and as using EXEC.
277 let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in {
278 def S_AND_SAVEEXEC_B32 : SOP1_32<"s_and_saveexec_b32">;
279 def S_OR_SAVEEXEC_B32 : SOP1_32<"s_or_saveexec_b32">;
280 def S_XOR_SAVEEXEC_B32 : SOP1_32<"s_xor_saveexec_b32">;
281 def S_ANDN2_SAVEEXEC_B32 : SOP1_32<"s_andn2_saveexec_b32">;
282 def S_ORN2_SAVEEXEC_B32 : SOP1_32<"s_orn2_saveexec_b32">;
283 def S_NAND_SAVEEXEC_B32 : SOP1_32<"s_nand_saveexec_b32">;
284 def S_NOR_SAVEEXEC_B32 : SOP1_32<"s_nor_saveexec_b32">;
285 def S_XNOR_SAVEEXEC_B32 : SOP1_32<"s_xnor_saveexec_b32">;
286 def S_ANDN1_SAVEEXEC_B32 : SOP1_32<"s_andn1_saveexec_b32">;
287 def S_ORN1_SAVEEXEC_B32 : SOP1_32<"s_orn1_saveexec_b32">;
288 def S_ANDN1_WREXEC_B32 : SOP1_32<"s_andn1_wrexec_b32">;
289 def S_ANDN2_WREXEC_B32 : SOP1_32<"s_andn2_wrexec_b32">;
290 } // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
291
277292 let Uses = [M0] in {
278293 def S_MOVRELSD_2_B32 : SOP1_32<"s_movrelsd_2_b32">;
279294 } // End Uses = [M0]
780795 "$simm16"> {
781796 let has_sdst = 0;
782797 }
798
799 def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
800 def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">;
783801
784802 def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
785803 def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
12141232 defm S_ANDN1_WREXEC_B64 : SOP1_Real_gfx10<0x039>;
12151233 defm S_ANDN2_WREXEC_B64 : SOP1_Real_gfx10<0x03a>;
12161234 defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx10<0x03b>;
1235 defm S_AND_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03c>;
1236 defm S_OR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03d>;
1237 defm S_XOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03e>;
1238 defm S_ANDN2_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03f>;
1239 defm S_ORN2_SAVEEXEC_B32 : SOP1_Real_gfx10<0x040>;
1240 defm S_NAND_SAVEEXEC_B32 : SOP1_Real_gfx10<0x041>;
1241 defm S_NOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x042>;
1242 defm S_XNOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x043>;
1243 defm S_ANDN1_SAVEEXEC_B32 : SOP1_Real_gfx10<0x044>;
1244 defm S_ORN1_SAVEEXEC_B32 : SOP1_Real_gfx10<0x045>;
1245 defm S_ANDN1_WREXEC_B32 : SOP1_Real_gfx10<0x046>;
1246 defm S_ANDN2_WREXEC_B32 : SOP1_Real_gfx10<0x047>;
12171247 defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>;
12181248
12191249 //===----------------------------------------------------------------------===//
13811411 defm S_WAITCNT_VMCNT : SOPK_Real32_gfx10<0x018>;
13821412 defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx10<0x019>;
13831413 defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx10<0x01a>;
1414 defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx10<0x01b>;
1415 defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx10<0x01c>;
13841416
13851417 //===----------------------------------------------------------------------===//
13861418 // SOPK - GFX6, GFX7.
379379 return NumSGPRs / getSGPREncodingGranule(STI) - 1;
380380 }
381381
382 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
383 return 4;
384 }
385
386 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
387 return getVGPRAllocGranule(STI);
382 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
383 Optional EnableWavefrontSize32) {
384 bool IsWave32 = EnableWavefrontSize32 ?
385 *EnableWavefrontSize32 :
386 STI->getFeatureBits().test(FeatureWavefrontSize32);
387 return IsWave32 ? 8 : 4;
388 }
389
390 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
391 Optional EnableWavefrontSize32) {
392 return getVGPRAllocGranule(STI, EnableWavefrontSize32);
388393 }
389394
390395 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
415420 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
416421 }
417422
418 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
419 NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
423 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
424 Optional EnableWavefrontSize32) {
425 NumVGPRs = alignTo(std::max(1u, NumVGPRs),
426 getVGPREncodingGranule(STI, EnableWavefrontSize32));
420427 // VGPRBlocks is actual number of VGPR blocks minus 1.
421 return NumVGPRs / getVGPREncodingGranule(STI) - 1;
428 return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
422429 }
423430
424431 } // end namespace IsaInfo
436443 Header.amd_machine_version_minor = Version.Minor;
437444 Header.amd_machine_version_stepping = Version.Stepping;
438445 Header.kernel_code_entry_byte_offset = sizeof(Header);
439 // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
440446 Header.wavefront_size = 6;
441447
442448 // If the code object does not support indirect functions, then the value must
149149 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
150150
151151 /// \returns VGPR allocation granularity for given subtarget \p STI.
152 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
152 ///
153 /// For subtargets which support it, \p EnableWavefrontSize32 should match
154 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
155 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
156 Optional EnableWavefrontSize32 = None);
153157
154158 /// \returns VGPR encoding granularity for given subtarget \p STI.
155 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
159 ///
160 /// For subtargets which support it, \p EnableWavefrontSize32 should match
161 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
162 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
163 Optional EnableWavefrontSize32 = None);
156164
157165 /// \returns Total number of VGPRs for given subtarget \p STI.
158166 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
170178
171179 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
172180 /// \p NumVGPRs are used.
173 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
181 ///
182 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
183 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
///
/// \p NumVGPRs is raised to at least 1 and rounded up to the VGPR encoding
/// granule; the returned value is the resulting block count minus one. When
/// \p EnableWavefrontSize32 is None, the granule is chosen from the
/// subtarget's FeatureWavefrontSize32 bit.
184 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
185 Optional EnableWavefrontSize32 = None);
174186
175187 } // end namespace IsaInfo
176188
198198 }
199199
200200 multiclass VOP2bInstAliases {
201 let WaveSizePredicate = isWave32 in {
202 def : VOP2bInstAlias;
203 }
204 let WaveSizePredicate = isWave64 in {
201205 def : VOP2bInstAlias;
206 }
202207 }
203208
204209 multiclass VOP2eInst
233238 }
234239
235240 multiclass VOP2eInstAliases {
241 let WaveSizePredicate = isWave32 in {
242 def : VOP2eInstAlias;
243 }
244 let WaveSizePredicate = isWave64 in {
236245 def : VOP2eInstAlias;
246 }
237247 }
238248
239249 class VOP_MADAK : VOPProfile <[vt, vt, vt, vt]> {
952962 let DecoderNamespace = "DPP8";
953963 }
954964
965 let WaveSizePredicate = isWave32 in {
966 def _sdwa_w32_gfx10 :
967 Base_VOP_SDWA10_Real(opName#"_sdwa")>,
968 VOP2_SDWA9Ae(opName#"_sdwa").Pfl> {
969 VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa");
970 let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
971 let isAsmParserOnly = 1;
972 let DecoderNamespace = "SDWA10";
973 }
974 def _dpp_w32_gfx10 :
975 VOP2_DPP16(opName#"_e32"), asmName> {
976 string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16;
977 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
978 let isAsmParserOnly = 1;
979 }
980 def _dpp8_w32_gfx10 :
981 VOP2_DPP8(opName#"_e32"), asmName> {
982 string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8;
983 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
984 let isAsmParserOnly = 1;
985 }
986 } // End WaveSizePredicate = isWave32
987
988 let WaveSizePredicate = isWave64 in {
955989 def _sdwa_w64_gfx10 :
956990 Base_VOP_SDWA10_Real(opName#"_sdwa")>,
957991 VOP2_SDWA9Ae(opName#"_sdwa").Pfl> {
9721006 let AsmString = asmName # AsmDPP8;
9731007 let isAsmParserOnly = 1;
9741008 }
1009 } // End WaveSizePredicate = isWave64
9751010 }
9761011
9771012 //===----------------------------- VOP3Only -----------------------------===//
164164 multiclass VOPCInstAliases {
165165 def : VOPCInstAlias (OpName#"_e64"),
166166 !cast(OpName#"_e32_"#Arch)>;
167 let WaveSizePredicate = isWave32 in {
168 def : VOPCInstAlias (OpName#"_e64"),
169 !cast(OpName#"_e32_"#Arch),
170 "vcc_lo, "#!cast(OpName#"_e64").Pfl.Asm32>;
171 }
172 let WaveSizePredicate = isWave64 in {
167173 def : VOPCInstAlias (OpName#"_e64"),
168174 !cast(OpName#"_e32_"#Arch),
169175 "vcc, "#!cast(OpName#"_e64").Pfl.Asm32>;
176 }
170177 }
171178
172179 multiclass VOPCXInstAliases {
739746 // We need to use COPY_TO_REGCLASS to work around the problem where ReplaceAllUsesWith()
740747 // complains that it cannot replace i1 <-> i64/i32 if the node was not morphed in place.
// Selection patterns for AMDGPUsetcc on two plain vt operands. Two patterns
// are emitted, one per wave size; both wrap the compare instruction in
// COPY_TO_REGCLASS.
741748 multiclass ICMP_Pattern {
// Wave64: the setcc result is i64 and is copied into SReg_64.
749 let WaveSizePredicate = isWave64 in
742750 def : GCNPat <
743751 (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
744752 (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
753 >;
754
// Wave32: the setcc result is i32 and is copied into SReg_32.
755 let WaveSizePredicate = isWave32 in
756 def : GCNPat <
757 (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
758 (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
745759 >;
746760 }
747761
779793 defm : ICMP_Pattern ;
780794
// Selection patterns for AMDGPUsetcc whose operands are matched through
// VOP3Mods (value plus i32 source modifiers). The selected instruction gets
// both modifier/value pairs and DSTCLAMP.NONE. One pattern is emitted per
// wave size.
781795 multiclass FCMP_Pattern {
// Wave64: the setcc result is i64 and is copied into SReg_64.
796 let WaveSizePredicate = isWave64 in
782797 def : GCNPat <
783798 (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
784799 (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
785800 (i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
786801 DSTCLAMP.NONE), SReg_64))
802 >;
803
// Wave32: the setcc result is i32 and is copied into SReg_32.
804 let WaveSizePredicate = isWave32 in
805 def : GCNPat <
806 (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
807 (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
808 (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
809 DSTCLAMP.NONE), SReg_32))
787810 >;
788811 }
789812