llvm.org GIT mirror llvm / 0b37802
[AMDGPU] gfx1010 VMEM and SMEM implementation Differential Revision: https://reviews.llvm.org/D61330 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359621 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 9 months ago
108 changed file(s) with 3146 addition(s) and 1882 deletion(s). Raw diff Collapse all Expand all
14301430 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
14311431
14321432 //===----------------------------------------------------------------------===//
1433 // GFX10 Intrinsics
1434 //===----------------------------------------------------------------------===//
1435
1436 def int_amdgcn_s_get_waveid_in_workgroup :
1437 GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
1438 Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
1439
1440 //===----------------------------------------------------------------------===//
14331441 // Deep learning intrinsics.
14341442 //===----------------------------------------------------------------------===//
14351443
124124 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
125125 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
126126 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
127 SDValue &TFE) const;
127 SDValue &TFE, SDValue &DLC) const;
128128 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
129129 SDValue &SOffset, SDValue &Offset, SDValue &GLC,
130 SDValue &SLC, SDValue &TFE) const;
130 SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
131131 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
132132 SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
133133 SDValue &SLC) const;
140140
141141 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
142142 SDValue &Offset, SDValue &GLC, SDValue &SLC,
143 SDValue &TFE) const;
143 SDValue &TFE, SDValue &DLC) const;
144144 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
145145 SDValue &Offset, SDValue &SLC) const;
146146 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
147147 SDValue &Offset) const;
148148
149 bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
149 bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
150150 SDValue &Offset, SDValue &SLC) const;
151 bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
151 bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
152152 SDValue &Offset, SDValue &SLC) const;
153153
154154 template <bool IsSigned>
155 bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
155 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
156156 SDValue &Offset, SDValue &SLC) const;
157157
158158 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
12201220 SDValue &Offset, SDValue &Offen,
12211221 SDValue &Idxen, SDValue &Addr64,
12221222 SDValue &GLC, SDValue &SLC,
1223 SDValue &TFE) const {
1223 SDValue &TFE, SDValue &DLC) const {
12241224 // Subtarget prefers to use flat instruction
12251225 if (Subtarget->useFlatForGlobal())
12261226 return false;
12321232 if (!SLC.getNode())
12331233 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
12341234 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
1235 DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
12351236
12361237 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
12371238 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
13101311 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
13111312 SDValue &VAddr, SDValue &SOffset,
13121313 SDValue &Offset, SDValue &GLC,
1313 SDValue &SLC, SDValue &TFE) const {
1314 SDValue &SLC, SDValue &TFE,
1315 SDValue &DLC) const {
13141316 SDValue Ptr, Offen, Idxen, Addr64;
13151317
13161318 // addr64 bit was removed for volcanic islands.
13181320 return false;
13191321
13201322 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1321 GLC, SLC, TFE))
1323 GLC, SLC, TFE, DLC))
13221324 return false;
13231325
13241326 ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
13401342 SDValue &Offset,
13411343 SDValue &SLC) const {
13421344 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1343 SDValue GLC, TFE;
1344
1345 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1345 SDValue GLC, TFE, DLC;
1346
1347 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
13461348 }
13471349
13481350 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
14671469 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
14681470 SDValue &SOffset, SDValue &Offset,
14691471 SDValue &GLC, SDValue &SLC,
1470 SDValue &TFE) const {
1472 SDValue &TFE, SDValue &DLC) const {
14711473 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
14721474 const SIInstrInfo *TII =
14731475 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
14741476
14751477 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1476 GLC, SLC, TFE))
1478 GLC, SLC, TFE, DLC))
14771479 return false;
14781480
14791481 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
14951497 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
14961498 SDValue &Soffset, SDValue &Offset
14971499 ) const {
1498 SDValue GLC, SLC, TFE;
1499
1500 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1500 SDValue GLC, SLC, TFE, DLC;
1501
1502 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
15011503 }
15021504 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
15031505 SDValue &Soffset, SDValue &Offset,
15041506 SDValue &SLC) const {
1505 SDValue GLC, TFE;
1506
1507 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1507 SDValue GLC, TFE, DLC;
1508
1509 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
15081510 }
15091511
15101512 template <bool IsSigned>
1511 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
1513 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
1514 SDValue Addr,
15121515 SDValue &VAddr,
15131516 SDValue &Offset,
15141517 SDValue &SLC) const {
1515 int64_t OffsetVal = 0;
1516
1517 if (Subtarget->hasFlatInstOffsets() &&
1518 CurDAG->isBaseWithConstantOffset(Addr)) {
1519 SDValue N0 = Addr.getOperand(0);
1520 SDValue N1 = Addr.getOperand(1);
1521 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1522
1523 if ((IsSigned && isInt<13>(COffsetVal)) ||
1524 (!IsSigned && isUInt<12>(COffsetVal))) {
1525 Addr = N0;
1526 OffsetVal = COffsetVal;
1527 }
1528 }
1529
1530 VAddr = Addr;
1531 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1532 SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1533
1534 return true;
1535 }
1536
1537 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
1518 return static_cast(getTargetLowering())->
1519 SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
1520 }
1521
1522 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
1523 SDValue Addr,
15381524 SDValue &VAddr,
15391525 SDValue &Offset,
15401526 SDValue &SLC) const {
1541 return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
1542 }
1543
1544 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
1527 return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
1528 }
1529
1530 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
1531 SDValue Addr,
15451532 SDValue &VAddr,
15461533 SDValue &Offset,
15471534 SDValue &SLC) const {
1548 return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
1535 return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
15491536 }
15501537
15511538 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
28812881
28822882 if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
28832883 return false;
2884
2885 return true;
2886 }
2887
2888 // Find a load or store from corresponding pattern root.
2889 // Roots may be build_vector, bitconvert or their combinations.
2890 static MemSDNode* findMemSDNode(SDNode *N) {
2891 N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
2892 if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
2893 return MN;
2894 assert(isa<BuildVectorSDNode>(N));
2895 for (SDValue V : N->op_values())
2896 if (MemSDNode *MN =
2897 dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
2898 return MN;
2899 llvm_unreachable("cannot find MemSDNode in the pattern!");
2900 }
2901
2902 bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned,
2903 SelectionDAG &DAG,
2904 SDNode *N,
2905 SDValue Addr,
2906 SDValue &VAddr,
2907 SDValue &Offset,
2908 SDValue &SLC) const {
2909 const GCNSubtarget &ST =
2910 DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
2911 int64_t OffsetVal = 0;
2912
2913 if (ST.hasFlatInstOffsets() &&
2914 (!ST.hasFlatSegmentOffsetBug() ||
2915 findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
2916 DAG.isBaseWithConstantOffset(Addr)) {
2917 SDValue N0 = Addr.getOperand(0);
2918 SDValue N1 = Addr.getOperand(1);
2919 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
2920
2921 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
2922 if ((IsSigned && isInt<12>(COffsetVal)) ||
2923 (!IsSigned && isUInt<11>(COffsetVal))) {
2924 Addr = N0;
2925 OffsetVal = COffsetVal;
2926 }
2927 } else {
2928 if ((IsSigned && isInt<13>(COffsetVal)) ||
2929 (!IsSigned && isUInt<12>(COffsetVal))) {
2930 Addr = N0;
2931 OffsetVal = COffsetVal;
2932 }
2933 }
2934 }
2935
2936 VAddr = Addr;
2937 Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
2938 SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1);
28842939
28852940 return true;
28862941 }
322322 }
323323
324324 AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
325
326 bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
327 SDValue Addr, SDValue &VAddr, SDValue &Offset,
328 SDValue &SLC) const;
325329 };
326330
327331 namespace AMDGPUISD {
355355 .add(I.getOperand(0))
356356 .addImm(0) // offset
357357 .addImm(0) // glc
358 .addImm(0); // slc
358 .addImm(0) // slc
359 .addImm(0); // dlc
359360
360361
361362 // Now that we selected an opcode, we need to constrain the register
531532 .addReg(PtrReg)
532533 .addImm(0) // offset
533534 .addImm(0) // glc
534 .addImm(0); // slc
535 .addImm(0) // slc
536 .addImm(0); // dlc
535537
536538 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
537539 I.eraseFromParent();
138138 ImmTyInstOffset,
139139 ImmTyOffset0,
140140 ImmTyOffset1,
141 ImmTyDLC,
141142 ImmTyGLC,
142143 ImmTySLC,
143144 ImmTyTFE,
313314 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
314315 bool isGDS() const { return isImmTy(ImmTyGDS); }
315316 bool isLDS() const { return isImmTy(ImmTyLDS); }
317 bool isDLC() const { return isImmTy(ImmTyDLC); }
316318 bool isGLC() const { return isImmTy(ImmTyGLC); }
317319 bool isSLC() const { return isImmTy(ImmTySLC); }
318320 bool isTFE() const { return isImmTy(ImmTyTFE); }
675677 case ImmTyInstOffset: OS << "InstOffset"; break;
676678 case ImmTyOffset0: OS << "Offset0"; break;
677679 case ImmTyOffset1: OS << "Offset1"; break;
680 case ImmTyDLC: OS << "DLC"; break;
678681 case ImmTyGLC: OS << "GLC"; break;
679682 case ImmTySLC: OS << "SLC"; break;
680683 case ImmTyTFE: OS << "TFE"; break;
11831186 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
11841187 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
11851188
1189 AMDGPUOperand::Ptr defaultDLC() const;
11861190 AMDGPUOperand::Ptr defaultGLC() const;
11871191 AMDGPUOperand::Ptr defaultSLC() const;
11881192
23022306 }
23032307 }
23042308
2305 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2309 if (TSFlags & SIInstrFlags::FLAT) {
23062310 // FIXME: Produces error without correct column reported.
2307 auto OpNum =
2308 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2311 auto Opcode = Inst.getOpcode();
2312 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2313
23092314 const auto &Op = Inst.getOperand(OpNum);
2310 if (Op.getImm() != 0)
2315 if (!hasFlatOffsets() && Op.getImm() != 0)
23112316 return Match_InvalidOperand;
2317
2318 // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
2319 // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
2320 if (isGFX10()) {
2321 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2322 if (!isInt<12>(Op.getImm()))
2323 return Match_InvalidOperand;
2324 } else {
2325 if (!isUInt<11>(Op.getImm()))
2326 return Match_InvalidOperand;
2327 }
2328 }
23122329 }
23132330
23142331 return Match_Success;
38863903 }
38873904 }
38883905
3906 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
3907 return MatchOperand_ParseFail;
3908
38893909 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
38903910 return MatchOperand_Success;
38913911 }
51005120 // mubuf
51015121 //===----------------------------------------------------------------------===//
51025122
5123 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5124 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5125 }
5126
51035127 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
51045128 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
51055129 }
51765200 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
51775201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
51785202 }
5203
5204 if (isGFX10())
5205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
51795206 }
51805207
51815208 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
52135240 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
52145241 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
52155242 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5243
5244 if (isGFX10())
5245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
52165246 }
52175247
52185248 //===----------------------------------------------------------------------===//
52485278 }
52495279 }
52505280
5281 bool IsGFX10 = isGFX10();
5282
52515283 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
52525284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5285 if (IsGFX10)
5286 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
52535287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
52545288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
52555289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
53525386 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
53535387 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
53545388 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5389 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
53555390 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
53565391 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
53575392 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
55805615 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
55815616 }
55825617
5583 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
5618 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
55845619 // it has src2 register operand that is tied to dst operand
55855620 // we don't allow modifiers for this operand in assembler so src2_modifiers
55865621 // should be 0.
60306065 break;
60316066
60326067 case SIInstrFlags::VOPC:
6033 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6068 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6069 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
60346070 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
60356071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
60366072 break;
66 //===----------------------------------------------------------------------===//
77
88 def MUBUFAddr32 : ComplexPattern;
9 def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
9 def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
1010 def MUBUFAddr64Atomic : ComplexPattern;
1111
1212 def MUBUFScratchOffen : ComplexPattern;
1313 def MUBUFScratchOffset : ComplexPattern;
1414
15 def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
15 def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
1616 def MUBUFOffsetNoGLC : ComplexPattern;
1717 def MUBUFOffsetAtomic : ComplexPattern;
1818
9595 bits<1> has_vdata = 1;
9696 bits<1> has_vaddr = 1;
9797 bits<1> has_glc = 1;
98 bits<1> has_dlc = 1;
9899 bits<1> glc_value = 0; // the value for glc if no such operand
100 bits<1> dlc_value = 0; // the value for dlc if no such operand
99101 bits<1> has_srsrc = 1;
100102 bits<1> has_soffset = 1;
101103 bits<1> has_offset = 1;
118120
119121 bits<12> offset;
120122 bits<1> glc;
123 bits<1> dlc;
121124 bits<7> format;
122125 bits<8> vaddr;
123126 bits<8> vdata;
136139 RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
137140 dag InsNoData = !if(!empty(vaddrList),
138141 (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
139 offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
142 offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
140143 (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
141 offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
144 offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
142145 );
143146 dag InsData = !if(!empty(vaddrList),
144147 (ins vdataClass:$vdata, SReg_128:$srsrc,
145148 SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
146 SLC:$slc, TFE:$tfe),
149 SLC:$slc, TFE:$tfe, DLC:$dlc),
147150 (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
148151 SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
149 SLC:$slc, TFE:$tfe)
152 SLC:$slc, TFE:$tfe, DLC:$dlc)
150153 );
151154 dag ret = !if(!empty(vdataList), InsNoData, InsData);
152155 }
197200 : MTBUF_Pseudo
198201 (outs vdataClass:$vdata),
199202 getMTBUFIns.ret,
200 " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe",
203 " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc",
201204 pattern>,
202205 MTBUF_SetupAddr {
203206 let PseudoInstr = opName # "_" # getAddrName.ret;
212215 def _OFFSET : MTBUF_Load_Pseudo
213216 [(set load_vt:$vdata,
214217 (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
215 i1:$glc, i1:$slc, i1:$tfe)))]>,
218 i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
216219 MTBUFAddr64Table<0, NAME>;
217220
218221 def _ADDR64 : MTBUF_Load_Pseudo
219222 [(set load_vt:$vdata,
220223 (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
221 i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
224 i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
222225 MTBUFAddr64Table<1, NAME>;
223226
224227 def _OFFEN : MTBUF_Load_Pseudo ;
243246 : MTBUF_Pseudo
244247 (outs),
245248 getMTBUFIns.ret,
246 " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe",
249 " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc",
247250 pattern>,
248251 MTBUF_SetupAddr {
249252 let PseudoInstr = opName # "_" # getAddrName.ret;
258261 def _OFFSET : MTBUF_Store_Pseudo
259262 [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
260263 i16:$offset, i8:$format, i1:$glc,
261 i1:$slc, i1:$tfe))]>,
264 i1:$slc, i1:$tfe, i1:$dlc))]>,
262265 MTBUFAddr64Table<0, NAME>;
263266
264267 def _ADDR64 : MTBUF_Store_Pseudo
265268 [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
266269 i16:$offset, i8:$format, i1:$glc,
267 i1:$slc, i1:$tfe))]>,
270 i1:$slc, i1:$tfe, i1:$dlc))]>,
268271 MTBUFAddr64Table<1, NAME>;
269272
270273 def _OFFEN : MTBUF_Store_Pseudo ;
322325 bits<1> has_vdata = 1;
323326 bits<1> has_vaddr = 1;
324327 bits<1> has_glc = 1;
328 bits<1> has_dlc = 1;
325329 bits<1> glc_value = 0; // the value for glc if no such operand
330 bits<1> dlc_value = 0; // the value for dlc if no such operand
326331 bits<1> has_srsrc = 1;
327332 bits<1> has_soffset = 1;
328333 bits<1> has_offset = 1;
331336 bits<4> dwords = 0;
332337 }
333338
334 class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
339 class MUBUF_Real <MUBUF_Pseudo ps> :
335340 InstSI {
336341
337342 let isPseudo = 0;
346351
347352 bits<12> offset;
348353 bits<1> glc;
354 bits<1> dlc;
349355 bits<8> vaddr;
350356 bits<8> vdata;
351357 bits<7> srsrc;
356362
357363
358364 // For cache invalidation instructions.
359 class MUBUF_Invalidate > :
365 class MUBUF_Invalidate = null_frag> :
360366 MUBUF_Pseudo {
361367
362368 let AsmMatchConverter = "";
371377 let has_vdata = 0;
372378 let has_vaddr = 0;
373379 let has_glc = 0;
380 let has_dlc = 0;
374381 let glc_value = 0;
382 let dlc_value = 0;
375383 let has_srsrc = 0;
376384 let has_soffset = 0;
377385 let has_offset = 0;
398406 );
399407 dag ret = !con(
400408 !if(!empty(vdataList), InsNoData, InsData),
401 !if(isLds, (ins), (ins TFE:$tfe))
409 !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
402410 );
403411 }
404412
458466 !con(getMUBUFIns.ret,
459467 !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))),
460468 " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc" #
461 !if(isLds, " lds", "$tfe"),
469 !if(isLds, " lds", "$tfe") # "$dlc",
462470 pattern>,
463471 MUBUF_SetupAddr {
464472 let PseudoInstr = opName # !if(isLds, "_lds", "") #
488496 !if(isLds,
489497 [],
490498 [(set load_vt:$vdata,
491 (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
499 (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
492500 MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
493501
494502 def _ADDR64 : MUBUF_Load_Pseudo
496504 !if(isLds,
497505 [],
498506 [(set load_vt:$vdata,
499 (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
507 (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
500508 MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
501509
502510 def _OFFEN : MUBUF_Load_Pseudo ;
529537 : MUBUF_Pseudo
530538 (outs),
531539 getMUBUFIns.ret,
532 " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe",
540 " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe$dlc",
533541 pattern>,
534542 MUBUF_SetupAddr {
535543 let PseudoInstr = opName # "_" # getAddrName.ret;
545553
546554 def _OFFSET : MUBUF_Store_Pseudo
547555 [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
548 i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
556 i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
549557 MUBUFAddr64Table<0, NAME>;
550558
551559 def _ADDR64 : MUBUF_Store_Pseudo
552560 [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
553 i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
561 i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
554562 MUBUFAddr64Table<1, NAME>;
555563
556564 def _OFFEN : MUBUF_Store_Pseudo ;
636644 let hasSideEffects = 1;
637645 let DisableWQM = 1;
638646 let has_glc = 0;
647 let has_dlc = 0;
639648 let has_tfe = 0;
640649 let maybeAtomic = 1;
641650 }
654663 AtomicNoRet.ret, 0> {
655664 let PseudoInstr = opName # "_" # getAddrName.ret;
656665 let glc_value = 0;
666 let dlc_value = 0;
657667 let AsmMatchConverter = "cvtMubufAtomic";
658668 }
659669
671681 AtomicNoRet.ret, 1> {
672682 let PseudoInstr = opName # "_rtn_" # getAddrName.ret;
673683 let glc_value = 1;
684 let dlc_value = 0;
674685 let Constraints = "$vdata = $vdata_in";
675686 let DisableEncoding = "$vdata_in";
676687 let AsmMatchConverter = "cvtMubufAtomicReturn";
10501061
10511062 } // End let SubtargetPredicate = isGFX7Plus
10521063
1064 let SubtargetPredicate = isGFX10Plus in {
1065 def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
1066 def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
1067 } // End SubtargetPredicate = isGFX10Plus
1068
10531069 //===----------------------------------------------------------------------===//
10541070 // MUBUF Patterns
10551071 //===----------------------------------------------------------------------===//
10601076
10611077 def extract_slc : SDNodeXForm
10621078 return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
1079 }]>;
1080
1081 def extract_dlc : SDNodeXForm
1082 return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
10631083 }]>;
10641084
10651085 //===----------------------------------------------------------------------===//
10721092 (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
10731093 imm:$cachepolicy, 0)),
10741094 (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
1075 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1095 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
10761096 >;
10771097
10781098 def : GCNPat<
10791099 (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
10801100 imm:$cachepolicy, 0)),
10811101 (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
1082 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1102 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
10831103 >;
10841104
10851105 def : GCNPat<
10861106 (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
10871107 imm:$cachepolicy, imm)),
10881108 (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
1089 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1109 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
10901110 >;
10911111
10921112 def : GCNPat<
10951115 (!cast(opcode # _BOTHEN)
10961116 (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
10971117 $rsrc, $soffset, (as_i16imm $offset),
1098 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1118 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
10991119 >;
11001120 }
11011121
11431163 (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
11441164 imm:$cachepolicy, 0),
11451165 (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
1146 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1166 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
11471167 >;
11481168
11491169 def : GCNPat<
11501170 (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
11511171 imm:$cachepolicy, 0),
11521172 (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
1153 (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1173 (as_i16imm $offset), (extract_glc $cachepolicy),
1174 (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
11541175 >;
11551176
11561177 def : GCNPat<
11571178 (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
11581179 imm:$cachepolicy, imm),
11591180 (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
1160 (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1181 (as_i16imm $offset), (extract_glc $cachepolicy),
1182 (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
11611183 >;
11621184
11631185 def : GCNPat<
11661188 (!cast(opcode # _BOTHEN_exact)
11671189 $vdata,
11681190 (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
1169 $rsrc, $soffset, (as_i16imm $offset),
1170 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1191 $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
1192 (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
11711193 >;
11721194 }
11731195
13211343 class MUBUFLoad_PatternADDR64
13221344 PatFrag constant_ld> : GCNPat <
13231345 (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
1324 i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
1325 (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
1346 i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
1347 (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
13261348 >;
13271349
13281350 multiclass MUBUFLoad_Atomic_Pattern
13301352 def : GCNPat <
13311353 (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
13321354 i16:$offset, i1:$slc))),
1333 (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
1355 (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
13341356 >;
13351357
13361358 def : GCNPat <
13371359 (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
1338 (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
1360 (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
13391361 >;
13401362 }
13411363
13541376
13551377 def : GCNPat <
13561378 (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
1357 i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
1358 (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe)
1379 i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
1380 (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
13591381 >;
13601382 }
13611383
13761398 def : GCNPat <
13771399 (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
13781400 i32:$soffset, u16imm:$offset))),
1379 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
1401 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
13801402 >;
13811403
13821404 def : GCNPat <
13831405 (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
1384 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
1406 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
13851407 >;
13861408 }
13871409
13911413 ValueType vt, PatFrag ld_frag> {
13921414 def : GCNPat <
13931415 (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
1394 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
1416 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
13951417 >;
13961418
13971419 def : GCNPat <
13981420 (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
1399 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
1421 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
14001422 >;
14011423 }
14021424
14341456 def : GCNPat <
14351457 (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
14361458 i16:$offset, i1:$slc), vt:$val),
1437 (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
1459 (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
14381460 >;
14391461
14401462 def : GCNPat <
14411463 (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
1442 (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
1464 (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
14431465 >;
14441466 }
14451467 let SubtargetPredicate = isGFX6GFX7 in {
14531475
14541476 def : GCNPat <
14551477 (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
1456 i16:$offset, i1:$glc, i1:$slc, i1:$tfe)),
1457 (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
1478 i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
1479 (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
14581480 >;
14591481 }
14601482
14671489 def : GCNPat <
14681490 (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
14691491 i32:$soffset, u16imm:$offset)),
1470 (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
1492 (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
14711493 >;
14721494
14731495 def : GCNPat <
14741496 (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
14751497 u16imm:$offset)),
1476 (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
1498 (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
14771499 >;
14781500 }
14791501
15111533 imm:$format, imm:$cachepolicy, 0)),
15121534 (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
15131535 (as_i8imm $format),
1514 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1536 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
15151537 >;
15161538
15171539 def : GCNPat<
15191541 imm:$format, imm:$cachepolicy, imm)),
15201542 (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
15211543 (as_i8imm $format),
1522 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1544 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
15231545 >;
15241546
15251547 def : GCNPat<
15271549 imm:$format, imm:$cachepolicy, 0)),
15281550 (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
15291551 (as_i8imm $format),
1530 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1552 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
15311553 >;
15321554
15331555 def : GCNPat<
15371559 (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
15381560 $rsrc, $soffset, (as_i16imm $offset),
15391561 (as_i8imm $format),
1540 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1562 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
15411563 >;
15421564 }
15431565
15691591 imm:$format, imm:$cachepolicy, 0),
15701592 (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
15711593 (as_i16imm $offset), (as_i8imm $format),
1572 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1594 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
15731595 >;
15741596
15751597 def : GCNPat<
15771599 imm:$format, imm:$cachepolicy, imm),
15781600 (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
15791601 (as_i16imm $offset), (as_i8imm $format),
1580 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1602 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
15811603 >;
15821604
15831605 def : GCNPat<
15851607 imm:$format, imm:$cachepolicy, 0),
15861608 (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
15871609 (as_i16imm $offset), (as_i8imm $format),
1588 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1610 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
15891611 >;
15901612
15911613 def : GCNPat<
15951617 $vdata,
15961618 (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
15971619 $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
1598 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
1620 (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
15991621 >;
16001622 }
16011623
16251647 //===----------------------------------------------------------------------===//
16261648
16271649 //===----------------------------------------------------------------------===//
1628 // Base ENC_MUBUF for GFX6, GFX7.
1629 //===----------------------------------------------------------------------===//
1630
1631 class MUBUF_Real_si op, MUBUF_Pseudo ps> :
1632 MUBUF_Real,
1633 Enc64,
1634 SIMCInstr {
1635 let AssemblerPredicate=isGFX6GFX7;
1636 let DecoderNamespace="GFX6GFX7";
1637
1650 // Base ENC_MUBUF for GFX6, GFX7, GFX10.
1651 //===----------------------------------------------------------------------===//
1652
1653 class Base_MUBUF_Real_gfx6_gfx7_gfx10 op, MUBUF_Pseudo ps, int ef> :
1654 MUBUF_Real, Enc64, SIMCInstr {
16381655 let Inst{11-0} = !if(ps.has_offset, offset, ?);
16391656 let Inst{12} = ps.offen;
16401657 let Inst{13} = ps.idxen;
16411658 let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
1642 let Inst{15} = ps.addr64;
16431659 let Inst{16} = !if(ps.lds, 1, 0);
16441660 let Inst{24-18} = op;
1645 let Inst{31-26} = 0x38; //encoding
1661 let Inst{31-26} = 0x38;
16461662 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
16471663 let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
16481664 let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
16511667 let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
16521668 }
16531669
1654 multiclass MUBUF_Real_AllAddr_si op> {
1655 def _OFFSET_si : MUBUF_Real_si (NAME#"_OFFSET")>;
1656 def _ADDR64_si : MUBUF_Real_si (NAME#"_ADDR64")>;
1657 def _OFFEN_si : MUBUF_Real_si (NAME#"_OFFEN")>;
1658 def _IDXEN_si : MUBUF_Real_si (NAME#"_IDXEN")>;
1659 def _BOTHEN_si : MUBUF_Real_si (NAME#"_BOTHEN")>;
1660 }
1661
1662 multiclass MUBUF_Real_AllAddr_Lds_si op> {
1663
1664 def _OFFSET_si : MUBUF_Real_si (NAME#"_OFFSET")>,
1665 MUBUFLdsTable<0, NAME # "_OFFSET_si">;
1666 def _ADDR64_si : MUBUF_Real_si (NAME#"_ADDR64")>,
1667 MUBUFLdsTable<0, NAME # "_ADDR64_si">;
1668 def _OFFEN_si : MUBUF_Real_si (NAME#"_OFFEN")>,
1669 MUBUFLdsTable<0, NAME # "_OFFEN_si">;
1670 def _IDXEN_si : MUBUF_Real_si (NAME#"_IDXEN")>,
1671 MUBUFLdsTable<0, NAME # "_IDXEN_si">;
1672 def _BOTHEN_si : MUBUF_Real_si (NAME#"_BOTHEN")>,
1673 MUBUFLdsTable<0, NAME # "_BOTHEN_si">;
1674
1675 def _LDS_OFFSET_si : MUBUF_Real_si (NAME#"_LDS_OFFSET")>,
1676 MUBUFLdsTable<1, NAME # "_OFFSET_si">;
1677 def _LDS_ADDR64_si : MUBUF_Real_si (NAME#"_LDS_ADDR64")>,
1678 MUBUFLdsTable<1, NAME # "_ADDR64_si">;
1679 def _LDS_OFFEN_si : MUBUF_Real_si (NAME#"_LDS_OFFEN")>,
1680 MUBUFLdsTable<1, NAME # "_OFFEN_si">;
1681 def _LDS_IDXEN_si : MUBUF_Real_si (NAME#"_LDS_IDXEN")>,
1682 MUBUFLdsTable<1, NAME # "_IDXEN_si">;
1683 def _LDS_BOTHEN_si : MUBUF_Real_si (NAME#"_LDS_BOTHEN")>,
1684 MUBUFLdsTable<1, NAME # "_BOTHEN_si">;
1685 }
1686
1687 multiclass MUBUF_Real_Atomic_si op> : MUBUF_Real_AllAddr_si {
1688 def _OFFSET_RTN_si : MUBUF_Real_si (NAME#"_OFFSET_RTN")>;
1689 def _ADDR64_RTN_si : MUBUF_Real_si (NAME#"_ADDR64_RTN")>;
1690 def _OFFEN_RTN_si : MUBUF_Real_si (NAME#"_OFFEN_RTN")>;
1691 def _IDXEN_RTN_si : MUBUF_Real_si (NAME#"_IDXEN_RTN")>;
1692 def _BOTHEN_RTN_si : MUBUF_Real_si (NAME#"_BOTHEN_RTN")>;
1693 }
1694
1695 defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_si <0x00>;
1696 defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_si <0x01>;
1697 defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x02>;
1698 defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x03>;
1699 defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_si <0x04>;
1700 defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_si <0x05>;
1701 defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x06>;
1702 defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x07>;
1703 defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_si <0x08>;
1704 defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_si <0x09>;
1705 defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_si <0x0a>;
1706 defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_si <0x0b>;
1707 defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_si <0x0c>;
1708 defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_si <0x0d>;
1709 defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_si <0x0e>;
1710 defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_si <0x0f>;
1711 defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_si <0x18>;
1712 defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_si <0x1a>;
1713 defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_si <0x1c>;
1714 defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_si <0x1d>;
1715 defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_si <0x1e>;
1716 defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_si <0x1f>;
1717
1718 defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_si <0x30>;
1719 defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_si <0x31>;
1720 defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_si <0x32>;
1721 defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_si <0x33>;
1722 //defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomic_si <0x34>; // isn't on CI & VI
1723 defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_si <0x35>;
1724 defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_si <0x36>;
1725 defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_si <0x37>;
1726 defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_si <0x38>;
1727 defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_si <0x39>;
1728 defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_si <0x3a>;
1729 defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_si <0x3b>;
1730 defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_si <0x3c>;
1731 defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_si <0x3d>;
1732
1733 //defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_si <0x3e>; // isn't on VI
1734 //defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomic_si <0x3f>; // isn't on VI
1735 //defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomic_si <0x40>; // isn't on VI
1736 defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_si <0x50>;
1737 defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_si <0x51>;
1738 defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_si <0x52>;
1739 defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_si <0x53>;
1740 //defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomic_si <0x54>; // isn't on CI & VI
1741 defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_si <0x55>;
1742 defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_si <0x56>;
1743 defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_si <0x57>;
1744 defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_si <0x58>;
1745 defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_si <0x59>;
1746 defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_si <0x5a>;
1747 defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_si <0x5b>;
1748 defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_si <0x5c>;
1749 defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_si <0x5d>;
1750 // FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI.
1751 //defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomic_si <0x5e">; // isn't on VI
1752 //defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomic_si <0x5f>; // isn't on VI
1753 //defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomic_si <0x60>; // isn't on VI
1754
1755 def BUFFER_WBINVL1_SC_si : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>;
1756 def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
1757
1758 class MTBUF_Real_si op, MTBUF_Pseudo ps> :
1759 MTBUF_Real,
1760 Enc64,
1761 SIMCInstr {
1762 let AssemblerPredicate=isGFX6GFX7;
1763 let DecoderNamespace="GFX6GFX7";
1764
1670 class MUBUF_Real_gfx10 op, MUBUF_Pseudo ps> :
1671 Base_MUBUF_Real_gfx6_gfx7_gfx10 {
1672 let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
1673 let Inst{25} = op{7};
1674 }
1675
1676 class MUBUF_Real_gfx6_gfx7 op, MUBUF_Pseudo ps> :
1677 Base_MUBUF_Real_gfx6_gfx7_gfx10 {
1678 let Inst{15} = ps.addr64;
1679 }
1680
1681 //===----------------------------------------------------------------------===//
1682 // MUBUF - GFX10.
1683 //===----------------------------------------------------------------------===//
1684
1685 let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
1686 multiclass MUBUF_Real_gfx10_with_name op, string opName,
1687 string asmName> {
1688 def _gfx10 : MUBUF_Real_gfx10(opName)> {
1689 MUBUF_Pseudo ps = !cast(opName);
1690 let AsmString = asmName # ps.AsmOperands;
1691 }
1692 }
1693 multiclass MUBUF_Real_AllAddr_gfx10 op> {
1694 def _BOTHEN_gfx10 :
1695 MUBUF_Real_gfx10(NAME#"_BOTHEN")>;
1696 def _IDXEN_gfx10 :
1697 MUBUF_Real_gfx10(NAME#"_IDXEN")>;
1698 def _OFFEN_gfx10 :
1699 MUBUF_Real_gfx10(NAME#"_OFFEN")>;
1700 def _OFFSET_gfx10 :
1701 MUBUF_Real_gfx10(NAME#"_OFFSET")>;
1702 }
1703 multiclass MUBUF_Real_AllAddr_Lds_gfx10 op> {
1704 def _OFFSET_gfx10 : MUBUF_Real_gfx10(NAME#"_OFFSET")>,
1705 MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">;
1706 def _OFFEN_gfx10 : MUBUF_Real_gfx10(NAME#"_OFFEN")>,
1707 MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
1708 def _IDXEN_gfx10 : MUBUF_Real_gfx10(NAME#"_IDXEN")>,
1709 MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
1710 def _BOTHEN_gfx10 : MUBUF_Real_gfx10(NAME#"_BOTHEN")>,
1711 MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;
1712
1713 def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_OFFSET")>,
1714 MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">;
1715 def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_OFFEN")>,
1716 MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
1717 def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_IDXEN")>,
1718 MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
1719 def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10(NAME#"_LDS_BOTHEN")>,
1720 MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
1721 }
1722 multiclass MUBUF_Real_Atomics_gfx10 op> :
1723 MUBUF_Real_AllAddr_gfx10 {
1724 def _BOTHEN_RTN_gfx10 :
1725 MUBUF_Real_gfx10(NAME#"_BOTHEN_RTN")>;
1726 def _IDXEN_RTN_gfx10 :
1727 MUBUF_Real_gfx10(NAME#"_IDXEN_RTN")>;
1728 def _OFFEN_RTN_gfx10 :
1729 MUBUF_Real_gfx10(NAME#"_OFFEN_RTN")>;
1730 def _OFFSET_RTN_gfx10 :
1731 MUBUF_Real_gfx10(NAME#"_OFFSET_RTN")>;
1732 }
1733 } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
1734
1735 defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
1736 defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x01b>;
1737 defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x020>;
1738 defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x021>;
1739 defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x022>;
1740 defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x023>;
1741 defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx10<0x024>;
1742 defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x025>;
1743 // FIXME-GFX10: Add following instructions:
1744 //defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x026>;
1745 //defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
1746 defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x080>;
1747 defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x081>;
1748 defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx10<0x082>;
1749 defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x083>;
1750 defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x084>;
1751 defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x085>;
1752 defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx10<0x086>;
1753 defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>;
1754
1755 def BUFFER_GL0_INV_gfx10 :
1756 MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>;
1757 def BUFFER_GL1_INV_gfx10 :
1758 MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>;
1759
1760 //===----------------------------------------------------------------------===//
1761 // MUBUF - GFX6, GFX7, GFX10.
1762 //===----------------------------------------------------------------------===//
1763
1764 let AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" in {
1765 multiclass MUBUF_Real_gfx6 op> {
1766 def _gfx6 : MUBUF_Real_gfx6_gfx7(NAME)>;
1767 }
1768 } // End AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6"
1769
1770 let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
1771 multiclass MUBUF_Real_gfx7 op> {
1772 def _gfx7 : MUBUF_Real_gfx6_gfx7(NAME)>;
1773 }
1774 } // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
1775
1776 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
1777 multiclass MUBUF_Real_AllAddr_gfx6_gfx7 op> {
1778 def _ADDR64_gfx6_gfx7 :
1779 MUBUF_Real_gfx6_gfx7(NAME#"_ADDR64")>;
1780 def _BOTHEN_gfx6_gfx7 :
1781 MUBUF_Real_gfx6_gfx7(NAME#"_BOTHEN")>;
1782 def _IDXEN_gfx6_gfx7 :
1783 MUBUF_Real_gfx6_gfx7(NAME#"_IDXEN")>;
1784 def _OFFEN_gfx6_gfx7 :
1785 MUBUF_Real_gfx6_gfx7(NAME#"_OFFEN")>;
1786 def _OFFSET_gfx6_gfx7 :
1787 MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET")>;
1788 }
1789 multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7 op> {
1790 def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET")>,
1791 MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">;
1792 def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_ADDR64")>,
1793 MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
1794 def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_OFFEN")>,
1795 MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
1796 def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_IDXEN")>,
1797 MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
1798 def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_BOTHEN")>,
1799 MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;
1800
1801 def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_OFFSET")>,
1802 MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">;
1803 def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_ADDR64")>,
1804 MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
1805 def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_OFFEN")>,
1806 MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
1807 def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_IDXEN")>,
1808 MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
1809 def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_BOTHEN")>,
1810 MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
1811 }
1812 multiclass MUBUF_Real_Atomics_gfx6_gfx7 op> :
1813 MUBUF_Real_AllAddr_gfx6_gfx7 {
1814 def _ADDR64_RTN_gfx6_gfx7 :
1815 MUBUF_Real_gfx6_gfx7(NAME#"_ADDR64_RTN")>;
1816 def _BOTHEN_RTN_gfx6_gfx7 :
1817 MUBUF_Real_gfx6_gfx7(NAME#"_BOTHEN_RTN")>;
1818 def _IDXEN_RTN_gfx6_gfx7 :
1819 MUBUF_Real_gfx6_gfx7(NAME#"_IDXEN_RTN")>;
1820 def _OFFEN_RTN_gfx6_gfx7 :
1821 MUBUF_Real_gfx6_gfx7(NAME#"_OFFEN_RTN")>;
1822 def _OFFSET_RTN_gfx6_gfx7 :
1823 MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET_RTN")>;
1824 }
1825 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
1826
1827 multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10 op> :
1828 MUBUF_Real_AllAddr_gfx6_gfx7, MUBUF_Real_AllAddr_gfx10;
1829
1830 multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10 op> :
1831 MUBUF_Real_AllAddr_Lds_gfx6_gfx7, MUBUF_Real_AllAddr_Lds_gfx10;
1832
1833 multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10 op> :
1834 MUBUF_Real_Atomics_gfx6_gfx7, MUBUF_Real_Atomics_gfx10;
1835
1836 // FIXME-GFX6: Following instructions are available only on GFX6.
1837 //defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>;
1838 //defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomics_gfx6 <0x054>;
1839
1840 defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>;
1841 defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
1842 defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
1843 defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
1844 defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
1845 defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
1846 defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
1847 defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
1848 defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>;
1849 defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>;
1850 defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>;
1851 defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>;
1852 defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>;
1853 defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>;
1854 defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>;
1855 defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>;
1856 defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>;
1857 defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>;
1858 defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>;
1859 defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>;
1860 defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>;
1861 defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>;
1862
1863 defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>;
1864 defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>;
1865 defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>;
1866 defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>;
1867 defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>;
1868 defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>;
1869 defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>;
1870 defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>;
1871 defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>;
1872 defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
1873 defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
1874 defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
1875 defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
1876 // FIXME-GFX6-GFX7-GFX10: Add following instructions:
1877 //defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
1878 //defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
1879 //defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
1880 defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
1881 defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
1882 defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
1883 defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>;
1884 defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>;
1885 defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>;
1886 defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>;
1887 defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>;
1888 defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>;
1889 defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>;
1890 defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
1891 defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
1892 defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
1893 // FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
1894 // FIXME-GFX6-GFX7-GFX10: Add following instructions:
1895 //defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
1896 //defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
1897 //defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
1898
1899 defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
1900 defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
1901 def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;
1902
1903 //===----------------------------------------------------------------------===//
1904 // Base ENC_MTBUF for GFX6, GFX7, GFX10.
1905 //===----------------------------------------------------------------------===//
1906
1907 class Base_MTBUF_Real_gfx6_gfx7_gfx10 op, MTBUF_Pseudo ps, int ef> :
1908 MTBUF_Real, Enc64, SIMCInstr {
17651909 let Inst{11-0} = !if(ps.has_offset, offset, ?);
17661910 let Inst{12} = ps.offen;
17671911 let Inst{13} = ps.idxen;
17681912 let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
1769 let Inst{15} = ps.addr64;
17701913 let Inst{18-16} = op;
1771 let Inst{22-19} = dfmt;
1772 let Inst{25-23} = nfmt;
17731914 let Inst{31-26} = 0x3a; //encoding
17741915 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
17751916 let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
17791920 let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
17801921 }
17811922
1782 multiclass MTBUF_Real_AllAddr_si op> {
1783 def _OFFSET_si : MTBUF_Real_si (NAME#"_OFFSET")>;
1784 def _ADDR64_si : MTBUF_Real_si (NAME#"_ADDR64")>;
1785 def _OFFEN_si : MTBUF_Real_si (NAME#"_OFFEN")>;
1786 def _IDXEN_si : MTBUF_Real_si (NAME#"_IDXEN")>;
1787 def _BOTHEN_si : MTBUF_Real_si (NAME#"_BOTHEN")>;
1788 }
1789
1790 defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>;
1791 defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>;
1792 defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>;
1793 defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>;
1794 defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>;
1795 defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>;
1796 defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>;
1797 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
1798
1799 //===----------------------------------------------------------------------===//
1800 // CI
1801 // MTBUF - GFX6, GFX7.
1802 //===----------------------------------------------------------------------===//
1803
1804 class MUBUF_Real_ci op, MUBUF_Pseudo ps> :
1805 MUBUF_Real_si {
1806 let AssemblerPredicate = isGFX7Only;
1807 let DecoderNamespace = "GFX7";
1808 }
1809
1810 def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>;
1811
1812
1813 //===----------------------------------------------------------------------===//
1814 // GFX8, GFX9 (VI).
1923 //===----------------------------------------------------------------------===//
1924 // MTBUF - GFX10.
1925 //===----------------------------------------------------------------------===//
1926
1927 class MTBUF_Real_gfx10 op, MTBUF_Pseudo ps> :
1928 Base_MTBUF_Real_gfx6_gfx7_gfx10 {
1929 let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
1930 let Inst{25-19} = format;
1931 let Inst{53} = op{3};
1932 }
1933
1934 let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
1935 multiclass MTBUF_Real_AllAddr_gfx10 op> {
1936 def _BOTHEN_gfx10 :
1937 MTBUF_Real_gfx10(NAME#"_BOTHEN")>;
1938 def _IDXEN_gfx10 :
1939 MTBUF_Real_gfx10(NAME#"_IDXEN")>;
1940 def _OFFEN_gfx10 :
1941 MTBUF_Real_gfx10(NAME#"_OFFEN")>;
1942 def _OFFSET_gfx10 :
1943 MTBUF_Real_gfx10(NAME#"_OFFSET")>;
1944 }
1945 } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
1946
1947 defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x008>;
1948 defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x009>;
1949 defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00a>;
1950 defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00b>;
1951 defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x00c>;
1952 defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x00d>;
1953 defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00e>;
1954 defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>;
1955
1956 //===----------------------------------------------------------------------===//
1957 // MTBUF - GFX6, GFX7, GFX10.
1958 //===----------------------------------------------------------------------===//
1959
1960 class MTBUF_Real_gfx6_gfx7 op, MTBUF_Pseudo ps> :
1961 Base_MTBUF_Real_gfx6_gfx7_gfx10 {
1962 let Inst{15} = ps.addr64;
1963 let Inst{22-19} = dfmt;
1964 let Inst{25-23} = nfmt;
1965 }
1966
1967 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
1968 multiclass MTBUF_Real_AllAddr_gfx6_gfx7 op> {
1969 def _ADDR64_gfx6_gfx7 :
1970 MTBUF_Real_gfx6_gfx7(NAME#"_ADDR64")>;
1971 def _BOTHEN_gfx6_gfx7 :
1972 MTBUF_Real_gfx6_gfx7(NAME#"_BOTHEN")>;
1973 def _IDXEN_gfx6_gfx7 :
1974 MTBUF_Real_gfx6_gfx7(NAME#"_IDXEN")>;
1975 def _OFFEN_gfx6_gfx7 :
1976 MTBUF_Real_gfx6_gfx7(NAME#"_OFFEN")>;
1977 def _OFFSET_gfx6_gfx7 :
1978 MTBUF_Real_gfx6_gfx7(NAME#"_OFFSET")>;
1979 }
1980 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
1981
1982 multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10 op> :
1983 MTBUF_Real_AllAddr_gfx6_gfx7, MTBUF_Real_AllAddr_gfx10;
1984
1985 defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>;
1986 defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
1987 defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
1988 defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
1989 defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
1990 defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
1991 defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
1992 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
1993
1994 //===----------------------------------------------------------------------===//
1995 // GFX8, GFX9 (VI).
18151996 //===----------------------------------------------------------------------===//
18161997
18171998 class MUBUF_Real_vi op, MUBUF_Pseudo ps> :
1818 MUBUF_Real<op, ps>,
1999 MUBUF_Real<ps>,
18192000 Enc64,
18202001 SIMCInstr {
18212002 let AssemblerPredicate = isGFX8GFX9;
18652046 }
18662047
18672048 class MUBUF_Real_gfx80 op, MUBUF_Pseudo ps> :
1868 MUBUF_Real<op, ps>,
2049 MUBUF_Real<ps>,
18692050 Enc64,
18702051 SIMCInstr {
18712052 let AssemblerPredicate=HasUnpackedD16VMem;
55 //
66 //===----------------------------------------------------------------------===//
77
8 def FLATAtomic : ComplexPattern;
9 def FLATOffset : ComplexPattern", [], [], -10>;
10
11 def FLATOffsetSigned : ComplexPattern", [], [], -10>;
12 def FLATSignedAtomic : ComplexPattern], -10>;
8 def FLATAtomic : ComplexPattern], -10>;
9 def FLATOffset : ComplexPattern", [], [SDNPWantRoot], -10>;
10
11 def FLATOffsetSigned : ComplexPattern", [], [SDNPWantRoot], -10>;
12 def FLATSignedAtomic : ComplexPattern;
1313
1414 //===----------------------------------------------------------------------===//
1515 // FLAT classes
5050 bits<1> has_data = 1;
5151 bits<1> has_glc = 1;
5252 bits<1> glcValue = 0;
53 bits<1> has_dlc = 1;
54 bits<1> dlcValue = 0;
5355
5456 let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
5557 !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
8789
8890 bits<1> slc;
8991 bits<1> glc;
92 bits<1> dlc;
9093
9194 // Only valid on gfx9
9295 bits<1> lds = 0; // XXX - What does this actually do?
140143 !con((ins VReg_64:$vaddr),
141144 !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
142145 (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
143 (ins GLC:$glc, SLC:$slc)),
146 (ins GLC:$glc, SLC:$slc, DLC:$dlc)),
144147 !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
145 " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
148 " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
146149 let has_data = 0;
147150 let mayLoad = 1;
148151 let has_saddr = HasSaddr;
163166 !con((ins VReg_64:$vaddr, vdataClass:$vdata),
164167 !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
165168 (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
166 (ins GLC:$glc, SLC:$slc)),
167 " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
169 (ins GLC:$glc, SLC:$slc, DLC:$dlc)),
170 " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
168171 let mayLoad = 0;
169172 let mayStore = 1;
170173 let has_vdst = 0;
197200 opName,
198201 (outs regClass:$vdst),
199202 !if(EnableSaddr,
200 (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
201 (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
202 " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
203 (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
204 (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
205 " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
203206 let has_data = 0;
204207 let mayLoad = 1;
205208 let has_saddr = 1;
213216 opName,
214217 (outs),
215218 !if(EnableSaddr,
216 (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
217 (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
218 " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
219 (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
220 (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
221 " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
219222 let mayLoad = 0;
220223 let mayStore = 1;
221224 let has_vdst = 0;
247250 let mayStore = 1;
248251 let has_glc = 0;
249252 let glcValue = 0;
253 let has_dlc = 0;
254 let dlcValue = 0;
250255 let has_vdst = 0;
251256 let maybeAtomic = 1;
252257 }
257262 let hasPostISelHook = 1;
258263 let has_vdst = 1;
259264 let glcValue = 1;
265 let dlcValue = 0;
260266 let PseudoInstr = NAME # "_RTN";
261267 }
262268
491497 defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
492498 VReg_64, i64, atomic_dec_flat>;
493499
494 // GFX7-only flat instructions.
495 let SubtargetPredicate = isGFX7Only in {
500 // GFX7-, GFX10-only flat instructions.
501 let SubtargetPredicate = isGFX7GFX10 in {
496502
497503 defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
498504 VGPR_32, f32, null_frag, v2f32, VReg_64>;
512518 defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
513519 VReg_64, f64>;
514520
515 } // End SubtargetPredicate = isGFX7Only
521 } // End SubtargetPredicate = isGFX7GFX10
516522
517523 let SubtargetPredicate = HasFlatGlobalInsts in {
518524 defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
655661
656662 } // End SubtargetPredicate = HasFlatScratchInsts
657663
664 let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
665 defm GLOBAL_ATOMIC_FCMPSWAP :
666 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
667 defm GLOBAL_ATOMIC_FMIN :
668 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
669 defm GLOBAL_ATOMIC_FMAX :
670 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
671 defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
672 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
673 defm GLOBAL_ATOMIC_FMIN_X2 :
674 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
675 defm GLOBAL_ATOMIC_FMAX_X2 :
676 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
677 } // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
678
679
658680 //===----------------------------------------------------------------------===//
659681 // Flat Patterns
660682 //===----------------------------------------------------------------------===//
662684 // Patterns for global loads with no offset.
663685 class FlatLoadPat : GCNPat <
664686 (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
665 (inst $vaddr, $offset, 0, $slc)
687 (inst $vaddr, $offset, 0, 0, $slc)
666688 >;
667689
668690 class FlatLoadPat_D16 : GCNPat <
669691 (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
670 (inst $vaddr, $offset, 0, $slc, $in)
692 (inst $vaddr, $offset, 0, 0, $slc, $in)
671693 >;
672694
673695 class FlatSignedLoadPat_D16 : GCNPat <
674696 (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
675 (inst $vaddr, $offset, 0, $slc, $in)
697 (inst $vaddr, $offset, 0, 0, $slc, $in)
676698 >;
677699
678700 class FlatLoadAtomicPat : GCNPat <
679701 (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
680 (inst $vaddr, $offset, 0, $slc)
702 (inst $vaddr, $offset, 0, 0, $slc)
681703 >;
682704
683705 class FlatLoadSignedPat : GCNPat <
684706 (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
685 (inst $vaddr, $offset, 0, $slc)
707 (inst $vaddr, $offset, 0, 0, $slc)
686708 >;
687709
688710 class FlatStorePat : GCNPat <
689711 (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
690 (inst $vaddr, $data, $offset, 0, $slc)
712 (inst $vaddr, $data, $offset, 0, 0, $slc)
691713 >;
692714
693715 class FlatStoreSignedPat : GCNPat <
694716 (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
695 (inst $vaddr, $data, $offset, 0, $slc)
717 (inst $vaddr, $data, $offset, 0, 0, $slc)
696718 >;
697719
698720 class FlatStoreAtomicPat : GCNPat <
699721 // atomic store follows atomic binop convention so the address comes
700722 // first.
701723 (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
702 (inst $vaddr, $data, $offset, 0, $slc)
724 (inst $vaddr, $data, $offset, 0, 0, $slc)
703725 >;
704726
705727 class FlatStoreSignedAtomicPat : GCNPat <
706728 // atomic store follows atomic binop convention so the address comes
707729 // first.
708730 (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
709 (inst $vaddr, $data, $offset, 0, $slc)
731 (inst $vaddr, $data, $offset, 0, 0, $slc)
710732 >;
711733
712734 class FlatAtomicPat
11071129 defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
11081130 defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
11091131 defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
1132
1133
1134 //===----------------------------------------------------------------------===//
1135 // GFX10.
1136 //===----------------------------------------------------------------------===//
1137
1138 class FLAT_Real_gfx10 op, FLAT_Pseudo ps> :
1139 FLAT_Real, SIMCInstr {
1140 let AssemblerPredicate = isGFX10Plus;
1141 let DecoderNamespace = "GFX10";
1142
1143 let Inst{11-0} = {offset{12}, offset{10-0}};
1144 let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
1145 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
1146 let Inst{55} = 0;
1147 }
1148
1149
1150 multiclass FLAT_Real_Base_gfx10 op> {
1151 def _gfx10 :
1152 FLAT_Real_gfx10(NAME)>;
1153 }
1154
1155 multiclass FLAT_Real_RTN_gfx10 op> {
1156 def _RTN_gfx10 :
1157 FLAT_Real_gfx10(NAME#"_RTN")>;
1158 }
1159
1160 multiclass FLAT_Real_SADDR_gfx10 op> {
1161 def _SADDR_gfx10 :
1162 FLAT_Real_gfx10(NAME#"_SADDR")>;
1163 }
1164
1165 multiclass FLAT_Real_SADDR_RTN_gfx10 op> {
1166 def _SADDR_RTN_gfx10 :
1167 FLAT_Real_gfx10(NAME#"_SADDR_RTN")>;
1168 }
1169
1170
1171 multiclass FLAT_Real_AllAddr_gfx10 op> :
1172 FLAT_Real_Base_gfx10,
1173 FLAT_Real_SADDR_gfx10;
1174
1175 multiclass FLAT_Real_Atomics_gfx10 op> :
1176 FLAT_Real_Base_gfx10,
1177 FLAT_Real_RTN_gfx10;
1178
1179 multiclass FLAT_Real_GlblAtomics_gfx10 op> :
1180 FLAT_Real_AllAddr_gfx10,
1181 FLAT_Real_RTN_gfx10,
1182 FLAT_Real_SADDR_RTN_gfx10;
1183
1184
1185 // ENC_FLAT.
1186 defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>;
1187 defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>;
1188 defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>;
1189 defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>;
1190 defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>;
1191 defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>;
1192 defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>;
1193 defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>;
1194 defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>;
1195 defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>;
1196 defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>;
1197 defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
1198 defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>;
1199 defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>;
1200 defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>;
1201 defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>;
1202 defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>;
1203 defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>;
1204 defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>;
1205 defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>;
1206 defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>;
1207 defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>;
1208 defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>;
1209 defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>;
1210 defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>;
1211 defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>;
1212 defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>;
1213 defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>;
1214 defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>;
1215 defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>;
1216 defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>;
1217 defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>;
1218 defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>;
1219 defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>;
1220 defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>;
1221 defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>;
1222 defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>;
1223 defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>;
1224 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>;
1225 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>;
1226 defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>;
1227 defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>;
1228 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>;
1229 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>;
1230 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>;
1231 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>;
1232 defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>;
1233 defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>;
1234 defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>;
1235 defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>;
1236 defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>;
1237 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
1238 defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>;
1239 defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>;
1240
1241
1242 // ENC_FLAT_GLBL.
1243 defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
1244 defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
1245 defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
1246 defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
1247 defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
1248 defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
1249 defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
1250 defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
1251 defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
1252 defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
1253 defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
1254 defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
1255 defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
1256 defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
1257 defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
1258 defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
1259 defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
1260 defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
1261 defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
1262 defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
1263 defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
1264 defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
1265 defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
1266 defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
1267 defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
1268 defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
1269 defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
1270 defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
1271 defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
1272 defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>;
1273 defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>;
1274 defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>;
1275 defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>;
1276 defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>;
1277 defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>;
1278 defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>;
1279 defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>;
1280 defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>;
1281 defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>;
1282 defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>;
1283 defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>;
1284 defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>;
1285 defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>;
1286 defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>;
1287 defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>;
1288 defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>;
1289 defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>;
1290 defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>;
1291 defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>;
1292 defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>;
1293 defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
1294 defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
1295 defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
1296 defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
1297
1298
1299 // ENC_FLAT_SCRATCH.
1300 defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
1301 defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
1302 defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
1303 defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
1304 defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
1305 defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
1306 defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
1307 defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
1308 defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
1309 defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
1310 defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
1311 defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
1312 defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
1313 defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
1314 defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
1315 defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
1316 defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
1317 defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
1318 defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
1319 defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
1320 defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
1321 defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
7171 }
7272
7373 void AMDGPUInstPrinter::printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
74 const MCSubtargetInfo &STI,
7475 raw_ostream &O) {
75 O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
76 // GFX10: Address offset is 12-bit signed byte offset.
77 if (AMDGPU::isGFX10(STI)) {
78 O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
79 } else {
80 O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
81 }
7682 }
7783
7884 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
127133 uint16_t Imm = MI->getOperand(OpNo).getImm();
128134 if (Imm != 0) {
129135 O << ((OpNo == 0)? "offset:" : " offset:");
130 printS13ImmDecOperand(MI, OpNo, O);
136 printS13ImmDecOperand(MI, OpNo, STI, O);
131137 }
132138 }
133139
170176 void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
171177 const MCSubtargetInfo &STI, raw_ostream &O) {
172178 printNamedBit(MI, OpNo, O, "gds");
179 }
180
181 void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
182 const MCSubtargetInfo &STI, raw_ostream &O) {
183 if (AMDGPU::isGFX10(STI))
184 printNamedBit(MI, OpNo, O, "dlc");
173185 }
174186
175187 void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
4040 void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4141 void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4242 void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
43 void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
43 void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
44 const MCSubtargetInfo &STI, raw_ostream &O);
4445 void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
4546 const MCSubtargetInfo &STI, raw_ostream &O);
4647 void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
6566 void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
6667 const MCSubtargetInfo &STI, raw_ostream &O);
6768 void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
69 raw_ostream &O);
70 void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
6871 raw_ostream &O);
6972 void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
7073 raw_ostream &O);
196196 // Atomics dont have a GLC, so omit the field if not there.
197197 if (Glc)
198198 NewGlob->addOperand(MF, *Glc);
199
200 MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
201 if (DLC)
202 NewGlob->addOperand(MF, *DLC);
203
199204 NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
200205 // _D16 have an vdst_in operand, copy it in.
201206 MachineOperand *VDstInOp = TII->getNamedOperand(MI,
6969
7070 // Do a 64-bit pointer add.
7171 if (ST.flatScratchIsPointer()) {
72 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
73 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
74 .addReg(FlatScrInitLo)
75 .addReg(ScratchWaveOffsetReg);
76 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
77 .addReg(FlatScrInitHi)
78 .addImm(0);
79 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
80 addReg(FlatScrInitLo).
81 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
82 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
83 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
84 addReg(FlatScrInitHi).
85 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
86 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
87 return;
88 }
89
7290 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
7391 .addReg(FlatScrInitLo)
7492 .addReg(ScratchWaveOffsetReg);
7896
7997 return;
8098 }
99
100 assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);
81101
82102 // Copy the size in bytes.
83103 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
422442 .addReg(Rsrc01)
423443 .addImm(EncodedOffset) // offset
424444 .addImm(0) // glc
445 .addImm(0) // dlc
425446 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
426447 .addMemOperand(MMO);
427448 return;
462483 .addReg(MFI->getImplicitBufferPtrUserSGPR())
463484 .addImm(0) // offset
464485 .addImm(0) // glc
486 .addImm(0) // dlc
465487 .addMemOperand(MMO)
466488 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
467489 }
41654165 getNamedOperand(MI, AMDGPU::OpName::glc)) {
41664166 MIB.addImm(GLC->getImm());
41674167 }
4168 if (const MachineOperand *DLC =
4169 getNamedOperand(MI, AMDGPU::OpName::dlc)) {
4170 MIB.addImm(DLC->getImm());
4171 }
41684172
41694173 MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
41704174
829829 def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
830830 def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
831831
832 def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
832833 def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
833834 def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
834835 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
130130 bool GLC1;
131131 bool SLC0;
132132 bool SLC1;
133 bool DLC0;
134 bool DLC1;
133135 bool UseST64;
134136 SmallVector InstsToMove;
135137 };
322324 if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
323325 return (EltOffset0 + CI.Width0 == EltOffset1 ||
324326 EltOffset1 + CI.Width1 == EltOffset0) &&
325 CI.GLC0 == CI.GLC1 &&
327 CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
326328 (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
327329 }
328330
636638 CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
637639 CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
638640 }
641 CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
642 CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
639643 }
640644
641645 // Check both offsets fit in the reduced range.
856860 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
857861 .addImm(MergedOffset) // offset
858862 .addImm(CI.GLC0) // glc
863 .addImm(CI.DLC0) // dlc
859864 .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
860865
861866 std::pair SubRegIdx = getSubRegIdxs(CI);
908913 .addImm(CI.GLC0) // glc
909914 .addImm(CI.SLC0) // slc
910915 .addImm(0) // tfe
916 .addImm(CI.DLC0) // dlc
911917 .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
912918
913919 std::pair SubRegIdx = getSubRegIdxs(CI);
10871093 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
10881094 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
10891095 .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
1090 .addImm(CI.GLC0) // glc
1091 .addImm(CI.SLC0) // slc
1092 .addImm(0) // tfe
1096 .addImm(CI.GLC0) // glc
1097 .addImm(CI.SLC0) // slc
1098 .addImm(0) // tfe
1099 .addImm(CI.DLC0) // dlc
10931100 .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
10941101
10951102 moveInstsAfter(MIB, CI.InstsToMove);
535535 .addImm(0) // glc
536536 .addImm(0) // slc
537537 .addImm(0) // tfe
538 .addImm(0) // dlc
538539 .cloneMemRefs(*MI);
539540
540541 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
638639 .addImm(0) // glc
639640 .addImm(0) // slc
640641 .addImm(0) // tfe
642 .addImm(0) // dlc
641643 .addMemOperand(NewMMO);
642644
643645 if (NumSubRegs > 1)
768770 .addReg(MFI->getScratchRSrcReg()) // sbase
769771 .addReg(OffsetReg, RegState::Kill) // soff
770772 .addImm(0) // glc
773 .addImm(0) // dlc
771774 .addMemOperand(MMO);
772775
773776 continue;
927930
928931 auto MIB =
929932 BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
930 .addReg(MFI->getScratchRSrcReg()) // sbase
931 .addReg(OffsetReg, RegState::Kill) // soff
932 .addImm(0) // glc
933 .addReg(MFI->getScratchRSrcReg()) // sbase
934 .addReg(OffsetReg, RegState::Kill) // soff
935 .addImm(0) // glc
936 .addImm(0) // dlc
933937 .addMemOperand(MMO);
934938
935939 if (NumSubRegs > 1 && i == 0)
3939 bits<1> has_sbase = 1;
4040 bits<1> has_sdst = 1;
4141 bit has_glc = 0;
42 bit has_dlc = 0;
4243 bits<1> has_offset = 1;
4344 bits<1> offset_is_imm = 0;
4445 }
7879 let mayLoad = 1;
7980 let mayStore = 0;
8081 let has_glc = 1;
82 let has_dlc = 1;
8183 }
8284
8385 class SM_Store_Pseudo pattern = []>
8789 let mayLoad = 0;
8890 let mayStore = 1;
8991 let has_glc = 1;
92 let has_dlc = 1;
9093 let ScalarStore = 1;
9194 }
9295
107110 RegisterClass dstClass> {
108111 def _IMM : SM_Load_Pseudo
109112 (outs dstClass:$sdst),
110 (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc),
111 " $sdst, $sbase, $offset$glc", []> {
113 (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
114 " $sdst, $sbase, $offset$glc$dlc", []> {
112115 let offset_is_imm = 1;
113116 let BaseClass = baseClass;
114117 let PseudoInstr = opName # "_IMM";
115118 let has_glc = 1;
119 let has_dlc = 1;
116120 }
117121
118122 def _SGPR : SM_Load_Pseudo
119123 (outs dstClass:$sdst),
120 (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
121 " $sdst, $sbase, $offset$glc", []> {
124 (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
125 " $sdst, $sbase, $offset$glc$dlc", []> {
122126 let BaseClass = baseClass;
123127 let PseudoInstr = opName # "_SGPR";
124128 let has_glc = 1;
129 let has_dlc = 1;
125130 }
126131 }
127132
129134 RegisterClass baseClass,
130135 RegisterClass srcClass> {
131136 def _IMM : SM_Store_Pseudo
132 (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc),
133 " $sdata, $sbase, $offset$glc", []> {
137 (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
138 " $sdata, $sbase, $offset$glc$dlc", []> {
134139 let offset_is_imm = 1;
135140 let BaseClass = baseClass;
136141 let SrcClass = srcClass;
138143 }
139144
140145 def _SGPR : SM_Store_Pseudo
141 (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
142 " $sdata, $sbase, $offset$glc", []> {
146 (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
147 " $sdata, $sbase, $offset$glc$dlc", []> {
143148 let BaseClass = baseClass;
144149 let SrcClass = srcClass;
145150 let PseudoInstr = opName # "_SGPR";
183188 def _SGPR : SM_Probe_Pseudo ;
184189 }
185190
191 class SM_WaveId_Pseudo : SM_Pseudo<
192 opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
193 " $sdst", [(set i32:$sdst, (node))]> {
194 let hasSideEffects = 1;
195 let mayStore = 0;
196 let mayLoad = 1;
197 let has_sbase = 0;
198 let has_offset = 0;
199 }
200
186201 //===----------------------------------------------------------------------===//
187202 // Scalar Atomic Memory Classes
188203 //===----------------------------------------------------------------------===//
196211 let mayLoad = 1;
197212 let mayStore = 1;
198213 let has_glc = 1;
214 let has_dlc = 1;
199215
200216 // Should these be set?
201217 let ScalarStore = 1;
211227 SM_Atomic_Pseudo
212228 !if(isRet, (outs dataClass:$sdst), (outs)),
213229 !if(isImm,
214 (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset),
215 (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset)),
216 !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", ""),
230 (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset, DLC:$dlc),
231 (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
232 !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
217233 isRet> {
218234 let offset_is_imm = isImm;
219235 let PseudoInstr = opName # !if(isImm,
271287 "s_buffer_load_dwordx16", SReg_128, SReg_512
272288 >;
273289
290 let SubtargetPredicate = HasScalarStores in {
274291 defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
275292 defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
276293 defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
286303 defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
287304 "s_buffer_store_dwordx4", SReg_128, SReg_128
288305 >;
289
306 } // End SubtargetPredicate = HasScalarStores
290307
291308 def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
292309 def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
296313 } // let SubtargetPredicate = isGFX7GFX8GFX9
297314
298315 let SubtargetPredicate = isGFX8Plus in {
316 let OtherPredicates = [HasScalarStores] in {
299317 def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
300318 def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
319 } // End OtherPredicates = [HasScalarStores]
301320 def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
302321
303322 defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
304323 defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
305324 } // SubtargetPredicate = isGFX8Plus
306325
307 let SubtargetPredicate = HasFlatScratchInsts, Uses = [FLAT_SCR] in {
326 let SubtargetPredicate = isGFX10Plus in {
327 def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
328 def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
329 } // End SubtargetPredicate = isGFX10Plus
330
331 let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
308332 defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
309333 defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
310334 defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
312336 defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
313337 defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
314338 defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
315 } // SubtargetPredicate = HasFlatScratchInsts
339 } // SubtargetPredicate = HasScalarFlatScratchInsts
316340
317341 let SubtargetPredicate = HasScalarAtomics in {
318342
374398
375399 } // let SubtargetPredicate = HasScalarAtomics
376400
377 let SubtargetPredicate = isGFX9Only in {
401 let SubtargetPredicate = HasScalarAtomics in {
378402 defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">;
379403 defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
380404 }
410434 SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> {
411435
412436 def _IMM_si : SMRD_Real_si {
413 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc);
437 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
414438 }
415439
416440 // FIXME: The operand name $offset is inconsistent with $soff used
417441 // in the pseudo
418442 def _SGPR_si : SMRD_Real_si {
419 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
443 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
420444 }
421445
422446 }
463487 SM_Load_Pseudo immPs = !cast(ps#_IMM),
464488 SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> {
465489 def _IMM_vi : SMEM_Real_vi {
466 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
490 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
467491 }
468492 def _SGPR_vi : SMEM_Real_vi {
469 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
493 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
470494 }
471495 }
472496
484508 // FIXME: The operand name $offset is inconsistent with $soff used
485509 // in the pseudo
486510 def _IMM_vi : SMEM_Real_Store_vi {
487 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
511 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
488512 }
489513
490514 def _SGPR_vi : SMEM_Real_Store_vi {
491 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
515 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
492516 }
493517 }
494518
637661
638662 let AssemblerPredicates = [isGFX7Only];
639663 let DecoderNamespace = "GFX7";
640 let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc);
664 let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
641665
642666 let LGKM_CNT = ps.LGKM_CNT;
643667 let SMRD = ps.SMRD;
717741 // 1. IMM offset
718742 def : GCNPat <
719743 (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
720 (vt (!cast(Instr#"_IMM") $sbase, $offset, 0))
744 (vt (!cast(Instr#"_IMM") $sbase, $offset, 0, 0))
721745 >;
722746
723747 // 2. 32-bit IMM offset on CI
724748 def : GCNPat <
725749 (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
726 (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0))> {
750 (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
727751 let OtherPredicates = [isGFX7Only];
728752 }
729753
730754 // 3. SGPR offset
731755 def : GCNPat <
732756 (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
733 (vt (!cast(Instr#"_SGPR") $sbase, $offset, 0))
757 (vt (!cast(Instr#"_SGPR") $sbase, $offset, 0, 0))
734758 >;
735759
736760 // 4. No offset
737761 def : GCNPat <
738762 (vt (smrd_load (i64 SReg_64:$sbase))),
739 (vt (!cast(Instr#"_IMM") i64:$sbase, 0, 0))
763 (vt (!cast(Instr#"_IMM") i64:$sbase, 0, 0, 0))
740764 >;
741765 }
742766
744768 // 1. Offset as an immediate
745769 def : GCNPat <
746770 (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
747 (vt (!cast(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc)))
771 (vt (!cast(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc), 0))
748772 >;
749773
750774 // 2. 32-bit IMM offset on CI
751775 def : GCNPat <
752776 (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
753 (!cast(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> {
777 (!cast(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), 0)> {
754778 let OtherPredicates = [isGFX7Only];
755779 }
756780
757781 // 3. Offset loaded in an 32bit SGPR
758782 def : GCNPat <
759783 (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
760 (vt (!cast(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc)))
784 (vt (!cast(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc), 0))
761785 >;
762786 }
763787
800824 >;
801825
802826 } // let OtherPredicates = [isGFX8Plus]
827
828 //===----------------------------------------------------------------------===//
829 // GFX10.
830 //===----------------------------------------------------------------------===//
831
// Base class for the GFX10 real (encoded) form of a scalar-memory pseudo `ps`.
// It is gated on isGFX10Plus, decoded in the "GFX10" namespace, and lays out
// a 64-bit instruction word (bits 63..0 are assigned below). Fields that the
// pseudo does not have are left unset (`?`).
// NOTE(review): the template argument lists on this header look stripped by
// the diff rendering (unbalanced `>`); `op` feeds an 8-bit field (Inst{25-18}).
832 class SMEM_Real_gfx10 op, SM_Pseudo ps> :
833 SM_Real, SIMCInstr, Enc64 {
834 bit glc;
835 bit dlc;
836
837 let AssemblerPredicates = [isGFX10Plus];
838 let DecoderNamespace = "GFX10";
839
// Only bits 6..1 of sbase are encoded; bit 0 is dropped.
840 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
841 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
// dlc and glc are single-bit fields at bits 14 and 16 respectively.
842 let Inst{14} = !if(ps.has_dlc, dlc, ?);
843 let Inst{16} = !if(ps.has_glc, glc, ?);
844 let Inst{25-18} = op;
845 let Inst{31-26} = 0x3d;
// Immediate offsets use the 20-bit field at bits 51..32.
846 let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?);
// Bits 63..57 carry the SGPR_NULL hardware encoding when the offset is an
// immediate; otherwise they carry the low 7 bits of the offset operand.
847 let Inst{63-57} = !if(ps.offset_is_imm, !cast(SGPR_NULL.HWEncoding),
848 !if(ps.has_offset, offset{6-0}, ?));
849 }
850
// Emits the two GFX10 real variants of a scalar load named by `ps`:
//   <NAME>_IMM_gfx10  - offset operand is a smrd_offset_20 immediate
//   <NAME>_SGPR_gfx10 - offset operand is an SReg_32 register
// immPs / sgprPs default to the pseudos found by appending _IMM / _SGPR to
// `ps`. Both variants take trailing GLC and DLC operands.
// NOTE(review): template argument lists (after `multiclass ...`, `!cast` and
// `SMEM_Real_gfx10`) look stripped by the diff rendering.
851 multiclass SM_Real_Loads_gfx10 op, string ps,
852 SM_Load_Pseudo immPs = !cast(ps#_IMM),
853 SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> {
854 def _IMM_gfx10 : SMEM_Real_gfx10 {
855 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
856 }
857 def _SGPR_gfx10 : SMEM_Real_gfx10 {
858 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
859 }
860 }
861
// GFX10 real encoding for scalar stores. It reuses SMEM_Real_gfx10 but adds a
// 7-bit sdata operand, clears sdst, and places sdata{6-0} in Inst{12-6} (the
// field the base class fills from sdst) when ps.has_sdst is set.
// NOTE(review): the template argument lists look stripped by the diff
// rendering.
862 class SMEM_Real_Store_gfx10 op, SM_Pseudo ps> : SMEM_Real_gfx10 {
863 bits<7> sdata;
864
865 let sdst = ?;
866 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
867 }
868
// Emits the two GFX10 real variants of a scalar store named by `ps`:
//   <NAME>_IMM_gfx10  - offset operand is a smrd_offset_20 immediate
//   <NAME>_SGPR_gfx10 - offset operand is an SReg_32 register
// Each variant lists $sdata first, then $sbase, $offset, and the GLC and DLC
// operands. immPs / sgprPs default to the pseudos found by appending
// _IMM / _SGPR to `ps`.
869 multiclass SM_Real_Stores_gfx10 op, string ps,
870 SM_Store_Pseudo immPs = !cast(ps#_IMM),
871 SM_Store_Pseudo sgprPs = !cast(ps#_SGPR)> {
872 // FIXME: The operand name $offset is inconsistent with $soff used
873 // in the pseudo
874 def _IMM_gfx10 : SMEM_Real_Store_gfx10 {
875 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
876 }
877
878 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 {
879 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
880 }
881 }
882
// GFX10 real encodings for scalar loads. Opcodes 0x000-0x004 are S_LOAD_*,
// 0x005-0x007 are S_SCRATCH_LOAD_*, and 0x008-0x00c are S_BUFFER_LOAD_*.
883 defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
884 defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
885 defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
886 defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
887 defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
888
// Scratch loads are additionally gated on HasScalarFlatScratchInsts.
889 let SubtargetPredicate = HasScalarFlatScratchInsts in {
890 defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
891 defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
892 defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
893 } // End SubtargetPredicate = HasScalarFlatScratchInsts
894
895 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
896 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
897 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
898 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
899 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
900
// GFX10 real encodings for scalar stores, all gated on HasScalarStores.
// Opcodes 0x010-0x012 are S_STORE_*, 0x015-0x017 are S_SCRATCH_STORE_*, and
// 0x018-0x01a are S_BUFFER_STORE_*.
901 let SubtargetPredicate = HasScalarStores in {
902 defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
903 defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
904 defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
// The scratch stores add HasScalarFlatScratchInsts through OtherPredicates
// rather than SubtargetPredicate, which is already taken by HasScalarStores.
905 let OtherPredicates = [HasScalarFlatScratchInsts] in {
906 defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
907 defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
908 defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
909 } // End OtherPredicates = [HasScalarFlatScratchInsts]
910 defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
911 defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
912 defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
913 } // End SubtargetPredicate = HasScalarStores
914
// GFX10 real encodings for scalar-memory instructions that are defined
// directly on SMEM_Real_gfx10 instead of through a load/store multiclass.
// The entries are not listed in opcode order.
915 def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
916 def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>;
917 def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
918 def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
919 def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
920
// S_DCACHE_WB is only available when the subtarget has scalar stores.
921 let SubtargetPredicate = HasScalarStores in {
922 def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
923 } // End SubtargetPredicate = HasScalarStores
924
// Emits the _IMM_gfx10 and _SGPR_gfx10 real variants of a probe instruction.
// Both derive from SMEM_Real_Store_gfx10, so they use the store-style layout
// with sdata in Inst{12-6}. Unlike the load/store multiclasses, no
// InOperandList is set here.
// NOTE(review): the template argument lists look stripped by the diff
// rendering.
925 multiclass SM_Real_Probe_gfx10 op, string ps> {
926 def _IMM_gfx10 : SMEM_Real_Store_gfx10 (ps#_IMM)>;
927 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 (ps#_SGPR)>;
928 }
929
// GFX10 real encodings for the two probe instructions (opcodes 0x26, 0x27).
930 defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
931 defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
932
// GFX10 real encoding for scalar atomics. It copies Constraints and
// DisableEncoding from the pseudo and fixes glc to the value of the pseudo
// rather than reading it as an operand.
// NOTE(review): the template argument lists look stripped by the diff
// rendering.
933 class SMEM_Atomic_Real_gfx10 op, SM_Atomic_Pseudo ps>
934 : SMEM_Real_gfx10 {
935
936 bits<7> sdata;
// NOTE(review): SMEM_Real_gfx10 already declares `bit dlc`; this looks like a
// redundant redeclaration - confirm before removing.
937 bit dlc;
938
939 let Constraints = ps.Constraints;
940 let DisableEncoding = ps.DisableEncoding;
941
942 let glc = ps.glc;
943
// Unlike the base class, a pseudo without dlc encodes 0 here instead of `?`.
944 let Inst{14} = !if(ps.has_dlc, dlc, 0);
// With glc set the field holds sdst; otherwise it holds sdata.
945 let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
946 }
947
// Emits four GFX10 real variants of a scalar atomic named by `ps`: immediate
// and SGPR offset forms, each with and without the _RTN suffix. All four share
// the same opcode `op` and differ only in the pseudo they are built from.
// NOTE(review): the template argument lists look stripped by the diff
// rendering.
948 multiclass SM_Real_Atomics_gfx10 op, string ps> {
949 def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_IMM)>;
950 def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR)>;
951 def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_IMM_RTN)>;
952 def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR_RTN)>;
953 }
954
// GFX10 real encodings gated on HasScalarAtomics. Opcode ranges used below:
//   0x40-0x4c  S_BUFFER_ATOMIC_*      0x60-0x6c  S_BUFFER_ATOMIC_*_X2
//   0x80-0x8c  S_ATOMIC_*             0xa0-0xac  S_ATOMIC_*_X2
//   0x28-0x29  S_DCACHE_DISCARD / S_DCACHE_DISCARD_X2
955 let SubtargetPredicate = HasScalarAtomics in {
956
957 defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
958 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
959 defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
960 defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
961 defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
962 defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
963 defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
964 defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
965 defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
966 defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
967 defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
968 defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
969 defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
970
971 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
972 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
973 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
974 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
975 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
976 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
977 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
978 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
979 defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
980 defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
981 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
982 defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
983 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
984
985 defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
986 defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
987 defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
988 defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
989 defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
990 defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
991 defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
992 defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
993 defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
994 defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
995 defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
996 defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
997 defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
998
999 defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
1000 defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
1001 defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
1002 defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
1003 defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
1004 defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
1005 defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
1006 defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
1007 defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
1008 defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
1009 defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
1010 defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
1011 defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
1012
// The discard instructions get _IMM_gfx10 and _SGPR_gfx10 variants built
// directly on SMEM_Real_gfx10. They sit inside the HasScalarAtomics scope, so
// they carry the same subtarget predicate as the atomics above.
// NOTE(review): the template argument lists look stripped by the diff
// rendering.
1013 multiclass SM_Real_Discard_gfx10 op, string ps> {
1014 def _IMM_gfx10 : SMEM_Real_gfx10 (ps#_IMM)>;
1015 def _SGPR_gfx10 : SMEM_Real_gfx10 (ps#_SGPR)>;
1016 }
1017
1018 defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
1019 defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
1020
1021 } // End SubtargetPredicate = HasScalarAtomics
1717 ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
1818 ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
1919 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
20 ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
20 ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
2121 %0:sgpr(p1) = COPY $sgpr2_sgpr3
2222 %1:vgpr(p1) = COPY %0
2323 %2:vgpr(s32) = G_IMPLICIT_DEF
1212 ; GCN-LABEL: name: implicit_def_s32
1313 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
1414 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
15 ; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
15 ; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
1616 %0:vgpr(p1) = COPY $vgpr3_vgpr4
1717 %1:vgpr(s32) = G_IMPLICIT_DEF
1818 G_STORE %1, %0 :: (store 4, addrspace 1)
2929 ; GCN-LABEL: name: implicit_def_s64
3030 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
3131 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
32 ; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
32 ; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
3333 %0:vgpr(p1) = COPY $vgpr3_vgpr4
3434 %1:vgpr(s64) = G_IMPLICIT_DEF
3535 G_STORE %1, %0 :: (store 8, addrspace 1)
5959 ; GCN-LABEL: name: implicit_def_p1
6060 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
6161 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
62 ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
62 ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
6363 %0:vgpr(p1) = G_IMPLICIT_DEF
6464 %1:vgpr(s32) = G_CONSTANT i32 4
6565 G_STORE %1, %0 :: (store 4, addrspace 1)
7575 ; GCN-LABEL: name: implicit_def_p3
7676 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
7777 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
78 ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
78 ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
7979 %0:vgpr(p3) = G_IMPLICIT_DEF
8080 %1:vgpr(s32) = G_CONSTANT i32 4
8181 G_STORE %1, %0 :: (store 4, addrspace 1)
9191 ; GCN-LABEL: name: implicit_def_p4
9292 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
9393 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
94 ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
94 ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
9595 %0:vgpr(p4) = G_IMPLICIT_DEF
9696 %1:vgpr(s32) = G_CONSTANT i32 4
9797 G_STORE %1, %0 :: (store 4, addrspace 1)
77 body: |
88 bb.0:
99 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
10 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
11 ; GCN-NEXT: S_ENDPGM 0
12 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
10 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
11 ; GCN-NEXT: S_ENDPGM 0
12 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
1313 S_ENDPGM 0
1414 ...
1515 ---
1919 body: |
2020 bb.0:
2121 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
22 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
23 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
24 ; GCN-NEXT: S_ENDPGM 0
25 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
26 $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
22 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
23 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
24 ; GCN-NEXT: S_ENDPGM 0
25 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
26 $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
2727 S_ENDPGM 0
2828 ...
2929 ---
3333 body: |
3434 bb.0:
3535 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
36 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
37 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
38 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
39 ; GCN-NEXT: S_ENDPGM 0
40 $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
41 $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
42 $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
36 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
37 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
38 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
39 ; GCN-NEXT: S_ENDPGM 0
40 $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
41 $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
42 $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
4343 S_ENDPGM 0
4444 ...
4545 ---
4949 body: |
5050 bb.0:
5151 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
52 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
53 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
54 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
55 ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
56 ; GCN-NEXT: S_ENDPGM 0
57 $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
58 $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
59 $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
60 $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
52 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
53 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
54 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
55 ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
56 ; GCN-NEXT: S_ENDPGM 0
57 $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
58 $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
59 $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
60 $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
6161 S_ENDPGM 0
6262 ...
6363 ---
6666 body: |
6767 bb.0:
6868 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
69 ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
70 ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
71 ; GCN-NEXT: S_ENDPGM 0
72 $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
73 $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
69 ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
70 ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
71 ; GCN-NEXT: S_ENDPGM 0
72 $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
73 $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
7474 S_ENDPGM 0
7575 ...
7676 ---
8080 body: |
8181 bb.0:
8282 ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
83 ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
84 ; GCN-NEXT: S_ENDPGM 0
85 $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
83 ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
84 ; GCN-NEXT: S_ENDPGM 0
85 $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
8686 S_ENDPGM 0
8787 ...
8888 ---
9292 body: |
9393 bb.0:
9494 ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
95 ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
96 ; GCN-NEXT: S_ENDPGM 0
97 $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
95 ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
96 ; GCN-NEXT: S_ENDPGM 0
97 $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
9898 S_ENDPGM 0
9999 ...
100100 ---
104104 body: |
105105 bb.0:
106106 ; GCN-LABEL: name: smrd_load8_overwrite_ptr
107 ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
108 ; GCN-NEXT: S_ENDPGM 0
109 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
107 ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
108 ; GCN-NEXT: S_ENDPGM 0
109 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
110110 S_ENDPGM 0
111111 ...
112112 ---
118118 body: |
119119 bb.0:
120120 ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
121 ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
122 ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
123 ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
124 ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
125 ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
126 ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
127 ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
128 ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
129 ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
130 ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
131 ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
132 ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
133 ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
134 ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
135 ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
136 ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
137 ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
121 ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
122 ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
123 ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
124 ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
125 ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
126 ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
127 ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
128 ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
129 ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
130 ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
131 ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
132 ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
133 ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
134 ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
135 ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
136 ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
137 ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
138138 ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
139139 ; GCN-NEXT: S_ENDPGM 0
140 $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
141 $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
142 $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
143 $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
144
145 $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
146 $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
147 $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
148 $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
149
150 $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
151 $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
152 $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
153 $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
154
155 $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
156 $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
157 $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
158 $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
159
160 $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
140 $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
141 $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
142 $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
143 $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
144
145 $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
146 $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
147 $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
148 $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
149
150 $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
151 $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
152 $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
153 $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
154
155 $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
156 $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
157 $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
158 $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
159
160 $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
161161 $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
162162 S_ENDPGM 0
163163 ...
168168 body: |
169169 bb.0:
170170 ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
171 ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
171 ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
172172 ; XNACK-NEXT: S_NOP 0
173 ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
174 ; GCN-NEXT: S_ENDPGM 0
175 $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
176 $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
173 ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
174 ; GCN-NEXT: S_ENDPGM 0
175 $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
176 $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
177177 S_ENDPGM 0
178178 ...
179179 ---
183183 body: |
184184 bb.0:
185185 ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
186 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
187 ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
188 ; GCN-NEXT: S_ENDPGM 0
189 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
190 $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
186 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
187 ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
188 ; GCN-NEXT: S_ENDPGM 0
189 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
190 $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
191191 S_ENDPGM 0
192192 ...
193193 ---
197197 body: |
198198 bb.0:
199199 ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
200 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
200 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
201201 ; XNACK-NEXT: S_NOP 0
202 ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
203 ; GCN-NEXT: S_ENDPGM 0
204 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
205 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
202 ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
203 ; GCN-NEXT: S_ENDPGM 0
204 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
205 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
206206 S_ENDPGM 0
207207 ...
208208 ---
212212 body: |
213213 bb.0:
214214 ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
215 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
216 ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
217 ; GCN-NEXT: S_ENDPGM 0
218 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
219 $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
215 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
216 ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
217 ; GCN-NEXT: S_ENDPGM 0
218 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
219 $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
220220 S_ENDPGM 0
221221 ...
222222 ---
227227 ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
228228 ; GCN: bb.0:
229229 ; GCN: successors: %bb.1(0x80000000)
230 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
230 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
231231 ; GCN: bb.1:
232232 ; XNACK-NEXT: S_NOP 0
233 ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
233 ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
234234 ; GCN-NEXT: S_ENDPGM 0
235235 bb.0:
236 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
236 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
237237
238238 bb.1:
239 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
239 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
240240 S_ENDPGM 0
241241 ...
242242 ---
247247 body: |
248248 bb.0:
249249 ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
250 ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
251 ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
252 ; GCN-NEXT: S_ENDPGM 0
253 S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
254 $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
250 ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
251 ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
252 ; GCN-NEXT: S_ENDPGM 0
253 S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
254 $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
255255 S_ENDPGM 0
256256 ...
257257 ---
263263 body: |
264264 bb.0:
265265 ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
266 ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
267 ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
268 ; GCN-NEXT: S_ENDPGM 0
269 S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
270 $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
266 ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
267 ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
268 ; GCN-NEXT: S_ENDPGM 0
269 S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
270 $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
271271 S_ENDPGM 0
272272 ...
273273 ---
277277 body: |
278278 bb.0:
279279 ; GCN-LABEL: name: valu_inst_breaks_smem_clause
280 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
280 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
281281 ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
282 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
283 ; GCN-NEXT: S_ENDPGM 0
284 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
282 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
283 ; GCN-NEXT: S_ENDPGM 0
284 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
285285 $vgpr8 = V_MOV_B32_e32 0, implicit $exec
286 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
286 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
287287 S_ENDPGM 0
288288 ...
289289 ---
293293 body: |
294294 bb.0:
295295 ; GCN-LABEL: name: salu_inst_breaks_smem_clause
296 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
296 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
297297 ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
298 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
299 ; GCN-NEXT: S_ENDPGM 0
300 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
298 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
299 ; GCN-NEXT: S_ENDPGM 0
300 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
301301 $sgpr8 = S_MOV_B32 0
302 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
302 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
303303 S_ENDPGM 0
304304 ...
305305 ---
308308 body: |
309309 bb.0:
310310 ; GCN-LABEL: name: ds_inst_breaks_smem_clause
311 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
311 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
312312 ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
313 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
314 ; GCN-NEXT: S_ENDPGM 0
315 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
313 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
314 ; GCN-NEXT: S_ENDPGM 0
315 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
316316 $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
317 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
317 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
318318 S_ENDPGM 0
319319 ...
320320 ---
324324 body: |
325325 bb.0:
326326 ; GCN-LABEL: name: flat_inst_breaks_smem_clause
327 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
328 ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
329 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
330 ; GCN-NEXT: S_ENDPGM 0
331 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
332 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
333 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
327 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
328 ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
329 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
330 ; GCN-NEXT: S_ENDPGM 0
331 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
332 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
333 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
334334 S_ENDPGM 0
335335 ...
336336 ---
340340 body: |
341341 bb.0:
342342 ; GCN-LABEL: name: implicit_use_breaks_smem_clause
343 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
343 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
344344 ; XNACK-NEXT: S_NOP 0
345 ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
346 ; GCN-NEXT: S_ENDPGM 0
347 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
348 $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
349 S_ENDPGM 0
350 ...
345 ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
346 ; GCN-NEXT: S_ENDPGM 0
347 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
348 $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
349 S_ENDPGM 0
350 ...
66 body: |
77 bb.0:
88 ; GCN-LABEL: name: trivial_clause_load_flat4_x1
9 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
10 ; GCN-NEXT: S_ENDPGM 0
11
12 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
9 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
10 ; GCN-NEXT: S_ENDPGM 0
11
12 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
1313 S_ENDPGM 0
1414 ...
1515 ---
1919 body: |
2020 bb.0:
2121 ; GCN-LABEL: name: trivial_clause_load_flat4_x2
22 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
23 ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
24 ; GCN-NEXT: S_ENDPGM 0
25
26 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
27 $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
22 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
23 ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
24 ; GCN-NEXT: S_ENDPGM 0
25
26 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
27 $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
2828 S_ENDPGM 0
2929 ...
3030 ---
3434 body: |
3535 bb.0:
3636 ; GCN-LABEL: name: trivial_clause_load_flat4_x3
37 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
38 ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
39 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
40 ; GCN-NEXT: S_ENDPGM 0
41
42 $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
43 $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
44 $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
37 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
38 ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
39 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
40 ; GCN-NEXT: S_ENDPGM 0
41
42 $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
43 $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
44 $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
4545 S_ENDPGM 0
4646 ...
4747 ---
5151 body: |
5252 bb.0:
5353 ; GCN-LABEL: name: trivial_clause_load_flat4_x4
54 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
55 ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
56 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
57 ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
58 ; GCN-NEXT: S_ENDPGM 0
59
60 $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
61 $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
62 $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
63 $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
54 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
55 ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
56 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
57 ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
58 ; GCN-NEXT: S_ENDPGM 0
59
60 $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
61 $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
62 $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
63 $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
6464 S_ENDPGM 0
6565 ...
6666 ---
7070 body: |
7171 bb.0:
7272 ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
73 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
74 ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
75 ; GCN-NEXT: S_ENDPGM 0
76
77 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
78 $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
73 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
74 ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
75 ; GCN-NEXT: S_ENDPGM 0
76
77 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
78 $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
7979 S_ENDPGM 0
8080 ...
8181 ---
8585 body: |
8686 bb.0:
8787 ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
88 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
89 ; GCN-NEXT: S_ENDPGM 0
90
91 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
88 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
89 ; GCN-NEXT: S_ENDPGM 0
90
91 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
9292 S_ENDPGM 0
9393 ...
9494 ---
9898 body: |
9999 bb.0:
100100 ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
101 ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
102 ; GCN-NEXT: S_ENDPGM 0
103
104 $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
101 ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
102 ; GCN-NEXT: S_ENDPGM 0
103
104 $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
105105 S_ENDPGM 0
106106 ...
107107 ---
111111 body: |
112112 bb.0:
113113 ; GCN-LABEL: name: flat_load8_overwrite_ptr
114 ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
115 ; GCN-NEXT: S_ENDPGM 0
116
117 $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
114 ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
115 ; GCN-NEXT: S_ENDPGM 0
116
117 $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
118118 S_ENDPGM 0
119119 ...
120120 ---
127127 body: |
128128 bb.0:
129129 ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
130 ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
131 ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
132 ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
133 ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
134 ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
135 ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
136 ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
137 ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
138 ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
139 ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
140 ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
141 ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
142 ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
143 ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
144 ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
145 ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
146 ; XNACK-NEXT: S_NOP 0
147 ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
130 ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
131 ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
132 ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
133 ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
134 ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
135 ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
136 ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
137 ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
138 ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
139 ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
140 ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
141 ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
142 ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
143 ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
144 ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
145 ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
146 ; XNACK-NEXT: S_NOP 0
147 ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
148148 ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
149149 ; GCN-NEXT: S_ENDPGM 0
150150
151 $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
152 $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
153 $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
154 $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
155
156 $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
157 $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
158 $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
159 $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
160
161 $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
162 $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
163 $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
164 $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
165
166 $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
167 $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
168 $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
169 $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
170
171 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
151 $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
152 $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
153 $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
154 $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
155
156