llvm.org GIT mirror llvm / 4923776
AMDGPU: Start adding offset fields to flat instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305194 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 2 years ago
15 changed file(s) with 206 addition(s) and 96 deletion(s). Raw diff Collapse all Expand all
137137 bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
138138 SDValue &ImmOffset, SDValue &VOffset) const;
139139
140 bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const;
140 bool SelectFlat(SDValue Addr, SDValue &VAddr,
141 SDValue &Offset, SDValue &SLC) const;
141142
142143 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
143144 bool &Imm) const;
13141315
13151316 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
13161317 SDValue &VAddr,
1318 SDValue &Offset,
13171319 SDValue &SLC) const {
13181320 VAddr = Addr;
1321 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i16);
13191322 SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
13201323 return true;
13211324 }
125125 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
126126 .add(I.getOperand(1))
127127 .add(I.getOperand(0))
128 .addImm(0)
129 .addImm(0);
128 .addImm(0) // offset
129 .addImm(0) // glc
130 .addImm(0); // slc
130131
131132
132133 // Now that we selected an opcode, we need to constrain the register
391392 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
392393 .add(I.getOperand(0))
393394 .addReg(PtrReg)
394 .addImm(0)
395 .addImm(0);
395 .addImm(0) // offset
396 .addImm(0) // glc
397 .addImm(0); // slc
396398
397399 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
398400 I.eraseFromParent();
284284 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
285285 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
286286 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
287
288 bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
289 bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
287290 bool isGDS() const { return isImmTy(ImmTyGDS); }
288291 bool isGLC() const { return isImmTy(ImmTyGLC); }
289292 bool isSLC() const { return isImmTy(ImmTySLC); }
885888 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
886889 }
887890
891 bool hasFlatOffsets() const {
892 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
893 }
894
888895 bool hasSGPR102_SGPR103() const {
889896 return !isVI();
890897 }
10331040 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
10341041 AMDGPUOperand::Ptr defaultSMRDOffset20() const;
10351042 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1043 AMDGPUOperand::Ptr defaultOffsetU12() const;
1044 AMDGPUOperand::Ptr defaultOffsetS13() const;
10361045
10371046 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
10381047
19671976 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
19681977 return Match_InvalidOperand;
19691978 }
1979 }
1980
1981 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
1982 // FIXME: Produces error without correct column reported.
1983 auto OpNum =
1984 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
1985 const auto &Op = Inst.getOperand(OpNum);
1986 if (Op.getImm() != 0)
1987 return Match_InvalidOperand;
19701988 }
19711989
19721990 return Match_Success;
38483866 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
38493867 }
38503868
3869 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
3870 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
3871 }
3872
3873 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
3874 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
3875 }
3876
38513877 //===----------------------------------------------------------------------===//
38523878 // vop3
38533879 //===----------------------------------------------------------------------===//
66 //
77 //===----------------------------------------------------------------------===//
88
9 def FLATAtomic : ComplexPattern<i64, 2, "SelectFlat">;
9 def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
1010
1111 //===----------------------------------------------------------------------===//
1212 // FLAT classes
5454 // copy relevant pseudo op flags
5555 let SubtargetPredicate = ps.SubtargetPredicate;
5656 let AsmMatchConverter = ps.AsmMatchConverter;
57 let TSFlags = ps.TSFlags;
58 let UseNamedOperandTable = ps.UseNamedOperandTable;
5759
5860 // encoding fields
5961 bits<8> vaddr;
6264 bits<1> slc;
6365 bits<1> glc;
6466
67 // Only valid on gfx9
68 bits<1> lds = 0; // XXX - What does this actually do?
69 bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
70
71 // Signed offset. Highest bit ignored for flat and treated as 12-bit
72 // unsigned for flat accesses.
73 bits<13> offset;
74 bits<1> nv = 0; // XXX - What does this actually do?
75
6576 // We don't use tfe right now, and it was removed in gfx9.
6677 bits<1> tfe = 0;
6778
68 // 15-0 is reserved.
79 // Only valid on GFX9+
80 let Inst{12-0} = offset;
81 let Inst{13} = lds;
82 let Inst{15-14} = 0;
83
6984 let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
7085 let Inst{17} = slc;
7186 let Inst{24-18} = op;
7388 let Inst{39-32} = vaddr;
7489 let Inst{47-40} = !if(ps.has_data, vdata, ?);
7590 // 54-48 is reserved.
76 let Inst{55} = tfe;
91 let Inst{55} = nv; // nv on GFX9+, TFE before.
7792 let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
7893 }
7994
80 class FLAT_Load_Pseudo <string opName, RegisterClass regClass> : FLAT_Pseudo<
95 class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
96 bit HasSignedOffset = 0> : FLAT_Pseudo<
8197 opName,
8298 (outs regClass:$vdst),
83 (ins VReg_64:$vaddr, GLC:$glc, slc:$slc),
84 " $vdst, $vaddr$glc$slc"> {
99 !if(HasSignedOffset,
100 (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc),
101 (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)),
102 " $vdst, $vaddr$offset$glc$slc"> {
85103 let has_data = 0;
86104 let mayLoad = 1;
87105 }
88106
89 class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass> : FLAT_Pseudo<
107 class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
108 bit HasSignedOffset = 0> : FLAT_Pseudo<
90109 opName,
91110 (outs),
92 (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc),
93 " $vaddr, $vdata$glc$slc"> {
111 !if(HasSignedOffset,
112 (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc),
113 (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)),
114 " $vaddr, $vdata$offset$glc$slc"> {
94115 let mayLoad = 0;
95116 let mayStore = 1;
96117 let has_vdst = 0;
102123 ValueType vt,
103124 SDPatternOperator atomic = null_frag,
104125 ValueType data_vt = vt,
105 RegisterClass data_rc = vdst_rc> {
126 RegisterClass data_rc = vdst_rc,
127 bit HasSignedOffset = 0> {
106128
107129 def "" : FLAT_Pseudo
108130 (outs),
109 (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc),
110 " $vaddr, $vdata$slc",
131 !if(HasSignedOffset,
132 (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
133 (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
134 " $vaddr, $vdata$offset$slc",
111135 []>,
112136 AtomicNoRet {
113137 let mayLoad = 1;
120144
121145 def _RTN : FLAT_Pseudo
122146 (outs vdst_rc:$vdst),
123 (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc),
124 " $vdst, $vaddr, $vdata glc$slc",
147 !if(HasSignedOffset,
148 (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
149 (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
150 " $vdst, $vaddr, $vdata$offset glc$slc",
125151 [(set vt:$vdst,
126 (atomic (FLATAtomic i64:$vaddr, i1:$slc), data_vt:$vdata))]>,
152 (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
127153 AtomicNoRet {
128154 let mayLoad = 1;
129155 let mayStore = 1;
312338 // Patterns for global loads with no offset.
313339 class FlatLoadPat : Pat <
314340 (vt (node i64:$addr)),
315 (inst $addr, 0, 0)
341 (inst $addr, 0, 0, 0)
316342 >;
317343
318344 class FlatLoadAtomicPat : Pat <
319345 (vt (node i64:$addr)),
320 (inst $addr, 1, 0)
346 (inst $addr, 0, 1, 0)
321347 >;
322348
323349 class FlatStorePat : Pat <
324350 (node vt:$data, i64:$addr),
325 (inst $addr, $data, 0, 0)
351 (inst $addr, $data, 0, 0, 0)
326352 >;
327353
328354 class FlatStoreAtomicPat : Pat <
329355 // atomic store follows atomic binop convention so the address comes
330356 // first.
331357 (node i64:$addr, vt:$data),
332 (inst $addr, $data, 1, 0)
358 (inst $addr, $data, 0, 1, 0)
333359 >;
334360
335361 class FlatAtomicPat
336362 ValueType data_vt = vt> : Pat <
337363 (vt (node i64:$addr, data_vt:$data)),
338 (inst $addr, $data, 0)
364 (inst $addr, $data, 0, 0)
339365 >;
340366
341367 let Predicates = [isCIVI] in {
491491 let ParserMatchClass = MatchClass;
492492 }
493493
494 class NamedOperandU12 : Operand {
495 let PrintMethod = "print"#Name;
496 let ParserMatchClass = MatchClass;
497 }
498
494499 class NamedOperandU16 : Operand {
500 let PrintMethod = "print"#Name;
501 let ParserMatchClass = MatchClass;
502 }
503
504 class NamedOperandS13 : Operand {
495505 let PrintMethod = "print"#Name;
496506 let ParserMatchClass = MatchClass;
497507 }
513523 def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
514524 def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
515525
526 def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>;
527 def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>;
516528 def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
517529 def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
518530 def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
1313
1414 # GCN: global_addrspace
1515 # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
16 # GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0
16 # GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0
1717
1818 body: |
1919 bb.0:
1414 # GCN: global_addrspace
1515 # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
1616 # GCN: [[VAL:%[0-9]+]] = COPY %vgpr2
17 # GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0
17 # GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0
1818
1919 body: |
2020 bb.0:
218218 %34 = V_MOV_B32_e32 63, implicit %exec
219219
220220 %27 = V_AND_B32_e64 %26, %24, implicit %exec
221 FLAT_STORE_DWORD %37, %27, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
221 FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
222222
223223 %28 = V_AND_B32_e64 %24, %26, implicit %exec
224 FLAT_STORE_DWORD %37, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
224 FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
225225
226226 %29 = V_AND_B32_e32 %26, %24, implicit %exec
227 FLAT_STORE_DWORD %37, %29, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
227 FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
228228
229229 %30 = V_AND_B32_e64 %26, %26, implicit %exec
230 FLAT_STORE_DWORD %37, %30, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
230 FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
231231
232232 %31 = V_AND_B32_e64 %34, %34, implicit %exec
233 FLAT_STORE_DWORD %37, %31, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
233 FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
234234
235235 S_ENDPGM
236236
406406 %27 = S_MOV_B32 -4
407407
408408 %11 = V_LSHLREV_B32_e64 12, %10, implicit %exec
409 FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
409 FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
410410
411411 %12 = V_LSHLREV_B32_e64 %7, 12, implicit %exec
412 FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
412 FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
413413
414414 %13 = V_LSHL_B32_e64 %7, 12, implicit %exec
415 FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
415 FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
416416
417417 %14 = V_LSHL_B32_e64 12, %7, implicit %exec
418 FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
418 FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
419419
420420 %15 = V_LSHL_B32_e64 12, %24, implicit %exec
421 FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
421 FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
422422
423423 %22 = V_LSHL_B32_e64 %6, 12, implicit %exec
424 FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
424 FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
425425
426426 %23 = V_LSHL_B32_e64 %6, 32, implicit %exec
427 FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
427 FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
428428
429429 %25 = V_LSHL_B32_e32 %6, %6, implicit %exec
430 FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
430 FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
431431
432432 %26 = V_LSHLREV_B32_e32 11, %24, implicit %exec
433 FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
433 FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
434434
435435 %28 = V_LSHL_B32_e32 %27, %6, implicit %exec
436 FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
436 FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
437437
438438 S_ENDPGM
439439
614614 %35 = V_MOV_B32_e32 2, implicit %exec
615615
616616 %11 = V_ASHRREV_I32_e64 8, %10, implicit %exec
617 FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
617 FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
618618
619619 %12 = V_ASHRREV_I32_e64 %8, %10, implicit %exec
620 FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
620 FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
621621
622622 %13 = V_ASHR_I32_e64 %7, 3, implicit %exec
623 FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
623 FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
624624
625625 %14 = V_ASHR_I32_e64 7, %32, implicit %exec
626 FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
626 FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
627627
628628 %15 = V_ASHR_I32_e64 %27, %24, implicit %exec
629 FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
629 FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
630630
631631 %22 = V_ASHR_I32_e64 %6, 4, implicit %exec
632 FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
632 FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
633633
634634 %23 = V_ASHR_I32_e64 %6, %33, implicit %exec
635 FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
635 FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
636636
637637 %25 = V_ASHR_I32_e32 %34, %34, implicit %exec
638 FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
638 FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
639639
640640 %26 = V_ASHRREV_I32_e32 11, %10, implicit %exec
641 FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
641 FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
642642
643643 %28 = V_ASHR_I32_e32 %27, %35, implicit %exec
644 FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
644 FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
645645
646646 S_ENDPGM
647647
823823 %35 = V_MOV_B32_e32 2, implicit %exec
824824
825825 %11 = V_LSHRREV_B32_e64 8, %10, implicit %exec
826 FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
826 FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
827827
828828 %12 = V_LSHRREV_B32_e64 %8, %10, implicit %exec
829 FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
829 FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
830830
831831 %13 = V_LSHR_B32_e64 %7, 3, implicit %exec
832 FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
832 FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
833833
834834 %14 = V_LSHR_B32_e64 7, %32, implicit %exec
835 FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
835 FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
836836
837837 %15 = V_LSHR_B32_e64 %27, %24, implicit %exec
838 FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
838 FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
839839
840840 %22 = V_LSHR_B32_e64 %6, 4, implicit %exec
841 FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
841 FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
842842
843843 %23 = V_LSHR_B32_e64 %6, %33, implicit %exec
844 FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
844 FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
845845
846846 %25 = V_LSHR_B32_e32 %34, %34, implicit %exec
847 FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
847 FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
848848
849849 %26 = V_LSHRREV_B32_e32 11, %10, implicit %exec
850 FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
850 FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
851851
852852 %28 = V_LSHR_B32_e32 %27, %35, implicit %exec
853 FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
853 FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
854854
855855 S_ENDPGM
856856
245245 S_BRANCH %bb.1
246246
247247 bb.1:
248 FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, implicit %exec, implicit %flat_scr
249 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
250 FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, implicit %exec, implicit %flat_scr
251 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
252 FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
253 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
254 FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr
255 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
256 FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr
248 FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
249 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
250 FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
251 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
252 FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
253 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
254 FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
255 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
256 FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
257257 %vgpr3 = V_MOV_B32_e32 0, implicit %exec
258258 S_ENDPGM
259259
5656 %4.sub1 = COPY %3.sub0
5757 undef %5.sub0 = COPY %4.sub1
5858 %5.sub1 = COPY %4.sub0
59 FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, implicit %exec, implicit %flat_scr
59 FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, 0, implicit %exec, implicit %flat_scr
6060
6161 %6 = IMPLICIT_DEF
6262 undef %7.sub0_sub1 = COPY %6
6363 %7.sub2 = COPY %3.sub0
64 FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, implicit %exec, implicit %flat_scr
64 FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, 0, implicit %exec, implicit %flat_scr
6565
6666 %8 = IMPLICIT_DEF
6767 undef %9.sub0_sub1_sub2 = COPY %8
6868 %9.sub3 = COPY %3.sub0
69 FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, implicit %exec, implicit %flat_scr
69 FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, 0, implicit %exec, implicit %flat_scr
7070 ...
5757
5858 bb.3:
5959 %1 = COPY killed %17
60 FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr
60 FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit %exec, implicit %flat_scr
6161 %14 = COPY %1.sub1
6262 %16 = COPY killed %1.sub0
6363 undef %15.sub0 = COPY killed %16
6464 %15.sub1 = COPY killed %14
65 FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr
65 FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit %exec, implicit %flat_scr
6666 S_ENDPGM
6767
6868 ...
213213 %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc
214214 %16 = REG_SEQUENCE %14, 1, %15, 2
215215 %18 = COPY %16
216 %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
216 %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
217217 %60 = V_BFE_U32 %17, 8, 8, implicit %exec
218218 %61 = V_LSHLREV_B32_e32 2, killed %60, implicit %exec
219219 %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec
220220 %66 = COPY %13
221221 %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
222222 %67 = REG_SEQUENCE %70, 1, killed %65, 2
223 FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
223 FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
224224 %37 = S_ADD_U32 %14, 4, implicit-def %scc
225225 %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc
226226 %71 = COPY killed %37
227227 %72 = COPY killed %38
228228 %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
229 %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
229 %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
230230 %73 = V_BFE_U32 %40, 8, 8, implicit %exec
231231 %74 = V_LSHLREV_B32_e32 2, killed %73, implicit %exec
232232 %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec
233233 %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
234234 %80 = REG_SEQUENCE %83, 1, killed %78, 2
235 FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
235 FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
236236 %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc
237237 %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc
238238 %57 = REG_SEQUENCE %55, 1, killed %56, 2
376376 %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc
377377 %16 = REG_SEQUENCE %14, 1, %15, 2
378378 %18 = COPY %16
379 %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
379 %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
380380 %60 = V_BFE_U32 %17, 8, 8, implicit %exec
381381 %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit %exec
382382 %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec
383383 %66 = COPY %13
384384 %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
385385 %67 = REG_SEQUENCE %70, 1, killed %65, 2
386 FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
386 FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
387387 %37 = S_ADD_U32 %14, 4, implicit-def %scc
388388 %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc
389389 %71 = COPY killed %37
390390 %72 = COPY killed %38
391391 %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
392 %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
392 %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
393393 %73 = V_BFE_U32 %40, 8, 8, implicit %exec
394394 %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit %exec
395395 %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec
396396 %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
397397 %80 = REG_SEQUENCE %83, 1, killed %78, 2
398 FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
398 FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
399399 %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc
400400 %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc
401401 %57 = REG_SEQUENCE %55, 1, killed %56, 2
5050 body: |
5151 bb.0:
5252 successors: %bb.1
53 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
54 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
53 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
54 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
5555 %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
5656 S_BRANCH %bb.1
5757
5858 bb.1:
5959 successors: %bb.2
60 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
61 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
60 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
61 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
6262 %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
6363 S_BRANCH %bb.2
6464
6565 bb.2:
66 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
67 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
66 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
67 %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
6868 %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
6969 S_ENDPGM
7070 ...
8585 body: |
8686 bb.0:
8787 successors: %bb.1
88 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
88 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
8989
9090 bb.1:
9191 %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
92 FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr
92 FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
9393 S_ENDPGM
9494 ...
9595 ---
113113 body: |
114114 bb.0:
115115 successors: %bb.2
116 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
116 %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
117117 S_BRANCH %bb.2
118118
119119 bb.1:
120 FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, implicit %exec, implicit %flat_scr
120 FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr
121121 S_ENDPGM
122122
123123 bb.2:
124124 %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
125 FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr
125 FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
126126 S_ENDPGM
127127 ...
0 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
1 // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
2
3 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s
4 // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s
5
6
7 flat_load_dword v1, v[3:4] offset:0
8 // GCN: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01]
9
10 flat_load_dword v1, v[3:4] offset:-1
11 // GCNERR: :35: error: failed parsing operand.
12
13 // FIXME: Error on VI in wrong column
14 flat_load_dword v1, v[3:4] offset:4095
15 // GFX9: flat_load_dword v1, v[3:4] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x03,0x00,0x00,0x01]
16 // VI-ERR: :1: error: invalid operand for instruction
17
18 flat_load_dword v1, v[3:4] offset:4096
19 // GCNERR: :28: error: invalid operand for instruction
20
21 flat_load_dword v1, v[3:4] offset:4 glc
22 // GFX9: flat_load_dword v1, v[3:4] offset:4 glc ; encoding: [0x04,0x00,0x51,0xdc,0x03,0x00,0x00,0x01]
23 // VI-ERR: :1: error: invalid operand for instruction
24
25 flat_load_dword v1, v[3:4] offset:4 glc slc
26 // GFX9: flat_load_dword v1, v[3:4] offset:4 glc slc ; encoding: [0x04,0x00,0x53,0xdc,0x03,0x00,0x00,0x01]
27 // VI-ERR: :1: error: invalid operand for instruction
28
29 flat_atomic_add v[3:4], v5 offset:8 slc
30 // GFX9: flat_atomic_add v[3:4], v5 offset:8 slc ; encoding: [0x08,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00]
31 // VI-ERR: :1: error: invalid operand for instruction
32
33 flat_atomic_swap v[3:4], v5 offset:16
34 // GFX9: flat_atomic_swap v[3:4], v5 offset:16 ; encoding: [0x10,0x00,0x00,0xdd,0x03,0x05,0x00,0x00]
35 // VI-ERR: :1: error: invalid operand for instruction
36
37 flat_store_dword v[3:4], v1 offset:16
38 // GFX9: flat_store_dword v[3:4], v1 offset:16 ; encoding: [0x10,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
39 // VI-ERR: :1: error: invalid operand for instruction
4848
4949 // FIXME: For atomic instructions, glc must be placed immediately following
5050 // the data register. These forms aren't currently supported:
51 // FIXME: offset:0 required
5152 // flat_atomic_add v1, v[3:4], v5 slc glc
5253
53 flat_atomic_add v1 v[3:4], v5 glc slc
54 flat_atomic_add v1, v[3:4], v5 offset:0 glc slc
5455 // NOSI: error:
5556 // CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01]
5657 // VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01]