llvm.org GIT mirror llvm / b9c9b02
[AMDGPU] hazard recognizer for fp atomic to s_denorm_mode. This requires 3 wait states unless there is a wait or VALU in between. Differential Revision: https://reviews.llvm.org/D63619 | git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364074 91177308-0d34-0410-b5e6-96231b3b80d8 | Stanislav Mekhanoshin, 4 months ago
10 changed file(s) with 559 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
690690 multiclass MUBUF_Pseudo_Atomics_NO_RTN
691691 RegisterClass vdataClass,
692692 ValueType vdataType,
693 SDPatternOperator atomic> {
693 SDPatternOperator atomic,
694 bit isFP = getIsFP.ret> {
695 let FPAtomic = isFP in
694696 def _OFFSET : MUBUF_AtomicNoRet_Pseudo ,
695697 MUBUFAddr64Table <0, NAME>;
698
699 let FPAtomic = isFP in
696700 def _ADDR64 : MUBUF_AtomicNoRet_Pseudo ,
697701 MUBUFAddr64Table <1, NAME>;
702
703 let FPAtomic = isFP in
698704 def _OFFEN : MUBUF_AtomicNoRet_Pseudo ;
705
706 let FPAtomic = isFP in
707
699708 def _IDXEN : MUBUF_AtomicNoRet_Pseudo ;
709
710 let FPAtomic = isFP in
700711 def _BOTHEN : MUBUF_AtomicNoRet_Pseudo ;
701712 }
702713
703714 multiclass MUBUF_Pseudo_Atomics_RTN
704715 RegisterClass vdataClass,
705716 ValueType vdataType,
706 SDPatternOperator atomic> {
717 SDPatternOperator atomic,
718 bit isFP = getIsFP.ret> {
719 let FPAtomic = isFP in
707720 def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo
708721 [(set vdataType:$vdata,
709722 (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
710723 vdataType:$vdata_in))]>,
711724 MUBUFAddr64Table <0, NAME # "_RTN">;
712725
726 let FPAtomic = isFP in
713727 def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo
714728 [(set vdataType:$vdata,
715729 (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
716730 vdataType:$vdata_in))]>,
717731 MUBUFAddr64Table <1, NAME # "_RTN">;
718732
733 let FPAtomic = isFP in
719734 def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo ;
735
736 let FPAtomic = isFP in
720737 def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo ;
738
739 let FPAtomic = isFP in
721740 def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo ;
722741 }
723742
272272 ValueType vt,
273273 SDPatternOperator atomic = null_frag,
274274 ValueType data_vt = vt,
275 RegisterClass data_rc = vdst_rc> {
275 RegisterClass data_rc = vdst_rc,
276 bit isFP = getIsFP.ret> {
276277 def "" : FLAT_AtomicNoRet_Pseudo
277278 (outs),
278279 (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
280281 GlobalSaddrTable<0, opName>,
281282 AtomicNoRet {
282283 let PseudoInstr = NAME;
284 let FPAtomic = isFP;
283285 }
284286
285287 def _RTN : FLAT_AtomicRet_Pseudo
289291 [(set vt:$vdst,
290292 (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
291293 GlobalSaddrTable<0, opName#"_rtn">,
292 AtomicNoRet ;
294 AtomicNoRet {
295 let FPAtomic = isFP;
296 }
293297 }
294298
295299 multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
298302 ValueType vt,
299303 SDPatternOperator atomic = null_frag,
300304 ValueType data_vt = vt,
301 RegisterClass data_rc = vdst_rc> {
305 RegisterClass data_rc = vdst_rc,
306 bit isFP = getIsFP.ret> {
302307
303308 def "" : FLAT_AtomicNoRet_Pseudo
304309 (outs),
308313 AtomicNoRet {
309314 let has_saddr = 1;
310315 let PseudoInstr = NAME;
316 let FPAtomic = isFP;
311317 }
312318
313319 def _SADDR : FLAT_AtomicNoRet_Pseudo
319325 let has_saddr = 1;
320326 let enabled_saddr = 1;
321327 let PseudoInstr = NAME#"_SADDR";
328 let FPAtomic = isFP;
322329 }
323330 }
324331
328335 ValueType vt,
329336 SDPatternOperator atomic = null_frag,
330337 ValueType data_vt = vt,
331 RegisterClass data_rc = vdst_rc> {
338 RegisterClass data_rc = vdst_rc,
339 bit isFP = getIsFP.ret> {
332340
333341 def _RTN : FLAT_AtomicRet_Pseudo
334342 (outs vdst_rc:$vdst),
339347 GlobalSaddrTable<0, opName#"_rtn">,
340348 AtomicNoRet {
341349 let has_saddr = 1;
350 let FPAtomic = isFP;
342351 }
343352
344353 def _SADDR_RTN : FLAT_AtomicRet_Pseudo
350359 let has_saddr = 1;
351360 let enabled_saddr = 1;
352361 let PseudoInstr = NAME#"_SADDR_RTN";
362 let FPAtomic = isFP;
353363 }
354364 }
355365
144144 if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
145145 return NoopHazard;
146146
147 if (checkFPAtomicToDenormModeHazard(MI) > 0)
148 return NoopHazard;
149
147150 if (ST.hasNoDataDepHazard())
148151 return NoHazard;
149152
245248
246249 if (ST.hasNSAtoVMEMBug())
247250 WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
251
252 WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
248253
249254 if (ST.hasNoDataDepHazard())
250255 return WaitStates;
11371142
11381143 return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
11391144 }
1145
1146 int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
1147 int FPAtomicToDenormModeWaitStates = 3;
1148
1149 if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
1150 return 0;
1151
1152 auto IsHazardFn = [] (MachineInstr *I) {
1153 if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
1154 return false;
1155 return SIInstrInfo::isFPAtomic(*I);
1156 };
1157
1158 auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
1159 if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI))
1160 return true;
1161
1162 switch (MI->getOpcode()) {
1163 case AMDGPU::S_WAITCNT:
1164 case AMDGPU::S_WAITCNT_VSCNT:
1165 case AMDGPU::S_WAITCNT_VMCNT:
1166 case AMDGPU::S_WAITCNT_EXPCNT:
1167 case AMDGPU::S_WAITCNT_LGKMCNT:
1168 case AMDGPU::S_WAITCNT_IDLE:
1169 return true;
1170 default:
1171 break;
1172 }
1173
1174 return false;
1175 };
1176
1177
1178 return FPAtomicToDenormModeWaitStates -
1179 ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
1180 }
8383 int checkAnyInstHazards(MachineInstr *MI);
8484 int checkReadM0Hazards(MachineInstr *SMovRel);
8585 int checkNSAtoVMEMHazard(MachineInstr *MI);
86 int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
8687 void fixHazards(MachineInstr *MI);
8788 bool fixVcmpxPermlaneHazards(MachineInstr *MI);
8889 bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
715715 defm IMAGE_ATOMIC_XOR : MIMG_Atomic , "image_atomic_xor">;
716716 defm IMAGE_ATOMIC_INC : MIMG_Atomic , "image_atomic_inc">;
717717 defm IMAGE_ATOMIC_DEC : MIMG_Atomic , "image_atomic_dec">;
718 //let FPAtomic = 1 in {
718719 //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d, 1>; -- not on VI
719720 //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
720721 //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
722 //} // End let FPAtomic = 1
721723 defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
722724 defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
723725 defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>;
9292 IsNonFlatSeg = UINT64_C(1) << 51,
9393
9494 // Uses floating point double precision rounding mode
95 FPDPRounding = UINT64_C(1) << 52
95 FPDPRounding = UINT64_C(1) << 52,
96
97 // Instruction is FP atomic.
98 FPAtomic = UINT64_C(1) << 53
9699 };
97100
98101 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
117117 // rounding mode flags
118118 field bit FPDPRounding = 0;
119119
120 // Instruction is FP atomic.
121 field bit FPAtomic = 0;
122
120123 // These need to be kept in sync with the enum in SIInstrFlags.
121124 let TSFlags{0} = SALU;
122125 let TSFlags{1} = VALU;
177180 let TSFlags{51} = IsNonFlatSeg;
178181
179182 let TSFlags{52} = FPDPRounding;
183
184 let TSFlags{53} = FPAtomic;
180185
181186 let SchedRW = [Write32Bit];
182187
630630 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
631631 }
632632
633 static bool isFPAtomic(const MachineInstr &MI) {
634 return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
635 }
636
637 bool isFPAtomic(uint16_t Opcode) const {
638 return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
639 }
640
633641 bool isVGPRCopy(const MachineInstr &MI) const {
634642 assert(MI.isCopy());
635643 unsigned Dest = MI.getOperand(0).getReg();
12421242 VOPDstS64orS32)))); // else VT == i1
12431243 }
12441244
// Returns 1 in `ret` if VT is one of the floating point value types tested
// below (f16, v2f16, f32, v2f32, f64, v2f64) and 0 for any other type.
// VT is the class's template argument; users read the result as
// getIsFP<SomeVT>.ret.
class getIsFP<ValueType VT> {
  bit ret = !if(!eq(VT.Value, f16.Value), 1,
            !if(!eq(VT.Value, v2f16.Value), 1,
            !if(!eq(VT.Value, f32.Value), 1,
            !if(!eq(VT.Value, v2f32.Value), 1,
            !if(!eq(VT.Value, f64.Value), 1,
            !if(!eq(VT.Value, v2f64.Value), 1,
            0))))));
}
1255
12451256 // Returns the register class to use for the destination of VOP[12C]
12461257 // instructions with SDWA extension
12471258 class getSDWADstForVT {
12531264 // Returns the register class to use for source 0 of VOP[12C]
12541265 // instructions for the given VT.
12551266 class getVOPSrc0ForVT {
1256 bit isFP = !if(!eq(VT.Value, f16.Value), 1,
1257 !if(!eq(VT.Value, v2f16.Value), 1,
1258 !if(!eq(VT.Value, f32.Value), 1,
1259 !if(!eq(VT.Value, f64.Value), 1,
1260 0))));
1267 bit isFP = getIsFP.ret;
12611268
12621269 RegisterOperand ret =
12631270 !if(isFP,
12911298 }
12921299
12931300 class getSDWASrcForVT {
1294 bit isFP = !if(!eq(VT.Value, f16.Value), 1,
1295 !if(!eq(VT.Value, f32.Value), 1,
1296 0));
1301 bit isFP = getIsFP.ret;
12971302 RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
12981303 RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
12991304 RegisterOperand ret = !if(isFP, retFlt, retInt);
13021307 // Returns the register class to use for sources of VOP3 instructions for the
13031308 // given VT.
13041309 class getVOP3SrcForVT {
1305 bit isFP = !if(!eq(VT.Value, f16.Value), 1,
1306 !if(!eq(VT.Value, v2f16.Value), 1,
1307 !if(!eq(VT.Value, f32.Value), 1,
1308 !if(!eq(VT.Value, f64.Value), 1,
1309 0))));
1310 bit isFP = getIsFP.ret;
13101311 RegisterOperand ret =
13111312 !if(!eq(VT.Size, 128),
13121313 VSrc_128,
13501351
13511352 // Return type of input modifiers operand for specified input operand
13521353 class getSrcMod {
1353 bit isFP = !if(!eq(VT.Value, f16.Value), 1,
1354 !if(!eq(VT.Value, f32.Value), 1,
1355 !if(!eq(VT.Value, f64.Value), 1,
1356 0)));
1354 bit isFP = getIsFP.ret;
13571355 bit isPacked = isPackedType.ret;
13581356 Operand ret = !if(!eq(VT.Size, 64),
13591357 !if(isFP, FP64InputMods, Int64InputMods),
13721370
13731371 // Return type of input modifiers operand specified input operand for DPP
13741372 class getSrcModExt {
1375 bit isFP = !if(!eq(VT.Value, f16.Value), 1,
1376 !if(!eq(VT.Value, f32.Value), 1,
1377 !if(!eq(VT.Value, f64.Value), 1,
1378 0)));
1373 bit isFP = getIsFP.ret;
13791374 Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
13801375 }
13811376
0 # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
1
2 # GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode
3 # GCN: FLAT_ATOMIC_FCMPSWAP
4 # GCN-NEXT: S_NOP 0
5 # GCN-NEXT: S_NOP 0
6 # GCN-NEXT: S_NOP 0
7 # GCN-NEXT: S_DENORM_MODE
8 ---
9 name: flat_atomic_fcmpswap_to_s_denorm_mode
10 body: |
11 bb.0:
12 FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
13 S_DENORM_MODE 0
14 ...
15
16 # GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
17 # GCN: FLAT_ATOMIC_FCMPSWAP_X2
18 # GCN-NEXT: S_NOP 0
19 # GCN-NEXT: S_NOP 0
20 # GCN-NEXT: S_NOP 0
21 # GCN-NEXT: S_DENORM_MODE
22 ---
23 name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
24 body: |
25 bb.0:
26 FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
27 S_DENORM_MODE 0
28 ...
29
30 # GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
31 # GCN: FLAT_ATOMIC_FMAX
32 # GCN-NEXT: S_NOP 0
33 # GCN-NEXT: S_NOP 0
34 # GCN-NEXT: S_NOP 0
35 # GCN-NEXT: S_DENORM_MODE
36 ---
37 name: flat_atomic_fmax_to_s_denorm_mode
38 body: |
39 bb.0:
40 FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
41 S_DENORM_MODE 0
42 ...
43
44 # GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
45 # GCN: FLAT_ATOMIC_FMAX_X2
46 # GCN-NEXT: S_NOP 0
47 # GCN-NEXT: S_NOP 0
48 # GCN-NEXT: S_NOP 0
49 # GCN-NEXT: S_DENORM_MODE
50 ---
51 name: flat_atomic_fmax_x2_to_s_denorm_mode
52 body: |
53 bb.0:
54 FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
55 S_DENORM_MODE 0
56 ...
57
58 # GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
59 # GCN: FLAT_ATOMIC_FMIN
60 # GCN-NEXT: S_NOP 0
61 # GCN-NEXT: S_NOP 0
62 # GCN-NEXT: S_NOP 0
63 # GCN-NEXT: S_DENORM_MODE
64 ---
65 name: flat_atomic_fmin_to_s_denorm_mode
66 body: |
67 bb.0:
68 FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
69 S_DENORM_MODE 0
70 ...
71
72 # GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
73 # GCN: FLAT_ATOMIC_FMIN_X2
74 # GCN-NEXT: S_NOP 0
75 # GCN-NEXT: S_NOP 0
76 # GCN-NEXT: S_NOP 0
77 # GCN-NEXT: S_DENORM_MODE
78 ---
79 name: flat_atomic_fmin_x2_to_s_denorm_mode
80 body: |
81 bb.0:
82 FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
83 S_DENORM_MODE 0
84 ...
85
86 # GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
87 # GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
88 # GCN-NEXT: S_NOP 0
89 # GCN-NEXT: S_NOP 0
90 # GCN-NEXT: S_NOP 0
91 # GCN-NEXT: S_DENORM_MODE
92 ---
93 name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
94 body: |
95 bb.0:
96 %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
97 S_DENORM_MODE 0
98 ...
99
100 # GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
101 # GCN: FLAT_ATOMIC_FMAX_RTN
102 # GCN-NEXT: S_NOP 0
103 # GCN-NEXT: S_NOP 0
104 # GCN-NEXT: S_NOP 0
105 # GCN-NEXT: S_DENORM_MODE
106 ---
107 name: flat_atomic_fmax_rtn_to_s_denorm_mode
108 body: |
109 bb.0:
110 %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
111 S_DENORM_MODE 0
112 ...
113
114 # GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
115 # GCN: FLAT_ATOMIC_FMAX_X2_RTN
116 # GCN-NEXT: S_NOP 0
117 # GCN-NEXT: S_NOP 0
118 # GCN-NEXT: S_NOP 0
119 # GCN-NEXT: S_DENORM_MODE
120 ---
121 name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
122 body: |
123 bb.0:
124 %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
125 S_DENORM_MODE 0
126 ...
127
128 # GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
129 # GCN: FLAT_ATOMIC_FMIN_RTN
130 # GCN-NEXT: S_NOP 0
131 # GCN-NEXT: S_NOP 0
132 # GCN-NEXT: S_NOP 0
133 # GCN-NEXT: S_DENORM_MODE
134 ---
135 name: flat_atomic_fmin_rtn_to_s_denorm_mode
136 body: |
137 bb.0:
138 %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
139 S_DENORM_MODE 0
140 ...
141
142 # GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
143 # GCN: FLAT_ATOMIC_FMIN_X2_RTN
144 # GCN-NEXT: S_NOP 0
145 # GCN-NEXT: S_NOP 0
146 # GCN-NEXT: S_NOP 0
147 # GCN-NEXT: S_DENORM_MODE
148 ---
149 name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
150 body: |
151 bb.0:
152 %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
153 S_DENORM_MODE 0
154 ...
155
156 # GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
157 # GCN: FLAT_ATOMIC_FCMPSWAP_RTN
158 # GCN-NEXT: S_NOP 0
159 # GCN-NEXT: S_NOP 0
160 # GCN-NEXT: S_NOP 0
161 # GCN-NEXT: S_DENORM_MODE
162 ---
163 name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
164 body: |
165 bb.0:
166 %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
167 S_DENORM_MODE 0
168 ...
169
170 # GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
171 # GCN: GLOBAL_ATOMIC_FCMPSWAP
172 # GCN-NEXT: S_NOP 0
173 # GCN-NEXT: S_NOP 0
174 # GCN-NEXT: S_NOP 0
175 # GCN-NEXT: S_DENORM_MODE
176 ---
177 name: global_atomic_fcmpswap_to_s_denorm_mode
178 body: |
179 bb.0:
180 GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
181 S_DENORM_MODE 0
182 ...
183
184 # GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
185 # GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
186 # GCN-NEXT: S_NOP 0
187 # GCN-NEXT: S_NOP 0
188 # GCN-NEXT: S_NOP 0
189 # GCN-NEXT: S_DENORM_MODE
190 ---
191 name: global_atomic_fcmpswap_x2_to_s_denorm_mode
192 body: |
193 bb.0:
194 GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
195 S_DENORM_MODE 0
196 ...
197
198 # GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
199 # GCN: GLOBAL_ATOMIC_FMAX
200 # GCN-NEXT: S_NOP 0
201 # GCN-NEXT: S_NOP 0
202 # GCN-NEXT: S_NOP 0
203 # GCN-NEXT: S_DENORM_MODE
204 ---
205 name: global_atomic_fmax_to_s_denorm_mode
206 body: |
207 bb.0:
208 GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
209 S_DENORM_MODE 0
210 ...
211
212 # GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
213 # GCN: GLOBAL_ATOMIC_FMAX_X2
214 # GCN-NEXT: S_NOP 0
215 # GCN-NEXT: S_NOP 0
216 # GCN-NEXT: S_NOP 0
217 # GCN-NEXT: S_DENORM_MODE
218 ---
219 name: global_atomic_fmax_x2_to_s_denorm_mode
220 body: |
221 bb.0:
222 GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
223 S_DENORM_MODE 0
224 ...
225
226 # GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
227 # GCN: GLOBAL_ATOMIC_FMIN
228 # GCN-NEXT: S_NOP 0
229 # GCN-NEXT: S_NOP 0
230 # GCN-NEXT: S_NOP 0
231 # GCN-NEXT: S_DENORM_MODE
232 ---
233 name: global_atomic_fmin_to_s_denorm_mode
234 body: |
235 bb.0:
236 GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
237 S_DENORM_MODE 0
238 ...
239
240 # GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
241 # GCN: GLOBAL_ATOMIC_FMIN_X2
242 # GCN-NEXT: S_NOP 0
243 # GCN-NEXT: S_NOP 0
244 # GCN-NEXT: S_NOP 0
245 # GCN-NEXT: S_DENORM_MODE
246 ---
247 name: global_atomic_fmin_x2_to_s_denorm_mode
248 body: |
249 bb.0:
250 GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
251 S_DENORM_MODE 0
252 ...
253
254 # GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
255 # GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
256 # GCN-NEXT: S_NOP 0
257 # GCN-NEXT: S_NOP 0
258 # GCN-NEXT: S_NOP 0
259 # GCN-NEXT: S_DENORM_MODE
260 ---
261 name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
262 body: |
263 bb.0:
264 %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
265 S_DENORM_MODE 0
266 ...
267
268 # GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
269 # GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
270 # GCN-NEXT: S_NOP 0
271 # GCN-NEXT: S_NOP 0
272 # GCN-NEXT: S_NOP 0
273 # GCN-NEXT: S_DENORM_MODE
274 ---
275 name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
276 body: |
277 bb.0:
278 %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
279 S_DENORM_MODE 0
280 ...
281
282 # GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
283 # GCN: GLOBAL_ATOMIC_FMAX_RTN
284 # GCN-NEXT: S_NOP 0
285 # GCN-NEXT: S_NOP 0
286 # GCN-NEXT: S_NOP 0
287 # GCN-NEXT: S_DENORM_MODE
288 ---
289 name: global_atomic_fmax_rtn_to_s_denorm_mode
290 body: |
291 bb.0:
292 %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
293 S_DENORM_MODE 0
294 ...
295
296 # GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
297 # GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
298 # GCN-NEXT: S_NOP 0
299 # GCN-NEXT: S_NOP 0
300 # GCN-NEXT: S_NOP 0
301 # GCN-NEXT: S_DENORM_MODE
302 ---
303 name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
304 body: |
305 bb.0:
306 %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
307 S_DENORM_MODE 0
308 ...
309
310 # GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
311 # GCN: GLOBAL_ATOMIC_FMIN_RTN
312 # GCN-NEXT: S_NOP 0
313 # GCN-NEXT: S_NOP 0
314 # GCN-NEXT: S_NOP 0
315 # GCN-NEXT: S_DENORM_MODE
316 ---
317 name: global_atomic_fmin_rtn_to_s_denorm_mode
318 body: |
319 bb.0:
320 %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
321 S_DENORM_MODE 0
322 ...
323
324 # GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
325 # GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
326 # GCN-NEXT: S_NOP 0
327 # GCN-NEXT: S_NOP 0
328 # GCN-NEXT: S_NOP 0
329 # GCN-NEXT: S_DENORM_MODE
330 ---
331 name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
332 body: |
333 bb.0:
334 %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
335 S_DENORM_MODE 0
336 ...
337
338 # GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
339 # GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
340 # GCN-NEXT: S_NOP 0
341 # GCN-NEXT: S_NOP 0
342 # GCN-NEXT: S_NOP 0
343 # GCN-NEXT: S_DENORM_MODE
344 ---
345 name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
346 body: |
347 bb.0:
348 GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
349 S_DENORM_MODE 0
350 ...
351
352 # GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
353 # GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
354 # GCN-NEXT: S_NOP 0
355 # GCN-NEXT: S_NOP 0
356 # GCN-NEXT: S_NOP 0
357 # GCN-NEXT: S_DENORM_MODE
358 ---
359 name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
360 body: |
361 bb.0:
362 %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
363 S_DENORM_MODE 0
364 ...
365
366 # GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
367 # GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
368 # GCN-NEXT: S_NOP 0
369 # GCN-NEXT: S_NOP 0
370 # GCN-NEXT: S_NOP 0
371 # GCN-NEXT: S_DENORM_MODE
372 ---
373 name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
374 body: |
375 bb.0:
376 %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
377 S_DENORM_MODE 0
378 ...
379
380 # GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
381 # GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
382 # GCN-NEXT: S_NOP 0
383 # GCN-NEXT: S_NOP 0
384 # GCN-NEXT: S_NOP 0
385 # GCN-NEXT: S_DENORM_MODE
386 ---
387 name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
388 body: |
389 bb.0:
390 %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
391 S_DENORM_MODE 0
392 ...
393
394 # GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
395 # GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
396 # GCN-NEXT: S_NOP 0
397 # GCN-NEXT: S_NOP 0
398 # GCN-NEXT: S_NOP 0
399 # GCN-NEXT: S_DENORM_MODE
400 ---
401 name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
402 body: |
403 bb.0:
404 %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
405 S_DENORM_MODE 0
406 ...
407
408 # GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
409 # GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
410 # GCN-NEXT: S_NOP 0
411 # GCN-NEXT: S_NOP 0
412 # GCN-NEXT: S_NOP 0
413 # GCN-NEXT: S_DENORM_MODE
414 ---
415 name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
416 body: |
417 bb.0:
418 %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
419 S_DENORM_MODE 0
420 ...
421
422 # GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_waitcnt
423 # GCN: FLAT_ATOMIC_FMIN
424 # GCN-NEXT: S_WAITCNT
425 # GCN-NEXT: S_DENORM_MODE
426 ---
427 name: flat_fp_atomic_to_s_denorm_mode_waitcnt
428 body: |
429 bb.0:
430 FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
431 S_WAITCNT 0
432 S_DENORM_MODE 0
433 ...
434
435 # GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_valu
436 # GCN: FLAT_ATOMIC_FMIN
437 # GCN-NEXT: V_ADD_F32_e32
438 # GCN-NEXT: S_DENORM_MODE
439 ---
440 name: flat_fp_atomic_to_s_denorm_mode_valu
441 body: |
442 bb.0:
443 FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
444 %2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $exec
445 S_DENORM_MODE 0
446 ...