llvm.org GIT mirror llvm / eca9ecf
[AMDGPU] Enabled v2.16 literals for VOP3P. Literal encoding needs op_sel_hi to select the low 16 bits in this case. Differential Revision: https://reviews.llvm.org/D45745 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330230 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 1 year, 10 months ago
14 changed file(s) with 67 addition(s) and 63 deletion(s). Raw diff Collapse all Expand all
154154 assert(Old.isReg());
155155
156156 if (Fold.isImm()) {
157 if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
158 // Set op_sel_hi on this operand or bail out if op_sel is already set.
159 unsigned Opcode = MI->getOpcode();
160 int OpNo = MI->getOperandNo(&Old);
161 int ModIdx = -1;
162 if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
163 ModIdx = AMDGPU::OpName::src0_modifiers;
164 else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
165 ModIdx = AMDGPU::OpName::src1_modifiers;
166 else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
167 ModIdx = AMDGPU::OpName::src2_modifiers;
168 assert(ModIdx != -1);
169 ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
170 MachineOperand &Mod = MI->getOperand(ModIdx);
171 unsigned Val = Mod.getImm();
172 if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
173 return false;
174 Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
175 }
157176 Old.ChangeToImmediate(Fold.ImmToFold);
158177 return true;
159178 }
9595 } // end namespace anonymous
9696
9797 namespace llvm {
98
99 static cl::opt EnablePackedInlinableLiterals(
100 "enable-packed-inlinable-literals",
101 cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
102 cl::init(false));
10398
10499 namespace AMDGPU {
105100
876871 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
877872 assert(HasInv2Pi);
878873
879 if (!EnablePackedInlinableLiterals)
880 return false;
881
882874 int16_t Lo16 = static_cast(Literal);
883875 int16_t Hi16 = static_cast(Literal >> 16);
884876 return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
None ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
22
33 ; FIXME: Need to handle non-uniform case for function below (load without gep).
9595 }
9696
9797 ; GCN-LABEL: {{^}}v_test_add_v2i16_inline_neg1:
98 ; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, -1{{$}}
98 ; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, -1 op_sel_hi:[1,0]{{$}}
9999
100100 ; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
101101 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
None ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
22 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
33
187187
188188
189189 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm:
190 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
191 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
192 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]] clamp{{$}}
190 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
191 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0] clamp{{$}}
193192 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
194193 %tid = call i32 @llvm.amdgcn.workitem.id.x()
195194 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
203202 }
204203
205204 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_no_denormals:
206 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
207 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
208 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]] clamp{{$}}
205 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
206 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0] clamp{{$}}
209207 define amdgpu_kernel void @v_clamp_add_src_v2f16_no_denormals(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #3 {
210208 %tid = call i32 @llvm.amdgcn.workitem.id.x()
211209 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
219217 }
220218
221219 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg:
222 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
223 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
224 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
220 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
221 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
225222 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
226223 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
227224 %tid = call i32 @llvm.amdgcn.workitem.id.x()
237234 }
238235
239236 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg_lo:
240 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
241 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
242 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
237 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
238 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
243239 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_lo:[1,1] clamp{{$}}
244240 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
245241 %tid = call i32 @llvm.amdgcn.workitem.id.x()
257253 }
258254
259255 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg_hi:
260 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
261 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
262 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
256 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
257 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
263258 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_hi:[1,1] clamp{{$}}
264259 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
265260 %tid = call i32 @llvm.amdgcn.workitem.id.x()
277272 }
278273
279274 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_shuf:
280 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
281 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
282 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
275 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
276 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
283277 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
284278 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
285279 %tid = call i32 @llvm.amdgcn.workitem.id.x()
314308 }
315309
316310 ; GCN-LABEL: {{^}}v_no_clamp_add_packed_src_f32:
317 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
318 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
319 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
311 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
312 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
320313 ; GFX9: v_max_f32_e64 [[CLAMP:v[0-9]+]], [[ADD]], [[ADD]] clamp{{$}}
321314 define amdgpu_kernel void @v_no_clamp_add_packed_src_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
322315 %tid = call i32 @llvm.amdgcn.workitem.id.x()
0 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
22
33 declare half @llvm.fabs.f16(half) #0
44 declare half @llvm.canonicalize.f16(half) #0
None ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
44
5 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
6 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
7 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
8 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
5 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
6 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
7 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
8 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
99
1010 declare i32 @llvm.amdgcn.workitem.id.x() #1
1111 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
0 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=CIVI %s
11 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN -check-prefix=CIVI %s
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s
33
44 ; GCN-LABEL: {{^}}fneg_fabs_fadd_f16:
55 ; CI: v_cvt_f32_f16_e32
123123 ; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
124124
125125 ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
126 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 neg_lo:[1,0] neg_hi:[1,0]
126 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
127127 define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {
128128 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
129129 %fneg.fabs = fsub <2 x half> , %fabs
146146
147147 ; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_foldable_neg_v2f16:
148148 ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
149 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 neg_lo:[1,0] neg_hi:[1,0]
149 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
150150 define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
151151 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
152152 %fneg = fsub <2 x half> , %fabs
None ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
11 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
22 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
33 ; FIXME: Merge into imm.ll
116116
117117 ; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16:
118118 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
119 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
119 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0 op_sel_hi:[1,0]{{$}}
120120 ; GFX9: buffer_store_dword [[REG]]
121121
122122 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
134134
135135 ; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
136136 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
137 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
137 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0]{{$}}
138138 ; GFX9: buffer_store_dword [[REG]]
139139
140140 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
152152
153153 ; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
154154 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
155 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
155 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0]{{$}}
156156 ; GFX9: buffer_store_dword [[REG]]
157157
158158 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
170170
171171 ; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
172172 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
173 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
173 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0]{{$}}
174174 ; GFX9: buffer_store_dword [[REG]]
175175
176176 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
188188
189189 ; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
190190 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
191 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
191 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0]{{$}}
192192 ; GFX9: buffer_store_dword [[REG]]
193193
194194 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
206206
207207 ; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
208208 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
209 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
209 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0]{{$}}
210210 ; GFX9: buffer_store_dword [[REG]]
211211
212212 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
224224
225225 ; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
226226 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
227 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
227 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0]{{$}}
228228 ; GFX9: buffer_store_dword [[REG]]
229229
230230 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
242242
243243 ; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
244244 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
245 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
245 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0]{{$}}
246246 ; GFX9: buffer_store_dword [[REG]]
247247
248248 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
260260
261261 ; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
262262 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
263 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
263 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0]{{$}}
264264 ; GFX9: buffer_store_dword [[REG]]
265265
266266 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
317317
318318 ; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
319319 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
320 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
320 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0]{{$}}
321321 ; GFX9: buffer_store_dword [[REG]]
322322
323323 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
335335
336336 ; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
337337 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
338 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
338 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0]{{$}}
339339 ; GFX9: buffer_store_dword [[REG]]
340340
341341 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
353353
354354 ; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
355355 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
356 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16{{$}}
356 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0]{{$}}
357357 ; GFX9: buffer_store_dword [[REG]]
358358
359359 ; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
None ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
22 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
33
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
11 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
33 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
44
55 ; FIXME: i16 promotion pass ruins the scalar cases when legal.
None ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
22 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
33
None ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
11 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s
22 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s
33
None ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
22
33 ; FIXME: Need to handle non-uniform case for function below (load without gep).
9292 }
9393
9494 ; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_neg1:
95 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, -1{{$}}
95 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, -1 op_sel_hi:[1,0]{{$}}
9696
9797 ; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
9898 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]]