llvm.org GIT mirror llvm / f154896
AMDGPU: Split flat offsets that don't fit in DAG
We handle it this way for some other address spaces. Since r349196, SILoadStoreOptimizer has been trying to do this. This is after SIFoldOperands runs, which can change the addressing patterns. It's simpler to just split this earlier.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375366 91177308-0d34-0410-b5e6-96231b3b80d8
Matt Arsenault, 1 year, 1 month ago
12 changed file(s) with 523 addition(s) and 456 deletion(s). Raw diff Collapse all Expand all
261261
262262 SDValue getHi16Elt(SDValue In) const;
263263
264 SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
265
264266 void SelectADD_SUB_I64(SDNode *N);
265267 void SelectAddcSubb(SDNode *N);
266268 void SelectUADDO_USUBO(SDNode *N);
960962 }
961963
962964 return true;
965 }
966
967 SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
968 const SDLoc &DL) const {
969 SDNode *Mov = CurDAG->getMachineNode(
970 AMDGPU::S_MOV_B32, DL, MVT::i32,
971 CurDAG->getTargetConstant(Val, DL, MVT::i32));
972 return SDValue(Mov, 0);
963973 }
964974
965975 // FIXME: Should only handle addcarry/subcarry
16291639 CurDAG->isBaseWithConstantOffset(Addr)) {
16301640 SDValue N0 = Addr.getOperand(0);
16311641 SDValue N1 = Addr.getOperand(1);
1632 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1642 uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
16331643
16341644 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1635 if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(),
1636 IsSigned)) {
1645 unsigned AS = findMemSDNode(N)->getAddressSpace();
1646 if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
16371647 Addr = N0;
16381648 OffsetVal = COffsetVal;
1649 } else {
1650 // If the offset doesn't fit, put the low bits into the offset field and
1651 // add the rest.
1652
1653 SDLoc DL(N);
1654 uint64_t ImmField;
1655 const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
1656 if (IsSigned) {
1657 ImmField = SignExtend64(COffsetVal, NumBits);
1658
1659 // Don't use a negative offset field if the base offset is positive.
1660 // Since the scheduler currently relies on the offset field, doing so
1661 // could result in strange scheduling decisions.
1662
1663 // TODO: Should we not do this in the opposite direction as well?
1664 if (static_cast<int64_t>(COffsetVal) > 0) {
1665 if (static_cast<int64_t>(ImmField) < 0) {
1666 const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits - 1);
1667 ImmField = COffsetVal & OffsetMask;
1668 }
1669 }
1670 } else {
1671 // TODO: Should we do this for a negative offset?
1672 const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
1673 ImmField = COffsetVal & OffsetMask;
1674 }
1675
1676 uint64_t RemainderOffset = COffsetVal - ImmField;
1677
1678 assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
1679 assert(RemainderOffset + ImmField == COffsetVal);
1680
1681 OffsetVal = ImmField;
1682
1683 // TODO: Should this try to use a scalar add pseudo if the base address is
1684 // uniform and saddr is usable?
1685 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1686 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1687
1688 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1689 DL, MVT::i32, N0, Sub0);
1690 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1691 DL, MVT::i32, N0, Sub1);
1692
1693 SDValue AddOffsetLo
1694 = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1695 SDValue AddOffsetHi
1696 = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1697
1698 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1699 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1700
1701 SDNode *Add = CurDAG->getMachineNode(
1702 AMDGPU::V_ADD_I32_e64, DL, VTs,
1703 {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1704
1705 SDNode *Addc = CurDAG->getMachineNode(
1706 AMDGPU::V_ADDC_U32_e64, DL, VTs,
1707 {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
1708
1709 SDValue RegSequenceArgs[] = {
1710 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1711 SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1
1712 };
1713
1714 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1715 MVT::i64, RegSequenceArgs), 0);
16391716 }
16401717 }
16411718
62746274 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
62756275 }
62766276
6277 unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace,
6278 bool Signed) const {
6279 if (!ST.hasFlatInstOffsets())
6280 return 0;
6281
6282 if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
6283 return 0;
6284
6285 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
6286 return Signed ? 12 : 11;
6287
6288 return Signed ? 13 : 12;
6289 }
6290
62776291 bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
62786292 bool Signed) const {
62796293 // TODO: Should 0 be special cased?
10031003 return isUInt<12>(Imm);
10041004 }
10051005
1006 unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const;
1007
10061008 /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
10071009 /// encoded instruction. If \p Signed, this is for an instruction that
10081010 /// interprets the offset as signed.
4343 ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
4444 ; GCN: s_and_saveexec_b64
4545 ; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
46 ; GFX9: global_load_sbyte {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
46
47 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xf000,
48 ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0,
49 ; GFX9: global_load_sbyte {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, off offset:4095{{$}}
4750 ; GCN: {{^}}BB1_2:
4851 ; GCN: s_or_b64 exec
4952 define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
0 ; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
1 ; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
2 ; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s
3 ; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s
4
5 ; Disable optimizations in case there are optimizations added that
6 ; specialize away generic pointer accesses.
7
8
9 ; These testcases might become useless when there are optimizations to
10 ; remove generic pointers.
0 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
1 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI,HSA %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s
114
125 ; CHECK-LABEL: {{^}}store_flat_i32:
136 ; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
190183 }
191184
192185 ; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
193 ; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
186 ; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
187
188 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v
189 ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1,
190 ; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4094{{$}}
194191 define amdgpu_kernel void @store_flat_i8_neg_offset(i8* %fptr, i8 %x) #0 {
195192 %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
196193 store volatile i8 %x, i8* %fptr.offset
215212 }
216213
217214 ; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
218 ; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
215 ; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
216
217 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v
218 ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1,
219 ; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4094{{$}}
219220 define amdgpu_kernel void @load_flat_i8_neg_offset(i8* %fptr) #0 {
220221 %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
221222 %val = load volatile i8, i8* %fptr.offset
4545
4646 ; Test various offset boundaries.
4747 ; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}}
48 ; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
48 ; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4088{{$}}
4949 ; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:2040{{$}}
5050 %gep11 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 511
5151 %load11 = load i64, i64 addrspace(1)* %gep11
2424 ; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
2525 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
2626
27 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
27 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0x8000,
28 ; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
29 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3232{{$}}
2830 define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
2931 entry:
3032 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
3941
4042 ; VI: flat_atomic_add
4143
42 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
44 ; GFX9: v_mov_b32_e32 [[HIGH_K:v[0-9]+]], 0xabcd
45 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xd000,
46 ; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, [[HIGH_K]], v{{[0-9]+}}, vcc
47 ; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3756{{$}}
4348 define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
4449 entry:
4550 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
990990 ; CIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x11940
991991 ; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
992992
993 ; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
993 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0x11000,
994 ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
995 ; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:2368{{$}}
994996 define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(1)* %out, i64 %in, i64 %old) {
995997 entry:
996998 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 9000
7777 ; GFX9-LABEL: flat_inst_valu_offset_13bit_max:
7878 ; GFX9: ; %bb.0:
7979 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
80 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
8181 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
82 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
82 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
8383 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8484 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8585 ;
102102 ; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max:
103103 ; GFX9: ; %bb.0:
104104 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
105 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
106106 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
107 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
107 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
108108 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
109109 ; GFX9-NEXT: s_setpc_b64 s[30:31]
110110 ;
200200 ; GFX9-LABEL: flat_inst_valu_offset_2x_12bit_max:
201201 ; GFX9: ; %bb.0:
202202 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
203 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
204204 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
205 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
205 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
206206 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
207207 ; GFX9-NEXT: s_setpc_b64 s[30:31]
208208 ;
225225 ; GFX9-LABEL: flat_inst_valu_offset_2x_13bit_max:
226226 ; GFX9: ; %bb.0:
227227 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3fff, v0
228 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
229229 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
230 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
230 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
231231 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
232232 ; GFX9-NEXT: s_setpc_b64 s[30:31]
233233 ;
326326 ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
327327 ; GFX9: ; %bb.0:
328328 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
329 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
330330 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
331 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
331 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
332332 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
333333 ; GFX9-NEXT: s_setpc_b64 s[30:31]
334334 ;
352352 ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
353353 ; GFX9: ; %bb.0:
354354 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
355 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
356356 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
357 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
357 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
358358 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
359359 ; GFX9-NEXT: s_setpc_b64 s[30:31]
360360 ;
378378 ; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
379379 ; GFX9: ; %bb.0:
380380 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
381 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
382382 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
383 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
383 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
384384 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
385385 ; GFX9-NEXT: s_setpc_b64 s[30:31]
386386 ;
430430 ; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
431431 ; GFX9: ; %bb.0:
432432 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
433 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
434434 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
435 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
435 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
436436 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
437437 ; GFX9-NEXT: s_setpc_b64 s[30:31]
438438 ;
482482 ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
483483 ; GFX9: ; %bb.0:
484484 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
486485 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
487 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
488 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
486 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
487 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
488 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
489489 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
490490 ; GFX9-NEXT: s_setpc_b64 s[30:31]
491491 ;
509509 ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
510510 ; GFX9: ; %bb.0:
511511 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
513512 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
514 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
515 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
513 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
514 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
515 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
516516 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
517517 ; GFX9-NEXT: s_setpc_b64 s[30:31]
518518 ;
536536 ; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
537537 ; GFX9: ; %bb.0:
538538 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
540539 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
541 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
542 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
540 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
541 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
542 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
543543 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
544544 ; GFX9-NEXT: s_setpc_b64 s[30:31]
545545 ;
565565 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566566 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
567567 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
568 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
568 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
569569 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
570570 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
571571 ; GFX9-NEXT: s_setpc_b64 s[30:31]
590590 ; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
591591 ; GFX9: ; %bb.0:
592592 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
593 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
594594 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
595 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
596 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
595 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
596 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
597597 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
598598 ; GFX9-NEXT: s_setpc_b64 s[30:31]
599599 ;
619619 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620620 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
621621 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
622 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
622 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
623623 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
624624 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
625625 ; GFX9-NEXT: s_setpc_b64 s[30:31]
737737 ; GFX9: ; %bb.0:
738738 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
739739 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
740 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
741 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
742 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
743 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
744 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
740 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
741 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
742 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
743 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
744 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
745745 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
746746 ; GFX9-NEXT: flat_store_byte v[0:1], v0
747747 ; GFX9-NEXT: s_endpgm
770770 ; GFX9: ; %bb.0:
771771 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
772772 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
773 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfffff800
774 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
775 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
776 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
777 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
773 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
774 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
775 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
776 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
777 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
778778 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
779779 ; GFX9-NEXT: flat_store_byte v[0:1], v0
780780 ; GFX9-NEXT: s_endpgm
803803 ; GFX9: ; %bb.0:
804804 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
805805 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
806 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfffff000
807 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
808 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
809 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
806 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
807 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
808 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
809 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
810810 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
811811 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
812812 ; GFX9-NEXT: flat_store_byte v[0:1], v0
836836 ; GFX9: ; %bb.0:
837837 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
838838 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
839 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
840 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
841 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
842 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
839 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
840 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
841 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
842 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
843843 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
844844 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
845845 ; GFX9-NEXT: flat_store_byte v[0:1], v0
900900 ; GFX9: ; %bb.0:
901901 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
902902 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
903 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
904 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
905 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
906 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
907 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
903 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
904 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
905 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
906 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
907 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
908908 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
909909 ; GFX9-NEXT: flat_store_byte v[0:1], v0
910910 ; GFX9-NEXT: s_endpgm
933933 ; GFX9: ; %bb.0:
934934 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
935935 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
936 ; GFX9-NEXT: s_add_u32 s0, s0, 0x3fff
937 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
938 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
939 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
940 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
936 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
937 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
938 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
939 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
940 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
941941 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
942942 ; GFX9-NEXT: flat_store_byte v[0:1], v0
943943 ; GFX9-NEXT: s_endpgm
966966 ; GFX9: ; %bb.0:
967967 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
968968 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
969 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfffff000
970 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
971 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
972 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
969 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
970 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
971 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
972 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
973973 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
974974 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
975975 ; GFX9-NEXT: flat_store_byte v[0:1], v0
999999 ; GFX9: ; %bb.0:
10001000 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
10011001 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1002 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
1003 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
1004 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1005 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1002 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1003 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1004 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
1005 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
10061006 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
10071007 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
10081008 ; GFX9-NEXT: flat_store_byte v[0:1], v0
10321032 ; GFX9: ; %bb.0:
10331033 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
10341034 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1035 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffc000
1036 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
1037 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1038 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1035 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1036 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1037 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
1038 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
10391039 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
10401040 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
10411041 ; GFX9-NEXT: flat_store_byte v[0:1], v0
10661066 ; GFX9: ; %bb.0:
10671067 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
10681068 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1069 ; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
1070 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1071 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1072 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1073 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
1069 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1070 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1071 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1072 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
10741073 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
10751074 ; GFX9-NEXT: flat_store_byte v[0:1], v0
10761075 ; GFX9-NEXT: s_endpgm
11001099 ; GFX9: ; %bb.0:
11011100 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11021101 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1103 ; GFX9-NEXT: s_add_u32 s0, s0, 0x800
1104 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1105 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1106 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1107 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
1102 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1103 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1104 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1105 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
11081106 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
11091107 ; GFX9-NEXT: flat_store_byte v[0:1], v0
11101108 ; GFX9-NEXT: s_endpgm
11341132 ; GFX9: ; %bb.0:
11351133 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11361134 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1137 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
1138 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1139 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1140 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1141 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
1135 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1136 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1137 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1138 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
11421139 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
11431140 ; GFX9-NEXT: flat_store_byte v[0:1], v0
11441141 ; GFX9-NEXT: s_endpgm
11681165 ; GFX9: ; %bb.0:
11691166 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11701167 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1171 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
1172 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1173 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1174 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1168 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1169 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1170 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1171 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
11751172 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
11761173 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
11771174 ; GFX9-NEXT: flat_store_byte v[0:1], v0
12021199 ; GFX9: ; %bb.0:
12031200 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
12041201 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1205 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
1206 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1207 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1208 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1209 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
1202 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1203 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1204 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1205 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1206 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
12101207 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
12111208 ; GFX9-NEXT: flat_store_byte v[0:1], v0
12121209 ; GFX9-NEXT: s_endpgm
12361233 ; GFX9: ; %bb.0:
12371234 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
12381235 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1239 ; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
1240 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1241 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1242 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1236 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1237 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1238 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1239 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
12431240 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
12441241 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
12451242 ; GFX9-NEXT: flat_store_byte v[0:1], v0
12691266 ; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
12701267 ; GFX9: ; %bb.0:
12711268 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1272 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1273 ; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
1274 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1275 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1276 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1277 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
1269 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1270 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1271 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1272 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1273 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1274 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
12781275 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
12791276 ; GFX9-NEXT: flat_store_byte v[0:1], v0
12801277 ; GFX9-NEXT: s_endpgm
13031300 ; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
13041301 ; GFX9: ; %bb.0:
13051302 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1306 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1307 ; GFX9-NEXT: s_add_u32 s0, s0, 0x800
1308 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1309 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1310 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1311 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
1303 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1304 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1305 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1306 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1307 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1308 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
13121309 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
13131310 ; GFX9-NEXT: flat_store_byte v[0:1], v0
13141311 ; GFX9-NEXT: s_endpgm
13371334 ; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
13381335 ; GFX9: ; %bb.0:
13391336 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1340 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1341 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
1342 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1343 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1344 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1345 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
1337 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1338 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1339 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1340 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1341 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1342 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
13461343 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
13471344 ; GFX9-NEXT: flat_store_byte v[0:1], v0
13481345 ; GFX9-NEXT: s_endpgm
13711368 ; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
13721369 ; GFX9: ; %bb.0:
13731370 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1374 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1375 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
1376 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1377 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1378 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1371 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1372 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1373 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1374 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1375 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1376 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
13791377 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
13801378 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
13811379 ; GFX9-NEXT: flat_store_byte v[0:1], v0
14051403 ; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
14061404 ; GFX9: ; %bb.0:
14071405 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1408 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1409 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
1410 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1411 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1412 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1413 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
1406 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1407 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1408 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1409 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1410 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1411 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1412 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
14141413 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
14151414 ; GFX9-NEXT: flat_store_byte v[0:1], v0
14161415 ; GFX9-NEXT: s_endpgm
14391438 ; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
14401439 ; GFX9: ; %bb.0:
14411440 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1442 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1443 ; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
1444 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1445 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1446 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1441 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1442 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1443 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1444 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1445 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1446 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
14471447 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
14481448 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
14491449 ; GFX9-NEXT: flat_store_byte v[0:1], v0
5858 ; GFX10: ; %bb.0:
5959 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6060 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
61 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
61 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
6262 ; GFX10-NEXT: ; implicit-def: $vcc_hi
6363 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
64 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
64 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
6565 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6666 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6767 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
7373 ; GFX9-LABEL: global_inst_valu_offset_13bit_max:
7474 ; GFX9: ; %bb.0:
7575 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
76 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
7777 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
78 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
78 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
7979 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8080 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8181 ;
8383 ; GFX10: ; %bb.0:
8484 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8585 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
86 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
86 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
8787 ; GFX10-NEXT: ; implicit-def: $vcc_hi
8888 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
89 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
89 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
9090 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9191 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9292 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
175175 ; GFX10: ; %bb.0:
176176 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177177 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
178 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
178 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
179179 ; GFX10-NEXT: ; implicit-def: $vcc_hi
180180 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
181 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
181 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
182182 ; GFX10-NEXT: s_waitcnt vmcnt(0)
183183 ; GFX10-NEXT: s_setpc_b64 s[30:31]
184184 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
190190 ; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
191191 ; GFX9: ; %bb.0:
192192 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
193 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
194194 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
195 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
195 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
196196 ; GFX9-NEXT: s_waitcnt vmcnt(0)
197197 ; GFX9-NEXT: s_setpc_b64 s[30:31]
198198 ;
200200 ; GFX10: ; %bb.0:
201201 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202202 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
203 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
203 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
204204 ; GFX10-NEXT: ; implicit-def: $vcc_hi
205205 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
206 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
206 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
207207 ; GFX10-NEXT: s_waitcnt vmcnt(0)
208208 ; GFX10-NEXT: s_setpc_b64 s[30:31]
209209 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
215215 ; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
216216 ; GFX9: ; %bb.0:
217217 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3fff, v0
218 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
219219 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
220 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
220 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
221221 ; GFX9-NEXT: s_waitcnt vmcnt(0)
222222 ; GFX9-NEXT: s_setpc_b64 s[30:31]
223223 ;
225225 ; GFX10: ; %bb.0:
226226 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227227 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
228 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x3fff, v0
228 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x3800, v0
229229 ; GFX10-NEXT: ; implicit-def: $vcc_hi
230230 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
231 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
231 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
232232 ; GFX10-NEXT: s_waitcnt vmcnt(0)
233233 ; GFX10-NEXT: s_setpc_b64 s[30:31]
234234 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
314314 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
315315 ; GFX9: ; %bb.0:
316316 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
317 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
318318 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
319 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
319 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
320320 ; GFX9-NEXT: s_waitcnt vmcnt(0)
321321 ; GFX9-NEXT: s_setpc_b64 s[30:31]
322322 ;
324324 ; GFX10: ; %bb.0:
325325 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326326 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
327 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0
327 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, v0
328328 ; GFX10-NEXT: ; implicit-def: $vcc_hi
329329 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
330 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
330 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
331331 ; GFX10-NEXT: s_waitcnt vmcnt(0)
332332 ; GFX10-NEXT: s_setpc_b64 s[30:31]
333333 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
340340 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
341341 ; GFX9: ; %bb.0:
342342 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
343 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
344344 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
345 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
345 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
346346 ; GFX9-NEXT: s_waitcnt vmcnt(0)
347347 ; GFX9-NEXT: s_setpc_b64 s[30:31]
348348 ;
366366 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
367367 ; GFX9: ; %bb.0:
368368 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
369 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
370370 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
371 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
371 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
372372 ; GFX9-NEXT: s_waitcnt vmcnt(0)
373373 ; GFX9-NEXT: s_setpc_b64 s[30:31]
374374 ;
376376 ; GFX10: ; %bb.0:
377377 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378378 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
379 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
379 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
380380 ; GFX10-NEXT: ; implicit-def: $vcc_hi
381381 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
382 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
382 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
383383 ; GFX10-NEXT: s_waitcnt vmcnt(0)
384384 ; GFX10-NEXT: s_setpc_b64 s[30:31]
385385 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
418418 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
419419 ; GFX9: ; %bb.0:
420420 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
421 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
422422 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
423 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
423 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
424424 ; GFX9-NEXT: s_waitcnt vmcnt(0)
425425 ; GFX9-NEXT: s_setpc_b64 s[30:31]
426426 ;
428428 ; GFX10: ; %bb.0:
429429 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430430 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
431 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
431 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
432432 ; GFX10-NEXT: ; implicit-def: $vcc_hi
433433 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
434 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
434 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
435435 ; GFX10-NEXT: s_waitcnt vmcnt(0)
436436 ; GFX10-NEXT: s_setpc_b64 s[30:31]
437437 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
470470 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
471471 ; GFX9: ; %bb.0:
472472 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
474473 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
475 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
476 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
474 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
475 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
476 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
477477 ; GFX9-NEXT: s_waitcnt vmcnt(0)
478478 ; GFX9-NEXT: s_setpc_b64 s[30:31]
479479 ;
481481 ; GFX10: ; %bb.0:
482482 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483483 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
484 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0
484 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, v0
485485 ; GFX10-NEXT: ; implicit-def: $vcc_hi
486486 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
487 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
487 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
488488 ; GFX10-NEXT: s_waitcnt vmcnt(0)
489489 ; GFX10-NEXT: s_setpc_b64 s[30:31]
490490 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
497497 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
498498 ; GFX9: ; %bb.0:
499499 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
501500 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
502 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
503 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
501 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
502 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
503 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
504504 ; GFX9-NEXT: s_waitcnt vmcnt(0)
505505 ; GFX9-NEXT: s_setpc_b64 s[30:31]
506506 ;
508508 ; GFX10: ; %bb.0:
509509 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510510 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
511 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
511 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
512512 ; GFX10-NEXT: ; implicit-def: $vcc_hi
513513 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
514 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
514 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
515515 ; GFX10-NEXT: s_waitcnt vmcnt(0)
516516 ; GFX10-NEXT: s_setpc_b64 s[30:31]
517517 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
524524 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
525525 ; GFX9: ; %bb.0:
526526 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
527 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
528527 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
529 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
530 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
528 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
529 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
530 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
531531 ; GFX9-NEXT: s_waitcnt vmcnt(0)
532532 ; GFX9-NEXT: s_setpc_b64 s[30:31]
533533 ;
535535 ; GFX10: ; %bb.0:
536536 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537537 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
538 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
538 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
539539 ; GFX10-NEXT: ; implicit-def: $vcc_hi
540540 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
541 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
541 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
542542 ; GFX10-NEXT: s_waitcnt vmcnt(0)
543543 ; GFX10-NEXT: s_setpc_b64 s[30:31]
544544 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
551551 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
552552 ; GFX9: ; %bb.0:
553553 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
554 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
555555 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
556 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
557 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
556 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
557 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
558558 ; GFX9-NEXT: s_waitcnt vmcnt(0)
559559 ; GFX9-NEXT: s_setpc_b64 s[30:31]
560560 ;
578578 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
579579 ; GFX9: ; %bb.0:
580580 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
581 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
582582 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
583 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
584 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
583 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
584 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
585585 ; GFX9-NEXT: s_waitcnt vmcnt(0)
586586 ; GFX9-NEXT: s_setpc_b64 s[30:31]
587587 ;
589589 ; GFX10: ; %bb.0:
590590 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591591 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
592 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
592 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
593593 ; GFX10-NEXT: ; implicit-def: $vcc_hi
594594 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
595 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
595 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
596596 ; GFX10-NEXT: s_waitcnt vmcnt(0)
597597 ; GFX10-NEXT: s_setpc_b64 s[30:31]
598598 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
607607 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608608 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
609609 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
610 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
610 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
611611 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
612612 ; GFX9-NEXT: s_waitcnt vmcnt(0)
613613 ; GFX9-NEXT: s_setpc_b64 s[30:31]
702702 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
703703 ; GFX10-NEXT: ; implicit-def: $vcc_hi
704704 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
705 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
706 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
707 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
708 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
709 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
705 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0
706 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
707 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
710708 ; GFX10-NEXT: s_waitcnt vmcnt(0)
711709 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
712710 ; GFX10-NEXT: s_endpgm
721719 ; GFX9: ; %bb.0:
722720 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
723721 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
724 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
725 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
726 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
727 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
728 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
722 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
723 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
724 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
725 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
726 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
729727 ; GFX9-NEXT: s_waitcnt vmcnt(0)
730728 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
731729 ; GFX9-NEXT: s_endpgm
735733 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
736734 ; GFX10-NEXT: ; implicit-def: $vcc_hi
737735 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
738 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
739 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
740 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
741 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
742 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
736 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0
737 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
738 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
743739 ; GFX10-NEXT: s_waitcnt vmcnt(0)
744740 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
745741 ; GFX10-NEXT: s_endpgm
795791 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
796792 ; GFX10-NEXT: ; implicit-def: $vcc_hi
797793 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
798 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000
799 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
800 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
801 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
794 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0
795 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
802796 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
803797 ; GFX10-NEXT: s_waitcnt vmcnt(0)
804798 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
814808 ; GFX9: ; %bb.0:
815809 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
816810 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
817 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
818 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
819 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
820 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
811 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
812 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
813 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
814 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
821815 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
822816 ; GFX9-NEXT: s_waitcnt vmcnt(0)
823817 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
828822 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
829823 ; GFX10-NEXT: ; implicit-def: $vcc_hi
830824 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
831 ; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000
832 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
833 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
834 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
825 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0
826 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
835827 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
836828 ; GFX10-NEXT: s_waitcnt vmcnt(0)
837829 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
859851 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
860852 ; GFX10-NEXT: ; implicit-def: $vcc_hi
861853 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
862 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
863 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
864 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
865 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
866 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
854 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0
855 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
856 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
867857 ; GFX10-NEXT: s_waitcnt vmcnt(0)
868858 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
869859 ; GFX10-NEXT: s_endpgm
878868 ; GFX9: ; %bb.0:
879869 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
880870 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
881 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
882 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
883 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
884 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
885 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
871 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
872 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
873 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
874 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
875 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
886876 ; GFX9-NEXT: s_waitcnt vmcnt(0)
887877 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
888878 ; GFX9-NEXT: s_endpgm
892882 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
893883 ; GFX10-NEXT: ; implicit-def: $vcc_hi
894884 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
895 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
896 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
897 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
898 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
899 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
885 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0
886 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
887 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
900888 ; GFX10-NEXT: s_waitcnt vmcnt(0)
901889 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
902890 ; GFX10-NEXT: s_endpgm
911899 ; GFX9: ; %bb.0:
912900 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
913901 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
914 ; GFX9-NEXT: s_add_u32 s0, s0, 0x3fff
915 ; GFX9-NEXT: s_addc_u32 s1, s1, 0
916 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
917 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
918 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
902 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
903 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
904 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
905 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
906 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
919907 ; GFX9-NEXT: s_waitcnt vmcnt(0)
920908 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
921909 ; GFX9-NEXT: s_endpgm
925913 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
926914 ; GFX10-NEXT: ; implicit-def: $vcc_hi
927915 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
928 ; GFX10-NEXT: s_add_u32 s0, s0, 0x3fff
929 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
930 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
931 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
932 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
916 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x3800, s0
917 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
918 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
933919 ; GFX10-NEXT: s_waitcnt vmcnt(0)
934920 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
935921 ; GFX10-NEXT: s_endpgm
956942 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
957943 ; GFX10-NEXT: ; implicit-def: $vcc_hi
958944 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
959 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000
960 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
961 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
962 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
945 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0
946 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
963947 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
964948 ; GFX10-NEXT: s_waitcnt vmcnt(0)
965949 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
975959 ; GFX9: ; %bb.0:
976960 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
977961 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
978 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
979 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
980 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
981 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
962 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
963 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
964 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
965 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
982966 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
983967 ; GFX9-NEXT: s_waitcnt vmcnt(0)
984968 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
989973 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
990974 ; GFX10-NEXT: ; implicit-def: $vcc_hi
991975 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
992 ; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000
993 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
994 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
995 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
976 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0
977 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
996978 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
997979 ; GFX10-NEXT: s_waitcnt vmcnt(0)
998980 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1008990 ; GFX9: ; %bb.0:
1009991 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1010992 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1011 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffc000
1012 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
1013 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1014 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
993 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
994 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
995 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
996 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1015997 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1016998 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1017999 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
10221004 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
10231005 ; GFX10-NEXT: ; implicit-def: $vcc_hi
10241006 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1025 ; GFX10-NEXT: s_add_u32 s0, s0, 0xffffc000
1026 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
1027 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1028 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1007 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffc000, s0
1008 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
10291009 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
10301010 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10311011 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
10421022 ; GFX9: ; %bb.0:
10431023 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
10441024 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1045 ; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
1046 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1047 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1048 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1049 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1025 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1026 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1027 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1028 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
10501029 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10511030 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
10521031 ; GFX9-NEXT: s_endpgm
10561035 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
10571036 ; GFX10-NEXT: ; implicit-def: $vcc_hi
10581037 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1059 ; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
1060 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
1061 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1062 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1063 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1038 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0, s0
1039 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1040 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
10641041 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10651042 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
10661043 ; GFX10-NEXT: s_endpgm
10761053 ; GFX9: ; %bb.0:
10771054 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
10781055 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1079 ; GFX9-NEXT: s_add_u32 s0, s0, 0x800
1080 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1081 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1082 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1083 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1056 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1057 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1058 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1059 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
10841060 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10851061 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
10861062 ; GFX9-NEXT: s_endpgm
10901066 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
10911067 ; GFX10-NEXT: ; implicit-def: $vcc_hi
10921068 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1093 ; GFX10-NEXT: s_add_u32 s0, s0, 0x800
1094 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
1095 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1096 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1069 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0
1070 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
10971071 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
10981072 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10991073 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
11101084 ; GFX9: ; %bb.0:
11111085 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11121086 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1113 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
1114 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1115 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1116 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1117 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1087 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1088 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1089 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1090 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
11181091 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11191092 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
11201093 ; GFX9-NEXT: s_endpgm
11241097 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11251098 ; GFX10-NEXT: ; implicit-def: $vcc_hi
11261099 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1127 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
1128 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
1129 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1130 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1131 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1100 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0
1101 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1102 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
11321103 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11331104 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
11341105 ; GFX10-NEXT: s_endpgm
11441115 ; GFX9: ; %bb.0:
11451116 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11461117 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1147 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
1148 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1149 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1150 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1118 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1119 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1120 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1121 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
11511122 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
11521123 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11531124 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
11581129 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11591130 ; GFX10-NEXT: ; implicit-def: $vcc_hi
11601131 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1161 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
1162 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
1163 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1164 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1132 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1000, s0
1133 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
11651134 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
11661135 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11671136 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
11781147 ; GFX9: ; %bb.0:
11791148 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11801149 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1181 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
1182 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1183 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1184 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1185 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1150 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1151 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1152 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1153 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1154 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
11861155 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11871156 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
11881157 ; GFX9-NEXT: s_endpgm
11921161 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
11931162 ; GFX10-NEXT: ; implicit-def: $vcc_hi
11941163 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1195 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
1196 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
1197 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1198 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1199 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1164 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0
1165 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1166 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
12001167 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12011168 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
12021169 ; GFX10-NEXT: s_endpgm
12121179 ; GFX9: ; %bb.0:
12131180 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
12141181 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1215 ; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
1216 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1217 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1218 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1182 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1183 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1184 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1185 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
12191186 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
12201187 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12211188 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
12261193 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
12271194 ; GFX10-NEXT: ; implicit-def: $vcc_hi
12281195 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1229 ; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
1230 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
1231 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1232 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1196 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x2000, s0
1197 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
12331198 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
12341199 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12351200 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
12451210 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
12461211 ; GFX9: ; %bb.0:
12471212 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1248 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1249 ; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
1250 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1251 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1252 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1253 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1213 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1214 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1215 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1216 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1217 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1218 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
12541219 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12551220 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
12561221 ; GFX9-NEXT: s_endpgm
12601225 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
12611226 ; GFX10-NEXT: ; implicit-def: $vcc_hi
12621227 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1263 ; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
1264 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1265 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
12661228 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1267 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1229 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, s0
1230 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1231 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
12681232 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12691233 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
12701234 ; GFX10-NEXT: s_endpgm
12791243 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
12801244 ; GFX9: ; %bb.0:
12811245 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1282 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1283 ; GFX9-NEXT: s_add_u32 s0, s0, 0x800
1284 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1285 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1286 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1287 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1246 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1247 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1248 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1249 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1250 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1251 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
12881252 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12891253 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
12901254 ; GFX9-NEXT: s_endpgm
12941258 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
12951259 ; GFX10-NEXT: ; implicit-def: $vcc_hi
12961260 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1297 ; GFX10-NEXT: s_add_u32 s0, s0, 0x800
1298 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1299 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
13001261 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1301 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1262 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
1263 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1264 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
13021265 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13031266 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
13041267 ; GFX10-NEXT: s_endpgm
13131276 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
13141277 ; GFX9: ; %bb.0:
13151278 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1316 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1317 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
1318 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1319 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1320 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1321 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1279 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1280 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1281 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1282 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1283 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1284 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
13221285 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13231286 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
13241287 ; GFX9-NEXT: s_endpgm
13281291 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
13291292 ; GFX10-NEXT: ; implicit-def: $vcc_hi
13301293 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1331 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
1332 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1333 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
13341294 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1335 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1295 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
1296 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1297 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
13361298 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13371299 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
13381300 ; GFX10-NEXT: s_endpgm
13471309 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
13481310 ; GFX9: ; %bb.0:
13491311 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1350 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1351 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
1352 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1353 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1354 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1355 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1312 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1313 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1314 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1315 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1316 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1317 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1318 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
13561319 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13571320 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
13581321 ; GFX9-NEXT: s_endpgm
13621325 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
13631326 ; GFX10-NEXT: ; implicit-def: $vcc_hi
13641327 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1365 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
1366 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1367 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
13681328 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1329 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
1330 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
13691331 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
13701332 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13711333 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
13811343 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
13821344 ; GFX9: ; %bb.0:
13831345 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1384 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1385 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
1386 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1387 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1388 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1389 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1346 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1347 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1348 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1349 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1350 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1351 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1352 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
13901353 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13911354 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
13921355 ; GFX9-NEXT: s_endpgm
13961359 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
13971360 ; GFX10-NEXT: ; implicit-def: $vcc_hi
13981361 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1399 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
1400 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1401 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
14021362 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1403 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1363 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0
1364 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1365 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
14041366 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14051367 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
14061368 ; GFX10-NEXT: s_endpgm
14151377 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
14161378 ; GFX9: ; %bb.0:
14171379 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1418 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1419 ; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
1420 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1421 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1422 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1380 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1381 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1382 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1383 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1384 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1385 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
14231386 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
14241387 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14251388 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
14301393 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
14311394 ; GFX10-NEXT: ; implicit-def: $vcc_hi
14321395 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1433 ; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
1434 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1435 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
14361396 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1397 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0
1398 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
14371399 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
14381400 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14391401 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1313 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
1414 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
1515 ;
16 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
17 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
18 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
19 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
20 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
21 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
22 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
23 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
16 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
17 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
18 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
19 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
20 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
21 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
22 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
23 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
24
2425 entry:
2526 %call = tail call i64 @_Z13get_global_idj(i32 0)
2627 %conv = and i64 %call, 255
7475 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
7576 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
7677 ;
77 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
78 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
79 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
80 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
81 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
82 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
83 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
7884 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
7985 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
8086 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
81 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
82 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
83 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
84 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
85 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
8687 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
8788 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
8889 entry:
183184 }
184185
185186 ; using 32bit address.
186 define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) {
187 define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) {
187188 ; GCN-LABEL: Address32:
188189 ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
189190 ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
196197 ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
197198 ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
198199 ;
199 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off
200 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
201 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
200202 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
201203 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048
202204 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072
203 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off
204 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-4096
205 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-3072
206 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048
207 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-1024
208 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off
205 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
206 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
207 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048
208 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072
209 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
209210 entry:
210211 %call = tail call i64 @_Z13get_global_idj(i32 0)
211212 %conv = and i64 %call, 255
264265 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
265266 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
266267 ;
267 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
268 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
269 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
270 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
268 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
269 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
270 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
271 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
271272 entry:
272273 %call = tail call i64 @_Z13get_global_idj(i32 0)
273274 %conv = and i64 %call, 255
305306 ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
306307 ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
307308 ;
308 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off
309 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off
310 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-1024
311 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off
309 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048
310 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072
311 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
312 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
312313 entry:
313314 %call = tail call i64 @_Z13get_global_idj(i32 0)
314315 %conv = and i64 %call, 255
346347 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
347348 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
348349 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
349 ;
350 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
351 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
352 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
353 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
354 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
355 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
350
351 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
352 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
353 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
354 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
355 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
356 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
356357 i8 addrspace(1)* %buffer2) {
357358 entry:
358359 %call = tail call i64 @_Z13get_global_idj(i32 0)
402403 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
403404 ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
404405 ;
405 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
406 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
407 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
408 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
409 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
410 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
411 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
406 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
407 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
408 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
409 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
410 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
411 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
412 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
412413 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
413414 entry:
414415 %call = tail call i64 @_Z13get_global_idj(i32 0)
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900 %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX906,NO-D16-HI %s
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900,GFX9 %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX906,GFX9,NO-D16-HI %s
22 ; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s
33
44 ; GCN-LABEL: {{^}}store_global_hi_v2i16:
310310
311311 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
312312 ; GCN: s_waitcnt
313 ; GCN: v_add{{(_co)?}}_{{i|u}}32_e32
314
313 ; GFX803: v_add{{(_co)?}}_{{i|u}}32_e32
315314 ; GFX803: v_addc_u32_e32
316 ; GFX900: v_addc_co_u32_e32
317
318 ; GFX906-NEXT: v_lshrrev_b32_e32
319 ; GFX906-NEXT: v_addc_co_u32_e32
320 ; GFX906: flat_store_short v[0:1], v2
321
322 ; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
315
316 ; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v
317 ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v
318
319 ; GFX906-DAG: v_lshrrev_b32_e32
320 ; GFX906: flat_store_short v[0:1], v2 offset:2050{{$}}
321
322 ; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:2050{{$}}
323323 ; GFX803: flat_store_short v[0:1], v2{{$}}
324324 ; GCN-NEXT: s_waitcnt
325325 ; GCN-NEXT: s_setpc_b64
358358
359359 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
360360 ; GCN: s_waitcnt
361 ; GCN-DAG: v_add{{(_co)?}}_{{i|u}}32_e32
362
361
362 ; GFX803-DAG: v_add_u32_e32
363363 ; GFX803-DAG: v_addc_u32_e32
364 ; GFX900-DAG: v_addc_co_u32_e32
365 ; GFX906-DAG: v_add_co_u32_e32
366
367 ; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
368
369 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
370 ; GFX906-NEXT: v_addc_co_u32_e32
371 ; GFX906-NEXT: flat_store_byte v[0:1], v2{{$}}
364
365 ; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v
366 ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v{{[0-9]+}}, vcc
367
368 ; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:1{{$}}
369
370 ; GFX906-DAG: v_lshrrev_b32_e32 v2, 16, v2
371 ; GFX906: flat_store_byte v[0:1], v2 offset:1{{$}}
372372
373373 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
374374 ; GFX803: flat_store_byte v[0:1], v2{{$}}