llvm.org GIT mirror llvm / bc9fb90
AMDGPU: Don't use MUBUF vaddr if address may overflow Effectively revert r263964. Before we would not allow this if vaddr was not known to be positive. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318240 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 2 years ago
15 changed file(s) with 479 addition(s) and 224 deletion(s). Raw diff Collapse all Expand all
344344 def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
345345 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
346346 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
347
348 def FeatureEnableHugePrivateBuffer : SubtargetFeature<
349 "huge-private-buffer",
350 "EnableHugePrivateBuffer",
351 "true",
352 "Enable private/scratch buffer sizes greater than 128 GB"
353 >;
347354
348355 def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
349356 "EnableVGPRSpilling",
11591159 SDValue N1 = Addr.getOperand(1);
11601160
11611161 // Offsets in vaddr must be positive.
1162 //
1163 // The total computation of vaddr + soffset + offset must not overflow.
1164 // If vaddr is negative, even if offset is 0 the sgpr offset add will end up
1165 // overflowing.
11621166 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1163 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1167 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1168 CurDAG->SignBitIsZero(N0)) {
11641169 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
11651170 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
11661171 return true;
120120 DebuggerReserveRegs(false),
121121 DebuggerEmitPrologue(false),
122122
123 EnableHugePrivateBuffer(false),
123124 EnableVGPRSpilling(false),
124125 EnablePromoteAlloca(false),
125126 EnableLoadStoreOpt(false),
129129 bool DebuggerEmitPrologue;
130130
131131 // Used as options.
132 bool EnableHugePrivateBuffer;
132133 bool EnableVGPRSpilling;
133134 bool EnablePromoteAlloca;
134135 bool EnableLoadStoreOpt;
350351 return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
351352 }
352353
354 bool enableHugePrivateBuffer() const {
355 return EnableHugePrivateBuffer;
356 }
357
353358 bool isPromoteAllocaEnabled() const {
354359 return EnablePromoteAlloca;
355360 }
9393 cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
9494 cl::init(false));
9595
96 static cl::opt<unsigned> AssumeFrameIndexHighZeroBits(
97 "amdgpu-frame-index-zero-bits",
98 cl::desc("High bits of frame index assumed to be zero"),
99 cl::init(5),
100 cl::ReallyHidden);
101
96102 static unsigned findFirstFreeSGPR(CCState &CCInfo) {
97103 unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
98104 for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
15991605 Reg = MF.addLiveIn(Reg, RC);
16001606 SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
16011607
1608 if (Arg.Flags.isSRet() && !getSubtarget()->enableHugePrivateBuffer()) {
1609 // The return object should be reasonably addressable.
1610
1611 // FIXME: This helps when the return is a real sret. If it is an
1612 // automatically inserted sret (i.e. CanLowerReturn returns false), an
1613 // extra copy is inserted in SelectionDAGBuilder which obscures this.
1614 unsigned NumBits = 32 - AssumeFrameIndexHighZeroBits;
1615 Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
1616 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits)));
1617 }
1618
16021619 // If this is an 8 or 16-bit value, it is really passed promoted
16031620 // to 32 bits. Insert an assert[sz]ext to capture this, then
16041621 // truncate to the right size.
32153232 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
32163233 case ISD::FP_ROUND:
32173234 return lowerFP_ROUND(Op, DAG);
3218
32193235 case ISD::TRAP:
32203236 case ISD::DEBUGTRAP:
32213237 return lowerTRAP(Op, DAG);
69967012
69977013 TargetLoweringBase::finalizeLowering(MF);
69987014 }
7015
7016 void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
7017 KnownBits &Known,
7018 const APInt &DemandedElts,
7019 const SelectionDAG &DAG,
7020 unsigned Depth) const {
7021 TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts,
7022 DAG, Depth);
7023
7024 if (getSubtarget()->enableHugePrivateBuffer())
7025 return;
7026
7027 // Technically it may be possible to have a dispatch with a single workitem
7028 // that uses the full private memory size, but that's not really useful. We
7029 // can't use vaddr in MUBUF instructions if we don't know the address
7030 // calculation won't overflow, so assume the high bits (sign included) are 0.
7031 Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
7032 }
276276 SDValue V) const;
277277
278278 void finalizeLowering(MachineFunction &MF) const override;
279
280 void computeKnownBitsForFrameIndex(const SDValue Op,
281 KnownBits &Known,
282 const APInt &DemandedElts,
283 const SelectionDAG &DAG,
284 unsigned Depth = 0) const override;
279285 };
280286
281287 } // End namespace llvm
0 ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
1 ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE
0 ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
22 ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
3 ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA
4 ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
5 ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
6
7 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=HSAOPT -check-prefix=OPT %s
8 ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=NOHSAOPT -check-prefix=OPT %s
3 ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
4 ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
5 ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
6
7 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
8 ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
99
1010 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
1111
390390 ; FUNC-LABEL: ptrtoint:
391391 ; SI-NOT: ds_write
392392 ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
393 ; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5 ;
393 ; SI: v_add_i32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5,
394 ; SI: buffer_load_dword v{{[0-9]+}}, [[ADD_OFFSET]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
394395 define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
395396 %alloca = alloca [16 x i32]
396397 %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
2929 %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
3030 %a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid
3131 %b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid
32 %a = load i32, i32 addrspace(1)* %a_ptr
33 %b = load i32, i32 addrspace(1)* %b_ptr
32 %a = load i32, i32 addrspace(1)* %a_ptr, !range !0
33 %b = load i32, i32 addrspace(1)* %b_ptr, !range !0
3434 %result = add i32 %a, %b
3535 %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b
3636 store i32 %result, i32* %alloca_ptr, align 4
3737 ; Dummy call
3838 call void @llvm.amdgcn.s.barrier()
39 %reload = load i32, i32* %alloca_ptr, align 4
39 %reload = load i32, i32* %alloca_ptr, align 4, !range !0
4040 %out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
4141 store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
4242 ret void
4545 attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }
4646 attributes #1 = { nounwind readnone }
4747 attributes #2 = { nounwind convergent }
48
49 !0 = !{i32 0, i32 65536 }
100100 ret void
101101 }
102102
103 ; FIXME: Should be able to see that this can use vaddr, but the
104 ; FrameIndex is hidden behind a CopyFromReg in the second block.
105
103106 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block:
104107 ; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4
105108 ; GCN: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
107110 ; GCN: s_and_saveexec_b64
108111
109112 ; GCN: v_add_i32_e32 v0, vcc, 4, [[ADD]]
110 ; GCN: buffer_load_dword v1, v1, s[0:3], s4 offen offset:4
113 ; GCN: buffer_load_dword v1, v0, s[0:3], s4 offen{{$}}
111114 ; GCN: ds_write_b32
112115 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 }* byval %arg0, i32 %arg2) #0 {
113116 %cmp = icmp eq i32 %arg2, 0
194197 ret void
195198 }
196199
200 ; GCN-LABEL: {{^}}alloca_ptr_nonentry_block:
201 ; GCN: s_and_saveexec_b64
202 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s5 offset:12
203 define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
204 %alloca0 = alloca { i8, i32 }, align 4
205 %cmp = icmp eq i32 %arg0, 0
206 br i1 %cmp, label %bb, label %ret
207
208 bb:
209 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %alloca0, i32 0, i32 0
210 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %alloca0, i32 0, i32 1
211 %load1 = load volatile i32, i32* %gep1
212 store volatile i32* %gep1, i32* addrspace(3)* undef
213 br label %ret
214
215 ret:
216 ret void
217 }
218
197219 attributes #0 = { nounwind }
384384 ret void
385385 }
386386
387 ; FIXME: Should be able to fold offsets in all of these. Call lowering
388 ; introduces an extra CopyToReg/CopyFromReg obscuring the AssertZext
389 ; inserted. Not using it introduces the spills.
390
387391 ; GCN-LABEL: {{^}}v33i32_func_void:
392 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
393 ; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
394 ; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
395
388396 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
389 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
390 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
391 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
392 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
393 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
394 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
395 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
396 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
397 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
398 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
399 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
400 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
401 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
402 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
403 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
404 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
405 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
406 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
407 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
408 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
409 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
410 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
411 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
412 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
413 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
414 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
415 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
416 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
417 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
418 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
419 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
420 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
397
398 ; GCN-DAG: v_add_i32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
399 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}}
400
401 ; GCN-DAG: v_add_i32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
402 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}}
403
404 ; GCN-DAG: v_add_i32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
405 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}}
406
407 ; GCN-DAG: v_add_i32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
408 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}}
409
410 ; GCN-DAG: v_add_i32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
411 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}}
412
413 ; GCN-DAG: v_add_i32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
414 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}}
415
416 ; GCN-DAG: v_add_i32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
417 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}}
418
419 ; GCN-DAG: v_add_i32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
420 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}}
421
422 ; GCN-DAG: v_add_i32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
423 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}}
424
425 ; GCN-DAG: v_add_i32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
426 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}}
427
428 ; GCN-DAG: v_add_i32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
429 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}}
430
431 ; GCN-DAG: v_add_i32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
432 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}}
433
434 ; GCN-DAG: v_add_i32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
435 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}}
436
437 ; GCN-DAG: v_add_i32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
438 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}}
439
440 ; GCN-DAG: v_add_i32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
441 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}}
442
443 ; GCN-DAG: v_add_i32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
444 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}}
445
446 ; GCN-DAG: v_add_i32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
447 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}}
448
449 ; GCN-DAG: v_add_i32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
450 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}}
451
452 ; GCN-DAG: v_add_i32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
453 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}}
454
455 ; GCN-DAG: v_add_i32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
456 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}}
457
458 ; GCN-DAG: v_add_i32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
459 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}}
460
461 ; GCN-DAG: v_add_i32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
462 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}}
463
464 ; GCN-DAG: v_add_i32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
465 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}}
466
467 ; GCN-DAG: v_add_i32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
468 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}}
469
470 ; GCN-DAG: v_add_i32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
471 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}}
472
473 ; GCN-DAG: v_add_i32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
474 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}}
475
476 ; GCN-DAG: v_add_i32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
477 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}}
478
479 ; GCN-DAG: v_add_i32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
480 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}}
481
482 ; GCN-DAG: v_add_i32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
483 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}}
484
485 ; GCN-DAG: v_add_i32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
486 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}}
487
488 ; GCN-DAG: v_add_i32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
489 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}}
490
491 ; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
492 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
493
494 ; GCN: buffer_load_dword v34
495 ; GCN: buffer_load_dword v33
496 ; GCN: buffer_load_dword v32
421497 ; GCN: s_waitcnt vmcnt(0)
422498 ; GCN-NEXT: s_setpc_b64
423499 define <33 x i32> @v33i32_func_void() #0 {
427503 }
428504
429505 ; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
506 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
507 ; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
508 ; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
509
430510 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
431 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
432 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
433 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
434 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
435 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
436 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
437 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
438 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
439 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
440 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
441 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
442 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
443 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
444 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
445 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
446 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
447 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
448 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
449 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
450 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
451 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
452 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
453 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
454 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
455 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
456 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
457 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
458 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
459 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
460 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
461 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
462 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
511
512 ; GCN-DAG: v_add_i32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
513 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}}
514
515 ; GCN-DAG: v_add_i32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
516 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}}
517
518 ; GCN-DAG: v_add_i32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
519 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}}
520
521 ; GCN-DAG: v_add_i32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
522 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}}
523
524 ; GCN-DAG: v_add_i32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
525 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}}
526
527 ; GCN-DAG: v_add_i32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
528 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}}
529
530 ; GCN-DAG: v_add_i32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
531 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}}
532
533 ; GCN-DAG: v_add_i32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
534 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}}
535
536 ; GCN-DAG: v_add_i32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
537 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}}
538
539 ; GCN-DAG: v_add_i32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
540 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}}
541
542 ; GCN-DAG: v_add_i32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
543 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}}
544
545 ; GCN-DAG: v_add_i32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
546 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}}
547
548 ; GCN-DAG: v_add_i32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
549 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}}
550
551 ; GCN-DAG: v_add_i32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
552 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}}
553
554 ; GCN-DAG: v_add_i32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
555 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}}
556
557 ; GCN-DAG: v_add_i32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
558 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}}
559
560 ; GCN-DAG: v_add_i32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
561 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}}
562
563 ; GCN-DAG: v_add_i32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
564 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}}
565
566 ; GCN-DAG: v_add_i32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
567 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}}
568
569 ; GCN-DAG: v_add_i32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
570 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}}
571
572 ; GCN-DAG: v_add_i32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
573 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}}
574
575 ; GCN-DAG: v_add_i32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
576 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}}
577
578 ; GCN-DAG: v_add_i32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
579 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}}
580
581 ; GCN-DAG: v_add_i32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
582 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}}
583
584 ; GCN-DAG: v_add_i32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
585 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}}
586
587 ; GCN-DAG: v_add_i32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
588 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}}
589
590 ; GCN-DAG: v_add_i32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
591 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}}
592
593 ; GCN-DAG: v_add_i32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
594 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}}
595
596 ; GCN-DAG: v_add_i32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
597 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}}
598
599 ; GCN-DAG: v_add_i32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
600 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}}
601
602 ; GCN-DAG: v_add_i32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
603 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}}
604
605 ; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
606 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
607
608 ; GCN: buffer_load_dword v34
609 ; GCN: buffer_load_dword v33
610 ; GCN: buffer_load_dword v32
463611 ; GCN: s_waitcnt vmcnt(0)
464612 ; GCN-NEXT: s_setpc_b64
465613 define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
469617 }
470618
471619 ; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
620 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
621 ; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
622
472623 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
473 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
474 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:132{{$}}
475 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:136{{$}}
476 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:140{{$}}
477 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:144{{$}}
478 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:148{{$}}
479 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:152{{$}}
480 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:156{{$}}
481 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:160{{$}}
482 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:164{{$}}
483 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:168{{$}}
484 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:172{{$}}
485 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:176{{$}}
486 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:180{{$}}
487 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:184{{$}}
488 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:188{{$}}
489 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:192{{$}}
490 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:196{{$}}
491 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:200{{$}}
492 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:204{{$}}
493 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:208{{$}}
494 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:212{{$}}
495 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:216{{$}}
496 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:220{{$}}
497 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:224{{$}}
498 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:228{{$}}
499 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:232{{$}}
500 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:236{{$}}
501 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:240{{$}}
502 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:244{{$}}
503 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:248{{$}}
504 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:252{{$}}
624
625 ; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
626 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
627
628
629 ; GCN-DAG: v_add_i32_e32 [[ADD_252:v[0-9]+]], vcc, 0xfc, v0
630 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_252]], s[0:3], s4 offen{{$}}
631
632 ; GCN: buffer_load_dword v33
633 ; GCN: buffer_load_dword v32
505634 ; GCN: s_waitcnt vmcnt(0)
506635 ; GCN-NEXT: s_setpc_b64
507636 define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
0 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
1
2 ; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_small:
3 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
4 ; GCN-NOT: [[FI]]
5 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
; Kernel with a single private i32 slot. It writes to the slot, then masks the
; slot's address and stores the masked value to an undef global pointer.
; Uses attribute group #0 (nounwind only, no huge-private-buffer feature).
; The directives preceding this function expect the frame-index register to be
; stored directly, with no other use of it in between, i.e. no separate mask
; instruction is emitted for this variant.
6 define amdgpu_kernel void @scratch_buffer_known_high_bit_small() #0 {
7 %alloca = alloca i32, align 4
; Volatile store keeps the private slot (and thus the frame index) alive.
8 store volatile i32 0, i32* %alloca
9 %toint = ptrtoint i32* %alloca to i32
; 2147483647 == 0x7fffffff: clears only the sign bit of the address.
10 %masked = and i32 %toint, 2147483647
11 store volatile i32 %masked, i32 addrspace(1)* undef
12 ret void
13 }
14
15 ; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_huge:
16 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
17 ; GCN-DAG: buffer_store_dword
18 ; GCN-DAG: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x7ffffffc, [[FI]]
19 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
; Same IR body as the "small" variant, but compiled with attribute group #1,
; which adds the "+huge-private-buffer" target feature.
; The directives preceding this function expect an explicit v_and_b32 of the
; frame-index register with 0x7ffffffc, and expect that masked register to be
; the value stored.
; NOTE(review): the expected mask also clears the low two bits; presumably
; that comes from the alloca's 4-byte alignment -- confirm.
20 define amdgpu_kernel void @scratch_buffer_known_high_bit_huge() #1 {
21 %alloca = alloca i32, align 4
; Volatile store keeps the private slot (and thus the frame index) alive.
22 store volatile i32 0, i32* %alloca
23 %toint = ptrtoint i32* %alloca to i32
; 2147483647 == 0x7fffffff: clears only the sign bit of the address.
24 %masked = and i32 %toint, 2147483647
25 store volatile i32 %masked, i32 addrspace(1)* undef
26 ret void
27 }
28
29 attributes #0 = { nounwind }
30 attributes #1 = { nounwind "target-features"="+huge-private-buffer" }
294294
295295 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg:
296296 ; GCN: s_waitcnt
297 ; GFX9-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], s4 offen offset:4094{{$}}
298 ; GFX9-NEXT: s_waitcnt
299 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
300 ; GFX9-NEXT: s_waitcnt
301 ; GFX9-NEXT: s_setpc_b64
302
303 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
304 define void @load_private_hi_v2i16_reglo_vreg(i16* %in, i16 %reg) #0 {
305 entry:
306 %gep = getelementptr inbounds i16, i16* %in, i64 2047
297 ; GFX9: buffer_load_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}
298 ; GFX9-NEXT: s_waitcnt
299 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
300 ; GFX9-NEXT: s_waitcnt
301 ; GFX9-NEXT: s_setpc_b64
302
303 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
304 define void @load_private_hi_v2i16_reglo_vreg(i16* byval %in, i16 %reg) #0 {
305 entry:
306 %gep = getelementptr inbounds i16, i16* %in, i64 2045
307307 %load = load i16, i16* %gep
308308 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
309309 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
313313
314314 ; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg:
315315 ; GCN: s_waitcnt
316 ; GFX9-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], s4 offen offset:4094{{$}}
317 ; GFX9-NEXT: s_waitcnt
318 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
319 ; GFX9-NEXT: s_waitcnt
320 ; GFX9-NEXT: s_setpc_b64
321
322 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
323 define void @load_private_hi_v2f16_reglo_vreg(half* %in, half %reg) #0 {
324 entry:
325 %gep = getelementptr inbounds half, half* %in, i64 2047
316 ; GFX9: buffer_load_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}
317 ; GFX9-NEXT: s_waitcnt
318 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
319 ; GFX9-NEXT: s_waitcnt
320 ; GFX9-NEXT: s_setpc_b64
321
322 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
323 define void @load_private_hi_v2f16_reglo_vreg(half* byval %in, half %reg) #0 {
324 entry:
325 %gep = getelementptr inbounds half, half* %in, i64 2045
326326 %load = load half, half* %gep
327327 %build0 = insertelement <2 x half> undef, half %reg, i32 0
328328 %build1 = insertelement <2 x half> %build0, half %load, i32 1
332332
333333 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_nooff:
334334 ; GCN: s_waitcnt
335 ; GFX9-NEXT: buffer_load_short_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
336 ; GFX9-NEXT: s_waitcnt
337 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
335 ; GFX9: buffer_load_short_d16_hi v0, off, s[0:3], s4 offset:4094{{$}}
336 ; GFX9: s_waitcnt
337 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
338338 ; GFX9-NEXT: s_waitcnt
339339 ; GFX9-NEXT: s_setpc_b64
340340
341341 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
342 define void @load_private_hi_v2i16_reglo_vreg_nooff(i16* %in, i16 %reg) #0 {
342 define void @load_private_hi_v2i16_reglo_vreg_nooff(i16* byval %in, i16 %reg) #0 {
343343 entry:
344344 %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
345345 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
368368
369369 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_zexti8:
370370 ; GCN: s_waitcnt
371 ; GFX9-NEXT: buffer_load_ubyte_d16_hi v1, v0, s[0:3], s4 offen offset:2047{{$}}
372 ; GFX9-NEXT: s_waitcnt
373 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
374 ; GFX9-NEXT: s_waitcnt
375 ; GFX9-NEXT: s_setpc_b64
376
377 ; VI: buffer_load_ubyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}}
378 define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8* %in, i16 %reg) #0 {
379 entry:
380 %gep = getelementptr inbounds i8, i8* %in, i64 2047
371 ; GFX9: buffer_load_ubyte_d16_hi v0, off, s[0:3], s5 offset:4095{{$}}
372 ; GFX9-NEXT: s_waitcnt
373 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
374 ; GFX9-NEXT: s_waitcnt
375 ; GFX9-NEXT: s_setpc_b64
376
377 ; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
378 define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8* byval %in, i16 %reg) #0 {
379 entry:
380 %gep = getelementptr inbounds i8, i8* %in, i64 4091
381381 %load = load i8, i8* %gep
382382 %ext = zext i8 %load to i16
383383 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
388388
389389 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_sexti8:
390390 ; GCN: s_waitcnt
391 ; GFX9-NEXT: buffer_load_sbyte_d16_hi v1, v0, s[0:3], s4 offen offset:2047{{$}}
392 ; GFX9-NEXT: s_waitcnt
393 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
394 ; GFX9-NEXT: s_waitcnt
395 ; GFX9-NEXT: s_setpc_b64
396
397 ; VI: buffer_load_sbyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}}
398 define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8* %in, i16 %reg) #0 {
399 entry:
400 %gep = getelementptr inbounds i8, i8* %in, i64 2047
391 ; GFX9: buffer_load_sbyte_d16_hi v0, off, s[0:3], s5 offset:4095{{$}}
392 ; GFX9-NEXT: s_waitcnt
393 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
394 ; GFX9-NEXT: s_waitcnt
395 ; GFX9-NEXT: s_setpc_b64
396
397 ; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
398 define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8* byval %in, i16 %reg) #0 {
399 entry:
400 %gep = getelementptr inbounds i8, i8* %in, i64 4091
401401 %load = load i8, i8* %gep
402402 %ext = sext i8 %load to i16
403403 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
640640 ; FIXME: Is there a cost to using the extload over not?
641641 ; GCN-LABEL: {{^}}load_private_v2i16_split:
642642 ; GCN: s_waitcnt
643 ; GFX9-NEXT: buffer_load_ushort v1, v0, s[0:3], s4 offen{{$}}
644 ; GFX9-NEXT: s_waitcnt
645 ; GFX9-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], s4 offen offset:2
646 ; GFX9-NEXT: s_waitcnt
647 ; GFX9-NEXT: v_mov_b32_e32 v0, v1
648 ; GFX9-NEXT: s_setpc_b64
649 define <2 x i16> @load_private_v2i16_split(i16* %in) #0 {
643 ; GFX9: buffer_load_ushort v0, off, s[0:3], s5 offset:4{{$}}
644 ; GFX9-NEXT: s_waitcnt
645 ; GFX9-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], s5 offset:6
646 ; GFX9-NEXT: s_waitcnt
647 ; GFX9-NEXT: s_setpc_b64
648 define <2 x i16> @load_private_v2i16_split(i16* byval %in) #0 {
650649 entry:
651650 %gep = getelementptr inbounds i16, i16* %in, i32 1
652651 %load0 = load volatile i16, i16* %in
339339
340340 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg:
341341 ; GCN: s_waitcnt
342 ; GFX9-NEXT: buffer_load_short_d16 v1, v0, s[0:3], s4 offen offset:4094{{$}}
343 ; GFX9-NEXT: s_waitcnt
344 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
345 ; GFX9-NEXT: s_waitcnt
346 ; GFX9-NEXT: s_setpc_b64
347
348 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
349 define void @load_private_lo_v2i16_reglo_vreg(i16* %in, i32 %reg) #0 {
350 entry:
351 %reg.bc = bitcast i32 %reg to <2 x i16>
352 %gep = getelementptr inbounds i16, i16* %in, i64 2047
342 ; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
343 ; GFX9-NEXT: s_waitcnt
344 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
345 ; GFX9-NEXT: s_waitcnt
346 ; GFX9-NEXT: s_setpc_b64
347
348 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
349 define void @load_private_lo_v2i16_reglo_vreg(i16* byval %in, i32 %reg) #0 {
350 entry:
351 %reg.bc = bitcast i32 %reg to <2 x i16>
352 %gep = getelementptr inbounds i16, i16* %in, i64 2045
353353 %load = load i16, i16* %gep
354354 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
355355 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
358358
359359 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg:
360360 ; GCN: s_waitcnt
361 ; GFX9-NEXT: buffer_load_ushort v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen offset:4094{{$}}
361 ; GFX9: buffer_load_ushort v1, off, s[0:3], s5 offset:4094{{$}}
362362 ; GFX9-NEXT: s_waitcnt
363363 ; GFX9: v_and_b32
364364 ; GFX9: v_lshl_or_b32
367367 ; GFX9-NEXT: s_waitcnt
368368 ; GFX9-NEXT: s_setpc_b64
369369
370 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
371 define void @load_private_lo_v2i16_reghi_vreg(i16* %in, i16 %reg) #0 {
372 entry:
373 %gep = getelementptr inbounds i16, i16* %in, i64 2047
370 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
371 define void @load_private_lo_v2i16_reghi_vreg(i16* byval %in, i16 %reg) #0 {
372 entry:
373 %gep = getelementptr inbounds i16, i16* %in, i64 2045
374374 %load = load i16, i16* %gep
375375 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
376376 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
380380
381381 ; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg:
382382 ; GCN: s_waitcnt
383 ; GFX9-NEXT: buffer_load_short_d16 v1, v0, s[0:3], s4 offen offset:4094{{$}}
384 ; GFX9-NEXT: s_waitcnt
385 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
386 ; GFX9-NEXT: s_waitcnt
387 ; GFX9-NEXT: s_setpc_b64
388
389 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
390 define void @load_private_lo_v2f16_reglo_vreg(half* %in, i32 %reg) #0 {
383 ; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
384 ; GFX9-NEXT: s_waitcnt
385 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
386 ; GFX9-NEXT: s_waitcnt
387 ; GFX9-NEXT: s_setpc_b64
388
389 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
390 define void @load_private_lo_v2f16_reglo_vreg(half* byval %in, i32 %reg) #0 {
391391 entry:
392392 %reg.bc = bitcast i32 %reg to <2 x half>
393 %gep = getelementptr inbounds half, half* %in, i64 2047
393 %gep = getelementptr inbounds half, half* %in, i64 2045
394394 %load = load half, half* %gep
395395 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
396396 store <2 x half> %build1, <2 x half> addrspace(1)* undef
453453
454454 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8:
455455 ; GCN: s_waitcnt
456 ; GFX9-NEXT: buffer_load_ubyte_d16 v1, v0, s[0:3], s4 offen offset:2047{{$}}
457 ; GFX9-NEXT: s_waitcnt
458 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
459 ; GFX9-NEXT: s_waitcnt
460 ; GFX9-NEXT: s_setpc_b64
461
462 ; VI: buffer_load_ubyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}}
463 define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
464 entry:
465 %reg.bc = bitcast i32 %reg to <2 x i16>
466 %gep = getelementptr inbounds i8, i8* %in, i64 2047
456 ; GFX9: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
457 ; GFX9-NEXT: s_waitcnt
458 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
459 ; GFX9-NEXT: s_waitcnt
460 ; GFX9-NEXT: s_setpc_b64
461
462 ; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
463 define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8* byval %in, i32 %reg) #0 {
464 entry:
465 %reg.bc = bitcast i32 %reg to <2 x i16>
466 %gep = getelementptr inbounds i8, i8* %in, i64 4091
467467 %load = load i8, i8* %gep
468468 %ext = zext i8 %load to i16
469469 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
473473
474474 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8:
475475 ; GCN: s_waitcnt
476 ; GFX9-NEXT: buffer_load_sbyte_d16 v1, v0, s[0:3], s4 offen offset:2047{{$}}
477 ; GFX9-NEXT: s_waitcnt
478 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
479 ; GFX9-NEXT: s_waitcnt
480 ; GFX9-NEXT: s_setpc_b64
481
482 ; VI: buffer_load_sbyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}}
483 define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
484 entry:
485 %reg.bc = bitcast i32 %reg to <2 x i16>
486 %gep = getelementptr inbounds i8, i8* %in, i64 2047
476 ; GFX9: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
477 ; GFX9-NEXT: s_waitcnt
478 ; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
479 ; GFX9-NEXT: s_waitcnt
480 ; GFX9-NEXT: s_setpc_b64
481
482 ; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
483 define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8* byval %in, i32 %reg) #0 {
484 entry:
485 %reg.bc = bitcast i32 %reg to <2 x i16>
486 %gep = getelementptr inbounds i8, i8* %in, i64 4091
487487 %load = load i8, i8* %gep
488488 %ext = sext i8 %load to i16
489489 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN %s
0 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
22
33 ; When a frame index offset is more than 12-bits, make sure we don't store
44 ; it in mubuf's offset field.
8585 ret void
8686 }
8787
88 ; GCN-LABEL: {{^}}neg_vaddr_offset_inbounds:
89 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
90 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
; Stores 0 into a private [8192 x i32] array at a dynamic index (%offset + 4),
; using an inbounds GEP.
; The directives preceding this function expect the constant part of the
; address to be added with a separate v_add_i32 of 16, and the store to use
; "offen" with no immediate offset field.
91 define amdgpu_kernel void @neg_vaddr_offset_inbounds(i32 %offset) {
92 entry:
93 %array = alloca [8192 x i32]
; Constant element offset of 4 i32s, i.e. 16 bytes, on top of the dynamic index.
94 %ptr_offset = add i32 %offset, 4
95 %ptr = getelementptr inbounds [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset
96 store i32 0, i32* %ptr
97 ret void
98 }
99
88100 ; GCN-LABEL: {{^}}neg_vaddr_offset:
89 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16{{$}}
101 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
102 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
90103 define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) {
91104 entry:
92105 %array = alloca [8192 x i32]
439439
440440 ; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
441441 ; GCN: s_waitcnt
442 ; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen offset:4094{{$}}
443
444 ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
445 ; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen offset:4094{{$}}
446
447 ; GCN-NEXT: s_waitcnt
448 ; GCN-NEXT: s_setpc_b64
449 define void @store_private_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
450 entry:
451 %value = bitcast i32 %arg to <2 x i16>
452 %hi = extractelement <2 x i16> %value, i32 1
453 %gep = getelementptr inbounds i16, i16* %out, i64 2047
442 ; GFX9: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}
443
444 ; VI: v_lshrrev_b32_e32 v0, 16, v0
445 ; VI-NEXT: buffer_store_short v0, off, s[0:3], s5 offset:4094{{$}}
446
447 ; GCN-NEXT: s_waitcnt
448 ; GCN-NEXT: s_setpc_b64
; Stores the high 16-bit half of %arg (viewed as <2 x i16>) to element 2045 of
; the byval i16 pointer %out.
; The directives preceding this function expect a store with no vaddr ("off")
; and an immediate offset of 4094.
; NOTE(review): element 2045 is byte offset 4090 from %out; the remaining 4
; bytes presumably come from where the byval argument is placed -- confirm.
449 define void @store_private_hi_v2i16_max_offset(i16* byval %out, i32 %arg) #0 {
450 entry:
451 %value = bitcast i32 %arg to <2 x i16>
; Element 1 is the upper half of the 32-bit value.
452 %hi = extractelement <2 x i16> %value, i32 1
453 %gep = getelementptr inbounds i16, i16* %out, i64 2045
454454 store i16 %hi, i16* %gep
455455 ret void
456456 }