llvm.org GIT mirror llvm / eaf2a8e
AMDGPU: Disable stack realignment for kernels

This is something of a workaround, and the state of stack realignment controls is kind of a mess. Ideally, we would be able to specify the stack is infinitely aligned on entry to a kernel.

TargetFrameLowering provides multiple controls which apply at different points. The StackRealignable field is used during SelectionDAG, and is for some reason distinct from this hook. StackAlignment is a single field not dependent on the function. It would probably be better to make that dependent on the calling convention, and the maximum value for kernels.

Currently this doesn't really change anything, since the frame lowering mostly does its own thing. This helps avoid regressions in a future change which will rely more heavily on hasFP.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362447 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault 3 months ago
4 changed file(s) with 334 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
242 242 }
243 243
244 244 return Reserved;
245 }
246
247 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
248 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
249 // On entry, the base address is 0, so it can't possibly need any more
250 // alignment.
251
252 // FIXME: Should be able to specify the entry frame alignment per calling
253 // convention instead.
254 if (Info->isEntryFunction())
255 return false;
256
257 return TargetRegisterInfo::canRealignStack(MF);
245 258 }
246 259
247 260 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
72 72
73 73 unsigned getFrameRegister(const MachineFunction &MF) const override;
74 74
75 bool canRealignStack(const MachineFunction &MF) const override;
75 76 bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
76 77
77 78 bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3
4 ; Make sure the stack is never realigned for entry functions.
5
6 define amdgpu_kernel void @max_alignment_128() #0 {
7 ; VI-LABEL: max_alignment_128:
8 ; VI: ; %bb.0:
9 ; VI-NEXT: s_add_u32 s4, s4, s7
10 ; VI-NEXT: v_mov_b32_e32 v0, 9
11 ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
12 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
13 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:128
14 ; VI-NEXT: s_endpgm
15 ; VI-NEXT: .section .rodata,#alloc
16 ; VI-NEXT: .p2align 6
17 ; VI-NEXT: .amdhsa_kernel max_alignment_128
18 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
19 ; VI-NEXT: .amdhsa_private_segment_fixed_size 256
20 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
21 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
22 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
23 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
24 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
25 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
26 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
27 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
28 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
29 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
30 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
31 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
32 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
33 ; VI-NEXT: .amdhsa_next_free_vgpr 1
34 ; VI-NEXT: .amdhsa_next_free_sgpr 8
35 ; VI-NEXT: .amdhsa_reserve_vcc 0
36 ; VI-NEXT: .amdhsa_float_round_mode_32 0
37 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
38 ; VI-NEXT: .amdhsa_float_denorm_mode_32 0
39 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
40 ; VI-NEXT: .amdhsa_dx10_clamp 1
41 ; VI-NEXT: .amdhsa_ieee_mode 1
42 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
43 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
44 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
45 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
46 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
47 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
48 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
49 ; VI-NEXT: .end_amdhsa_kernel
50 ; VI-NEXT: .text
51 ;
52 ; GFX9-LABEL: max_alignment_128:
53 ; GFX9: ; %bb.0:
54 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
55 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
56 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
57 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:128
58 ; GFX9-NEXT: s_endpgm
59 ; GFX9-NEXT: .section .rodata,#alloc
60 ; GFX9-NEXT: .p2align 6
61 ; GFX9-NEXT: .amdhsa_kernel max_alignment_128
62 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
63 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
64 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
65 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
66 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
67 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
68 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
69 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
70 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
71 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
72 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
73 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
74 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
75 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
76 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
77 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
78 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8
79 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
80 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
81 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
82 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
83 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
84 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
85 ; GFX9-NEXT: .amdhsa_ieee_mode 1
86 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
87 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
88 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
89 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
90 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
91 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
92 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
93 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
94 ; GFX9-NEXT: .end_amdhsa_kernel
95 ; GFX9-NEXT: .text
96 %alloca.align = alloca i32, align 128, addrspace(5)
97 store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
98 ret void
99 }
100
101 define amdgpu_kernel void @stackrealign_attr() #1 {
102 ; VI-LABEL: stackrealign_attr:
103 ; VI: ; %bb.0:
104 ; VI-NEXT: s_add_u32 s4, s4, s7
105 ; VI-NEXT: v_mov_b32_e32 v0, 9
106 ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
107 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
108 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
109 ; VI-NEXT: s_endpgm
110 ; VI-NEXT: .section .rodata,#alloc
111 ; VI-NEXT: .p2align 6
112 ; VI-NEXT: .amdhsa_kernel stackrealign_attr
113 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
114 ; VI-NEXT: .amdhsa_private_segment_fixed_size 8
115 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
116 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
117 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
118 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
119 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
120 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
121 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
122 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
123 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
124 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
125 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
126 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
127 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
128 ; VI-NEXT: .amdhsa_next_free_vgpr 1
129 ; VI-NEXT: .amdhsa_next_free_sgpr 8
130 ; VI-NEXT: .amdhsa_reserve_vcc 0
131 ; VI-NEXT: .amdhsa_float_round_mode_32 0
132 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
133 ; VI-NEXT: .amdhsa_float_denorm_mode_32 0
134 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
135 ; VI-NEXT: .amdhsa_dx10_clamp 1
136 ; VI-NEXT: .amdhsa_ieee_mode 1
137 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
138 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
139 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
140 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
141 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
142 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
143 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
144 ; VI-NEXT: .end_amdhsa_kernel
145 ; VI-NEXT: .text
146 ;
147 ; GFX9-LABEL: stackrealign_attr:
148 ; GFX9: ; %bb.0:
149 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
150 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
151 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
152 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
153 ; GFX9-NEXT: s_endpgm
154 ; GFX9-NEXT: .section .rodata,#alloc
155 ; GFX9-NEXT: .p2align 6
156 ; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
157 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
158 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8
159 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
160 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
161 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
162 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
163 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
164 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
165 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
166 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
167 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
168 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
169 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
170 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
171 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
172 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
173 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8
174 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
175 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
176 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
177 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
178 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
179 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
180 ; GFX9-NEXT: .amdhsa_ieee_mode 1
181 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
182 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
183 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
184 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
185 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
186 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
187 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
188 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
189 ; GFX9-NEXT: .end_amdhsa_kernel
190 ; GFX9-NEXT: .text
191 %alloca.align = alloca i32, align 4, addrspace(5)
192 store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
193 ret void
194 }
195
196 define amdgpu_kernel void @alignstack_attr() #2 {
197 ; VI-LABEL: alignstack_attr:
198 ; VI: ; %bb.0:
199 ; VI-NEXT: s_add_u32 s4, s4, s7
200 ; VI-NEXT: v_mov_b32_e32 v0, 9
201 ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
202 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
203 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
204 ; VI-NEXT: s_endpgm
205 ; VI-NEXT: .section .rodata,#alloc
206 ; VI-NEXT: .p2align 6
207 ; VI-NEXT: .amdhsa_kernel alignstack_attr
208 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0
209 ; VI-NEXT: .amdhsa_private_segment_fixed_size 128
210 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
211 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
212 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
213 ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
214 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
215 ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
216 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
217 ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
218 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
219 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
220 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
221 ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
222 ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
223 ; VI-NEXT: .amdhsa_next_free_vgpr 1
224 ; VI-NEXT: .amdhsa_next_free_sgpr 8
225 ; VI-NEXT: .amdhsa_reserve_vcc 0
226 ; VI-NEXT: .amdhsa_float_round_mode_32 0
227 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0
228 ; VI-NEXT: .amdhsa_float_denorm_mode_32 0
229 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
230 ; VI-NEXT: .amdhsa_dx10_clamp 1
231 ; VI-NEXT: .amdhsa_ieee_mode 1
232 ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
233 ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
234 ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
235 ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
236 ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
237 ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
238 ; VI-NEXT: .amdhsa_exception_int_div_zero 0
239 ; VI-NEXT: .end_amdhsa_kernel
240 ; VI-NEXT: .text
241 ;
242 ; GFX9-LABEL: alignstack_attr:
243 ; GFX9: ; %bb.0:
244 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
245 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
246 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
247 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
248 ; GFX9-NEXT: s_endpgm
249 ; GFX9-NEXT: .section .rodata,#alloc
250 ; GFX9-NEXT: .p2align 6
251 ; GFX9-NEXT: .amdhsa_kernel alignstack_attr
252 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
253 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
254 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
255 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
256 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
257 ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
258 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
259 ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
260 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
261 ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
262 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
263 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
264 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
265 ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
266 ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
267 ; GFX9-NEXT: .amdhsa_next_free_vgpr 1
268 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8
269 ; GFX9-NEXT: .amdhsa_reserve_vcc 0
270 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0
271 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
272 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
273 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
274 ; GFX9-NEXT: .amdhsa_dx10_clamp 1
275 ; GFX9-NEXT: .amdhsa_ieee_mode 1
276 ; GFX9-NEXT: .amdhsa_fp16_overflow 0
277 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
278 ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
279 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
280 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
281 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
282 ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
283 ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
284 ; GFX9-NEXT: .end_amdhsa_kernel
285 ; GFX9-NEXT: .text
286 %alloca.align = alloca i32, align 4, addrspace(5)
287 store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
288 ret void
289 }
290
291 attributes #0 = { nounwind }
292 attributes #1 = { nounwind "stackrealign" }
293 attributes #2 = { nounwind alignstack=128 }
119 119 ret void
120 120 }
121 121
122 ; GCN-LABEL: {{^}}default_realign_align128:
123 ; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
124 ; GCN-NEXT: s_and_b32 s5, [[TMP]], 0xffffe000
125 ; GCN-NEXT: s_add_u32 s32, s32, 0x6000
126 ; GCN-NOT: s5
127 ; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:128
128 ; GCN: s_sub_u32 s32, s32, 0x6000
129 define void @default_realign_align128(i32 %idx) #0 {
130 %alloca.align = alloca i32, align 128, addrspace(5)
131 store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
132 ret void
133 }
134
135 ; GCN-LABEL: {{^}}disable_realign_align128:
136 ; GCN-NOT: s32
137 ; GCN: s_mov_b32 s5, s32
138 ; GCN-NOT: s32
139 ; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:16
140 ; GCN-NOT: s32
141 define void @disable_realign_align128(i32 %idx) #3 {
142 %alloca.align = alloca i32, align 128, addrspace(5)
143 store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
144 ret void
145 }
146
122 147 attributes #0 = { noinline nounwind }
123 148 attributes #1 = { noinline nounwind "stackrealign" }
124 149 attributes #2 = { noinline nounwind alignstack=4 }
150 attributes #3 = { noinline nounwind "no-realign-stack" }