llvm.org GIT mirror: llvm / 0f915c6

AMDGPU: Remove unnecessary IR from MIR tests

Matt Arsenault, 2 years ago
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307311 91177308-0d34-0410-b5e6-96231b3b80d8

7 changed files with 132 additions and 377 deletions.
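The same cleanup repeats across all seven test files below: the leading "--- |" LLVM IR block is deleted where the MIR no longer refers to it, IR block names such as "(%ir-block.0)" are stripped from basic block labels, and machine memory operands that point back into the IR, e.g. ":: (volatile store 4 into %ir.out)", are dropped from loads and stores. A minimal before/after sketch of the pattern (the function @example and its registers are illustrative placeholders, not taken from this commit):

Before:

    # RUN: llc -march=amdgcn -run-pass si-fold-operands %s -o - | FileCheck %s
    --- |
      define amdgpu_kernel void @example() { ret void }
    ...
    ---
    name: example
    body: |
      bb.0 (%ir-block.0):
        %0 = V_MOV_B32_e32 1, implicit %exec
        BUFFER_STORE_DWORD_OFFSET killed %0, killed %1, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
        S_ENDPGM

After:

    # RUN: llc -march=amdgcn -run-pass si-fold-operands %s -o - | FileCheck %s
    ---
    name: example
    body: |
      bb.0:
        %0 = V_MOV_B32_e32 1, implicit %exec
        BUFFER_STORE_DWORD_OFFSET killed %0, killed %1, 0, 0, 0, 0, 0, implicit %exec
        S_ENDPGM

A load or store without a memory operand is simply treated conservatively, which is presumably acceptable here because these tests exercise operand folding, instruction shrinking, and wait insertion rather than memory aliasing.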
 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
---- |
-  define amdgpu_ps void @v_max_self_clamp_not_set_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_ps void @v_clamp_omod_already_set_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_ps void @v_omod_mul_omod_already_set_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_ps void @v_omod_mul_clamp_already_set_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_ps void @v_omod_add_omod_already_set_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_ps void @v_omod_add_clamp_already_set_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_ps void @v_max_reg_imm_f32() #0 {
-    ret void
-  }
-
-  attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
-
-...
 ---
 # GCN-LABEL: name: v_max_self_clamp_not_set_f32
 # GCN: %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %vgpr0

     %0 = COPY %vgpr0
 # RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
---- |
-  define amdgpu_kernel void @s_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
-    %and = and i32 %a, 1234567
-    store volatile i32 %and, i32 addrspace(1)* %out
-    ret void
-  }
-
-  define amdgpu_kernel void @v_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %idxprom = sext i32 %tid to i64
-    %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
-    %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
-    %a = load i32, i32 addrspace(1)* %gep.a
-    %and = and i32 %a, 1234567
-    store i32 %and, i32 addrspace(1)* %gep.out
-    ret void
-  }
-
-  define amdgpu_kernel void @s_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
-    %shl = shl i32 %a, 12
-    store volatile i32 %shl, i32 addrspace(1)* %out
-    ret void
-  }
-
-  define amdgpu_kernel void @v_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %idxprom = sext i32 %tid to i64
-    %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
-    %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
-    %a = load i32, i32 addrspace(1)* %gep.a
-    %shl = shl i32 %a, 12
-    store i32 %shl, i32 addrspace(1)* %gep.out
-    ret void
-  }
-
-  define amdgpu_kernel void @s_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
-    %ashr = ashr i32 %a, 12
-    store volatile i32 %ashr, i32 addrspace(1)* %out
-    ret void
-  }
-
-  define amdgpu_kernel void @v_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %idxprom = sext i32 %tid to i64
-    %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
-    %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
-    %a = load i32, i32 addrspace(1)* %gep.a
-    %ashr = ashr i32 %a, 12
-    store i32 %ashr, i32 addrspace(1)* %gep.out
-    ret void
-  }
-
-  define amdgpu_kernel void @s_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
-    %lshr = lshr i32 %a, 12
-    store volatile i32 %lshr, i32 addrspace(1)* %out
-    ret void
-  }
-
-  define amdgpu_kernel void @v_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %idxprom = sext i32 %tid to i64
-    %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
-    %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
-    %a = load i32, i32 addrspace(1)* %gep.a
-    %lshr = lshr i32 %a, 12
-    store i32 %lshr, i32 addrspace(1)* %gep.out
-    ret void
-  }
-
-  define amdgpu_kernel void @undefined_vreg_operand() {
-    unreachable
-  }
-
-  declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-  attributes #0 = { nounwind }
-  attributes #1 = { nounwind readnone }
-
-...
----
+...

 # GCN-LABEL: name: s_fold_and_imm_regimm_32{{$}}
 # GCN: %10 = V_MOV_B32_e32 1543, implicit %exec
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1

     %0 = COPY %sgpr0_sgpr1
-    %1 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %1 = S_LOAD_DWORDX2_IMM %0, 36, 0
     %2 = COPY %1.sub1
     %3 = COPY %1.sub0
     %4 = S_MOV_B32 61440
     %8 = S_MOV_B32 9999
     %9 = S_AND_B32 killed %7, killed %8, implicit-def dead %scc
     %10 = COPY %9
-    BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
+    BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
     %31 = V_ASHRREV_I32_e64 31, %3, implicit %exec
     %32 = REG_SEQUENCE %3, 1, %31, 2
     %33 = V_LSHLREV_B64 2, killed %32, implicit %exec
     %34 = V_MOV_B32_e32 63, implicit %exec

     %27 = V_AND_B32_e64 %26, %24, implicit %exec
-    FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr

     %28 = V_AND_B32_e64 %24, %26, implicit %exec
-    FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr

     %29 = V_AND_B32_e32 %26, %24, implicit %exec
-    FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr

     %30 = V_AND_B32_e64 %26, %26, implicit %exec
-    FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr

     %31 = V_AND_B32_e64 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr

     S_ENDPGM

   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1

     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
     %5 = S_MOV_B32 1
     %6 = COPY %4.sub1
     %7 = COPY %4.sub0
     %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
     %12 = S_LSHL_B32 killed %5, 12, implicit-def dead %scc
     %13 = COPY %12
-    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
+    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %2 = COPY %vgpr0
     %27 = S_MOV_B32 -4

     %11 = V_LSHLREV_B32_e64 12, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr

     %12 = V_LSHLREV_B32_e64 %7, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr

     %13 = V_LSHL_B32_e64 %7, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr

     %14 = V_LSHL_B32_e64 12, %7, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr

     %15 = V_LSHL_B32_e64 12, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr

     %22 = V_LSHL_B32_e64 %6, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr

     %23 = V_LSHL_B32_e64 %6, 32, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr

     %25 = V_LSHL_B32_e32 %6, %6, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr

     %26 = V_LSHLREV_B32_e32 11, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr

     %28 = V_LSHL_B32_e32 %27, %6, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr

     S_ENDPGM

   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1

     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
     %5 = S_MOV_B32 999123
     %6 = COPY %4.sub1
     %7 = COPY %4.sub0
     %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
     %12 = S_ASHR_I32 killed %5, 12, implicit-def dead %scc
     %13 = COPY %12
-    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
+    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %2 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0
     %15 = V_ASHRREV_I32_e64 31, %2, implicit %exec
     %16 = REG_SEQUENCE %2, 1, %15, 2
     %17 = V_LSHLREV_B64 2, killed %16, implicit %exec
     %35 = V_MOV_B32_e32 2, implicit %exec

     %11 = V_ASHRREV_I32_e64 8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr

     %12 = V_ASHRREV_I32_e64 %8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr

     %13 = V_ASHR_I32_e64 %7, 3, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr

     %14 = V_ASHR_I32_e64 7, %32, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr

     %15 = V_ASHR_I32_e64 %27, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr

     %22 = V_ASHR_I32_e64 %6, 4, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr

     %23 = V_ASHR_I32_e64 %6, %33, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr

     %25 = V_ASHR_I32_e32 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr

     %26 = V_ASHRREV_I32_e32 11, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr

     %28 = V_ASHR_I32_e32 %27, %35, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr

     S_ENDPGM

   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1

     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
     %5 = S_MOV_B32 -999123
     %6 = COPY %4.sub1
     %7 = COPY %4.sub0
     %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
     %12 = S_LSHR_B32 killed %5, 12, implicit-def dead %scc
     %13 = COPY %12
-    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
+    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %2 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0
     %15 = V_ASHRREV_I32_e64 31, %2, implicit %exec
     %16 = REG_SEQUENCE %2, 1, %15, 2
     %17 = V_LSHLREV_B64 2, killed %16, implicit %exec
     %35 = V_MOV_B32_e32 2, implicit %exec

     %11 = V_LSHRREV_B32_e64 8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr

     %12 = V_LSHRREV_B32_e64 %8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr

     %13 = V_LSHR_B32_e64 %7, 3, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr

     %14 = V_LSHR_B32_e64 7, %32, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr

     %15 = V_LSHR_B32_e64 %27, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr

     %22 = V_LSHR_B32_e64 %6, 4, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr

     %23 = V_LSHR_B32_e64 %6, %33, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr

     %25 = V_LSHR_B32_e32 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr

     %26 = V_LSHRREV_B32_e32 11, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr

     %28 = V_LSHR_B32_e32 %27, %35, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr

     S_ENDPGM

 # RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o - %s | FileCheck %s
---- |
-  define amdgpu_kernel void @test0() { ret void }
-  define amdgpu_kernel void @test1() { ret void }
-  define amdgpu_kernel void @test2() { ret void }
-  define amdgpu_kernel void @test3() { ret void }
-  define amdgpu_kernel void @test4() { ret void }
-  define amdgpu_kernel void @test5() { ret void }
-  define amdgpu_kernel void @loop0() { ret void }
-  define amdgpu_kernel void @loop1() { ret void }
-  define amdgpu_kernel void @loop2() { ret void }
 ...
 ---
 # Combined use/def transfer check, the basics.
 # RUN: llc -march=amdgcn -run-pass peephole-opt -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-
---- |
-  define amdgpu_kernel void @no_fold_imm_madak_mac_clamp_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_kernel void @no_fold_imm_madak_mac_omod_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_kernel void @no_fold_imm_madak_mad_clamp_f32() #0 {
-    ret void
-  }
-
-  define amdgpu_kernel void @no_fold_imm_madak_mad_omod_f32() #0 {
-    ret void
-  }
-
-  attributes #0 = { nounwind }
-
-...
----
+...
 # GCN-LABEL: name: no_fold_imm_madak_mac_clamp_f32
 # GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
 # GCN-NEXT: %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
-    liveins: %sgpr0_sgpr1, %vgpr0
-
-    %3 = COPY %vgpr0
-    %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+  bb.0:
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
     %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %28 = REG_SEQUENCE %3, 1, %27, 2
     %11 = S_MOV_B32 61440
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
-    liveins: %sgpr0_sgpr1, %vgpr0
-
-    %3 = COPY %vgpr0
-    %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+  bb.0:
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
     %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %28 = REG_SEQUENCE %3, 1, %27, 2
     %11 = S_MOV_B32 61440
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
-    liveins: %sgpr0_sgpr1, %vgpr0
-
-    %3 = COPY %vgpr0
-    %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+  bb.0:
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
     %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %28 = REG_SEQUENCE %3, 1, %27, 2
     %11 = S_MOV_B32 61440
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
-  bb.0 (%ir-block.0):
-    liveins: %sgpr0_sgpr1, %vgpr0
-
-    %3 = COPY %vgpr0
-    %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+  bb.0:
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
     %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %28 = REG_SEQUENCE %3, 1, %27, 2
     %11 = S_MOV_B32 61440
 # RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
-
---- |
-  define amdgpu_kernel void @mov_in_use_list_2x() {
-    unreachable
-  }
-
 ...
 ---

 # that the post-RA run does manage to shrink it, but right now the
 # resume crashes

---- |
-  define amdgpu_kernel void @shrink_add_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %tid.ext = sext i32 %tid to i64
-    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
-    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
-    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
-    %a = load volatile i32, i32 addrspace(1)* %a.ptr
-    %b = load volatile i32, i32 addrspace(1)* %b.ptr
-    %result = add i32 %a, %b
-    store volatile i32 %result, i32 addrspace(1)* %out.gep
-    ret void
-  }
-
-  define amdgpu_kernel void @shrink_sub_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %tid.ext = sext i32 %tid to i64
-    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
-    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
-    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
-    %a = load volatile i32, i32 addrspace(1)* %a.ptr
-    %b = load volatile i32, i32 addrspace(1)* %b.ptr
-    %result = sub i32 %a, %b
-    store volatile i32 %result, i32 addrspace(1)* %out.gep
-    ret void
-  }
-
-  define amdgpu_kernel void @shrink_subrev_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %tid.ext = sext i32 %tid to i64
-    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
-    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
-    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
-    %a = load volatile i32, i32 addrspace(1)* %a.ptr
-    %b = load volatile i32, i32 addrspace(1)* %b.ptr
-    %result = sub i32 %a, %b
-    store volatile i32 %result, i32 addrspace(1)* %out.gep
-    ret void
-  }
-
-  define amdgpu_kernel void @check_addc_src2_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %tid.ext = sext i32 %tid to i64
-    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
-    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
-    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
-    %a = load volatile i32, i32 addrspace(1)* %a.ptr
-    %b = load volatile i32, i32 addrspace(1)* %b.ptr
-    %result = add i32 %a, %b
-    store volatile i32 %result, i32 addrspace(1)* %out.gep
-    ret void
-  }
-
-  define amdgpu_kernel void @shrink_addc_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %tid.ext = sext i32 %tid to i64
-    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
-    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
-    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
-    %a = load volatile i32, i32 addrspace(1)* %a.ptr
-    %b = load volatile i32, i32 addrspace(1)* %b.ptr
-    %result = add i32 %a, %b
-    store volatile i32 %result, i32 addrspace(1)* %out.gep
-    ret void
-  }
-
-  define amdgpu_kernel void @shrink_addc_undef_vcc(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-    %tid.ext = sext i32 %tid to i64
-    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
-    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
-    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
-    %a = load volatile i32, i32 addrspace(1)* %a.ptr
-    %b = load volatile i32, i32 addrspace(1)* %b.ptr
-    %result = add i32 %a, %b
-    store volatile i32 %result, i32 addrspace(1)* %out.gep
-    ret void
-  }
-
-  declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-  attributes #0 = { nounwind }
-  attributes #1 = { nounwind readnone }
-
-...
----
+...
 # GCN-LABEL: name: shrink_add_vop3{{$}}
 # GCN: %29, %9 = V_ADD_I32_e64 %19, %17, implicit %exec
 # GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit %exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec
     %29, %9 = V_ADD_I32_e64 %19, %17, implicit %exec
     %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit %exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec
     %29, %9 = V_SUB_I32_e64 %19, %17, implicit %exec
     %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit %exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec
     %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit %exec
     %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
-    BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit %exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec
     %9 = S_MOV_B64 0
     %29, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
     %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit %exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec
     %vcc = S_MOV_B64 0
     %29, %vcc = V_ADDC_U32_e64 %19, %17, %vcc, implicit %exec
     %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 body: |
-  bb.0 (%ir-block.0):
+  bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0

     %3 = COPY %vgpr0
     %0 = COPY %sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
     %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
     %27 = REG_SEQUENCE %3, 1, %26, 2
     %10 = S_MOV_B32 61440
     %13 = REG_SEQUENCE killed %5, 17, %12, 18
     %28 = V_LSHL_B64 killed %27, 2, implicit %exec
     %16 = REG_SEQUENCE killed %4, 17, %12, 18
-    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
-    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec
     %29, %vcc = V_ADDC_U32_e64 %19, %17, undef %vcc, implicit %exec
     %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
-    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec
     S_ENDPGM

 ...
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s

---- |
-  define float @waitcnt-permute(i32 %x, i32 %y) {
-  entry:
-    %0 = call i32 @llvm.amdgcn.ds.bpermute(i32 %x, i32 %y)
-    %1 = bitcast i32 %0 to float
-    %2 = fadd float 1.000000e+00, %1
-    ret float %2
-  }
-
-  declare i32 @llvm.amdgcn.ds.bpermute(i32, i32)
-
 ...
----
 # CHECK-LABEL: name: waitcnt-permute{{$}}
 # CHECK: DS_BPERMUTE_B32
 # CHECK-NEXT: S_WAITCNT 127