llvm.org GIT mirror llvm / 982faf2
AMDGPU: Use i64 scalar compare instructions VI added eq/ne for i64, so use them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281800 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 4 years ago
7 changed file(s) with 422 addition(s) and 174 deletion(s). Raw diff Collapse all Expand all
3636 //===----------------------------------------------------------------------===//
3737
3838 namespace {
39
40 static bool isCBranchSCC(const SDNode *N) {
41 assert(N->getOpcode() == ISD::BRCOND);
42 if (!N->hasOneUse())
43 return false;
44
45 SDValue Cond = N->getOperand(1);
46 if (Cond.getOpcode() == ISD::CopyToReg)
47 Cond = Cond.getOperand(2);
48 return Cond.getOpcode() == ISD::SETCC &&
49 Cond.getOperand(0).getValueType() == MVT::i32 && Cond.hasOneUse();
50 }
5139
5240 /// AMDGPU specific code to select AMDGPU machine instructions for
5341 /// SelectionDAG operations.
149137 uint32_t Offset, uint32_t Width);
150138 void SelectS_BFEFromShifts(SDNode *N);
151139 void SelectS_BFE(SDNode *N);
140 bool isCBranchSCC(const SDNode *N) const;
152141 void SelectBRCOND(SDNode *N);
153142 void SelectATOMIC_CMP_SWAP(SDNode *N);
154143
13361325 SelectCode(N);
13371326 }
13381327
1328 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1329 assert(N->getOpcode() == ISD::BRCOND);
1330 if (!N->hasOneUse())
1331 return false;
1332
1333 SDValue Cond = N->getOperand(1);
1334 if (Cond.getOpcode() == ISD::CopyToReg)
1335 Cond = Cond.getOperand(2);
1336
1337 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1338 return false;
1339
1340 MVT VT = Cond.getOperand(0).getSimpleValueType();
1341 if (VT == MVT::i32)
1342 return true;
1343
1344 if (VT == MVT::i64) {
1345 auto ST = static_cast(Subtarget);
1346
1347 ISD::CondCode CC = cast(Cond.getOperand(2))->get();
1348 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1349 }
1350
1351 return false;
1352 }
1353
13391354 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
13401355 SDValue Cond = N->getOperand(1);
13411356
487487 return Has16BitInsts;
488488 }
489489
  // True if the subtarget has the 64-bit scalar eq/ne compare instructions
  // (s_cmp_eq_u64 / s_cmp_lg_u64), introduced with Volcanic Islands.
  bool hasScalarCompareEq64() const {
    return getGeneration() >= VOLCANIC_ISLANDS;
  }
493
  // Returns the EnableSIScheduler subtarget option, i.e. whether the SI
  // machine scheduler was requested for this subtarget.
  bool enableSIScheduler() const {
    return EnableSIScheduler;
  }
19291929 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
19301930 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
19311931 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
1932 case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
1933 case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
19321934 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
19331935 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
19341936 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
629629 let isCommutable = 1;
630630 }
631631
// Scalar compare with 64-bit source operands, writing SCC.  Mirrors the
// 32-bit SOPC compare class: `cond` feeds the uniform-setcc selection
// pattern and `revOp` the operand-commuting machinery.
// FIX: the scrape dropped the template parameter lists; `class SOPC_CMP_64
// op, ...` / bare `SOPC_Helper,` is not valid TableGen.
class SOPC_CMP_64 <bits<7> op, string opName,
                   PatLeaf cond = COND_NULL, string revOp = opName>
  : SOPC_Helper<op, SSrc_64, i64, opName, cond>,
    Commutable_REV<revOp, !eq(revOp, opName)> {
  let isCompare = 1;
  let isCommutable = 1;
}
639
// 32-bit SOPC instruction with an explicit selection pattern.
// FIX: restore the template parameter list and SOPC_Base arguments eaten by
// the HTML scrape (`list pattern` / bare `SOPC_Base;` do not parse).
class SOPC_32 <bits<7> op, string opName, list<dag> pattern = []>
  : SOPC_Base<op, SSrc_32, SSrc_32, opName, pattern>;
634642
654662 def S_BITCMP1_B64 : SOPC_64_32 <0x0f, "s_bitcmp1_b64">;
655663 def S_SETVSKIP : SOPC_32 <0x10, "s_setvskip">;
656664
// 64-bit scalar compares: hardware only provides eq and lg (ne), and only
// from VI onward, hence the subtarget predicate.
let SubtargetPredicate = isVI in {
def S_CMP_EQ_U64 : SOPC_CMP_64 <0x12, "s_cmp_eq_u64", COND_EQ>;
def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
}
657669
658670 //===----------------------------------------------------------------------===//
659671 // SOPP Instructions
568568 ret void
569569 }
570570
; Uniform i64 eq branch against an inline immediate: VI selects the scalar
; s_cmp_eq_u64, while SI (no 64-bit scalar compare) falls back to the VALU.
; GCN-LABEL: {{^}}br_scc_eq_i64_inline_imm:
; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 4

; SI: v_cmp_eq_i64_e64
define void @br_scc_eq_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
entry:
  %cmp0 = icmp eq i64 %cond, 4
  br i1 %cmp0, label %endif, label %if

if:
  call void asm sideeffect "", ""()
  br label %endif

endif:
  store volatile i32 1, i32 addrspace(1)* %out
  ret void
}
588
; Same as above but 1234 is not an inline immediate, so on VI the constant is
; materialized into an SGPR pair (movk lo / mov hi) before s_cmp_eq_u64.
; GCN-LABEL: {{^}}br_scc_eq_i64_simm16:
; VI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x4d2
; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0
; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}

; SI: v_cmp_eq_i64_e32
define void @br_scc_eq_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
entry:
  %cmp0 = icmp eq i64 %cond, 1234
  br i1 %cmp0, label %endif, label %if

if:
  call void asm sideeffect "", ""()
  br label %endif

endif:
  store volatile i32 1, i32 addrspace(1)* %out
  ret void
}
608
; Uniform i64 ne branch, inline immediate: VI uses the scalar s_cmp_lg_u64;
; SI must use a VALU compare.
; GCN-LABEL: {{^}}br_scc_ne_i64_inline_imm:
; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 4

; SI: v_cmp_ne_i64_e64
define void @br_scc_ne_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
entry:
  %cmp0 = icmp ne i64 %cond, 4
  br i1 %cmp0, label %endif, label %if

if:
  call void asm sideeffect "", ""()
  br label %endif

endif:
  store volatile i32 1, i32 addrspace(1)* %out
  ret void
}
626
; Uniform i64 ne branch with a non-inline constant: VI materializes 1234 into
; an SGPR pair and uses s_cmp_lg_u64.
; GCN-LABEL: {{^}}br_scc_ne_i64_simm16:
; VI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x4d2
; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0
; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}

; SI: v_cmp_ne_i64_e32
define void @br_scc_ne_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
entry:
  %cmp0 = icmp ne i64 %cond, 1234
  br i1 %cmp0, label %endif, label %if

if:
  call void asm sideeffect "", ""()
  br label %endif

endif:
  store volatile i32 1, i32 addrspace(1)* %out
  ret void
}
646
571647 attributes #0 = { nounwind }
572648 attributes #1 = { nounwind readnone }
None ; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
1 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
2
3 ; SI-LABEL: {{^}}uniform_if_scc:
4 ; SI-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0
5 ; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
6 ; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
7
8 ; Fall-through to the else
9 ; SI: v_mov_b32_e32 [[STORE_VAL]], 1
10
11 ; SI: [[IF_LABEL]]:
12 ; SI: buffer_store_dword [[STORE_VAL]]
0 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
2
3 ; GCN-LABEL: {{^}}uniform_if_scc:
4 ; GCN-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0
5 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
6 ; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
7
8 ; Fall-through to the else
9 ; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
10
11 ; GCN: [[IF_LABEL]]:
12 ; GCN: buffer_store_dword [[STORE_VAL]]
1313 define void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
1414 entry:
1515 %cmp0 = icmp eq i32 %cond, 0
2727 ret void
2828 }
2929
30 ; SI-LABEL: {{^}}uniform_if_vcc:
30 ; GCN-LABEL: {{^}}uniform_if_vcc:
3131 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
3232 ; also scheduled the write first.
33 ; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
34 ; SI-DAG: s_and_b64 vcc, exec, [[COND]]
35 ; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
36 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
37
38 ; Fall-through to the else
39 ; SI: v_mov_b32_e32 [[STORE_VAL]], 1
40
41 ; SI: [[IF_LABEL]]:
42 ; SI: buffer_store_dword [[STORE_VAL]]
33 ; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
34 ; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
35 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
36 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
37
38 ; Fall-through to the else
39 ; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
40
41 ; GCN: [[IF_LABEL]]:
42 ; GCN: buffer_store_dword [[STORE_VAL]]
4343 define void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
4444 entry:
4545 %cmp0 = fcmp oeq float %cond, 0.0
5757 ret void
5858 }
5959
60 ; SI-LABEL: {{^}}uniform_if_swap_br_targets_scc:
61 ; SI-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0
62 ; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
63 ; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
64
65 ; Fall-through to the else
66 ; SI: v_mov_b32_e32 [[STORE_VAL]], 1
67
68 ; SI: [[IF_LABEL]]:
69 ; SI: buffer_store_dword [[STORE_VAL]]
60 ; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc:
61 ; GCN-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0
62 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
63 ; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
64
65 ; Fall-through to the else
66 ; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
67
68 ; GCN: [[IF_LABEL]]:
69 ; GCN: buffer_store_dword [[STORE_VAL]]
7070 define void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
7171 entry:
7272 %cmp0 = icmp eq i32 %cond, 0
8484 ret void
8585 }
8686
87 ; SI-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
87 ; GCN-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
8888 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
8989 ; also scheduled the write first.
90 ; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
91 ; SI-DAG: s_and_b64 vcc, exec, [[COND]]
92 ; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
93 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
94
95 ; Fall-through to the else
96 ; SI: v_mov_b32_e32 [[STORE_VAL]], 1
97
98 ; SI: [[IF_LABEL]]:
99 ; SI: buffer_store_dword [[STORE_VAL]]
90 ; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
91 ; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
92 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
93 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
94
95 ; Fall-through to the else
96 ; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
97
98 ; GCN: [[IF_LABEL]]:
99 ; GCN: buffer_store_dword [[STORE_VAL]]
100100 define void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
101101 entry:
102102 %cmp0 = fcmp oeq float %cond, 0.0
114114 ret void
115115 }
116116
117 ; SI-LABEL: {{^}}uniform_if_move_valu:
118 ; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
117 ; GCN-LABEL: {{^}}uniform_if_move_valu:
118 ; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
119119 ; Using a floating-point value in an integer compare will cause the compare to
120120 ; be selected for the SALU and then later moved to the VALU.
121 ; SI: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
122 ; SI: s_and_b64 vcc, exec, [[COND]]
123 ; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
124 ; SI: buffer_store_dword
125 ; SI: [[ENDIF_LABEL]]:
126 ; SI: s_endpgm
121 ; GCN: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
122 ; GCN: s_and_b64 vcc, exec, [[COND]]
123 ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
124 ; GCN: buffer_store_dword
125 ; GCN: [[ENDIF_LABEL]]:
126 ; GCN: s_endpgm
127127 define void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
128128 entry:
129129 %a.0 = fadd float %a, 10.0
139139 ret void
140140 }
141141
142 ; SI-LABEL: {{^}}uniform_if_move_valu_commute:
143 ; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
142 ; GCN-LABEL: {{^}}uniform_if_move_valu_commute:
143 ; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
144144 ; Using a floating-point value in an integer compare will cause the compare to
145145 ; be selected for the SALU and then later moved to the VALU.
146 ; SI: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
147 ; SI: s_and_b64 vcc, exec, [[COND]]
148 ; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
149 ; SI: buffer_store_dword
150 ; SI: [[ENDIF_LABEL]]:
151 ; SI: s_endpgm
146 ; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
147 ; GCN: s_and_b64 vcc, exec, [[COND]]
148 ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
149 ; GCN: buffer_store_dword
150 ; GCN: [[ENDIF_LABEL]]:
151 ; GCN: s_endpgm
152152 define void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
153153 entry:
154154 %a.0 = fadd float %a, 10.0
165165 }
166166
167167
168 ; SI-LABEL: {{^}}uniform_if_else_ret:
169 ; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
170 ; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
171
172 ; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
173 ; SI: buffer_store_dword [[TWO]]
174 ; SI: s_endpgm
175
176 ; SI: {{^}}[[IF_LABEL]]:
177 ; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
178 ; SI: buffer_store_dword [[ONE]]
179 ; SI: s_endpgm
168 ; GCN-LABEL: {{^}}uniform_if_else_ret:
169 ; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
170 ; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
171
172 ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
173 ; GCN: buffer_store_dword [[TWO]]
174 ; GCN: s_endpgm
175
176 ; GCN: {{^}}[[IF_LABEL]]:
177 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
178 ; GCN: buffer_store_dword [[ONE]]
179 ; GCN: s_endpgm
180180 define void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
181181 entry:
182182 %cmp = icmp eq i32 %a, 0
194194 ret void
195195 }
196196
197 ; SI-LABEL: {{^}}uniform_if_else:
198 ; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
199 ; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
200
201 ; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
202 ; SI: buffer_store_dword [[TWO]]
203 ; SI: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
204
205 ; SI: [[IF_LABEL]]:
206 ; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
207 ; SI: buffer_store_dword [[ONE]]
208
209 ; SI: [[ENDIF_LABEL]]:
210 ; SI: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
211 ; SI: buffer_store_dword [[THREE]]
212 ; SI: s_endpgm
197 ; GCN-LABEL: {{^}}uniform_if_else:
198 ; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
199 ; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
200
201 ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
202 ; GCN: buffer_store_dword [[TWO]]
203 ; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
204
205 ; GCN: [[IF_LABEL]]:
206 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
207 ; GCN: buffer_store_dword [[ONE]]
208
209 ; GCN: [[ENDIF_LABEL]]:
210 ; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
211 ; GCN: buffer_store_dword [[THREE]]
212 ; GCN: s_endpgm
213213 define void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
214214 entry:
215215 %cmp = icmp eq i32 %a, 0
228228 ret void
229229 }
230230
231 ; SI-LABEL: {{^}}icmp_2_users:
232 ; SI: s_cmp_lt_i32 s{{[0-9]+}}, 1
233 ; SI: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
234 ; SI: buffer_store_dword
235 ; SI: [[LABEL]]:
236 ; SI: s_endpgm
231 ; GCN-LABEL: {{^}}icmp_2_users:
232 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1
233 ; GCN: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
234 ; GCN: buffer_store_dword
235 ; GCN: [[LABEL]]:
236 ; GCN: s_endpgm
237237 define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
238238 main_body:
239239 %0 = icmp sgt i32 %cond, 0
248248 ret void
249249 }
250250
251 ; SI-LABEL: {{^}}icmp_users_different_blocks:
252 ; SI: s_load_dword [[COND:s[0-9]+]]
253 ; SI: s_cmp_lt_i32 [[COND]], 1
254 ; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
255 ; SI: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
256 ; SI: s_and_b64 vcc, exec, [[MASK]]
257 ; SI: s_cbranch_vccnz [[EXIT]]
258 ; SI: buffer_store
259 ; SI: {{^}}[[EXIT]]:
260 ; SI: s_endpgm
251 ; GCN-LABEL: {{^}}icmp_users_different_blocks:
252 ; GCN: s_load_dword [[COND:s[0-9]+]]
253 ; GCN: s_cmp_lt_i32 [[COND]], 1
254 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
255 ; GCN: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
256 ; GCN: s_and_b64 vcc, exec, [[MASK]]
257 ; GCN: s_cbranch_vccnz [[EXIT]]
258 ; GCN: buffer_store
259 ; GCN: {{^}}[[EXIT]]:
260 ; GCN: s_endpgm
261261 define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
262262 bb:
263263 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
278278 ret void
279279 }
280280
281 ; SI-LABEL: {{^}}uniform_loop:
282 ; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
283 ; FIXME: We need to teach SIFixSGPRCopies about uniform branches so we
281 ; GCN-LABEL: {{^}}uniform_loop:
282 ; GCN: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
; FIXME: We need to teach SIFixSGPRCopies about uniform branches so we
284284 ; get s_add_i32 here.
285 ; SI: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
286 ; SI: v_cmp_ne_i32_e32 vcc, 0, [[I]]
287 ; SI: s_and_b64 vcc, exec, vcc
288 ; SI: s_cbranch_vccnz [[LOOP_LABEL]]
289 ; SI: s_endpgm
285 ; GCN: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
286 ; GCN: v_cmp_ne_i32_e32 vcc, 0, [[I]]
287 ; GCN: s_and_b64 vcc, exec, vcc
288 ; GCN: s_cbranch_vccnz [[LOOP_LABEL]]
289 ; GCN: s_endpgm
290290 define void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
291291 entry:
292292 br label %loop
303303
304304 ; Test uniform and divergent.
305305
306 ; SI-LABEL: {{^}}uniform_inside_divergent:
307 ; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
308 ; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
309 ; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
310 ; SI: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
311 ; SI: s_cmp_lg_i32 {{s[0-9]+}}, 0
312 ; SI: s_cbranch_scc1 [[ENDIF_LABEL]]
313 ; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
314 ; SI: buffer_store_dword [[ONE]]
306 ; GCN-LABEL: {{^}}uniform_inside_divergent:
307 ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
308 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
309 ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
310 ; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
311 ; GCN: s_cmp_lg_i32 {{s[0-9]+}}, 0
312 ; GCN: s_cbranch_scc1 [[ENDIF_LABEL]]
313 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
314 ; GCN: buffer_store_dword [[ONE]]
315315 define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
316316 entry:
317317 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
331331 ret void
332332 }
333333
334 ; SI-LABEL: {{^}}divergent_inside_uniform:
335 ; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
336 ; SI: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
337 ; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
338 ; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
339 ; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
340 ; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
341 ; SI: buffer_store_dword [[ONE]]
342 ; SI: [[ENDIF_LABEL]]:
343 ; SI: s_endpgm
334 ; GCN-LABEL: {{^}}divergent_inside_uniform:
335 ; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
336 ; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
337 ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
338 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
339 ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
340 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
341 ; GCN: buffer_store_dword [[ONE]]
342 ; GCN: [[ENDIF_LABEL]]:
343 ; GCN: s_endpgm
344344 define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
345345 entry:
346346 %u_cmp = icmp eq i32 %cond, 0
360360 ret void
361361 }
362362
363 ; SI-LABEL: {{^}}divergent_if_uniform_if:
364 ; SI: v_cmp_eq_i32_e32 vcc, 0, v0
365 ; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
366 ; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
367 ; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
368 ; SI: buffer_store_dword [[ONE]]
369 ; SI: s_or_b64 exec, exec, [[MASK]]
370 ; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
371 ; SI: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
372 ; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
373 ; SI: buffer_store_dword [[TWO]]
374 ; SI: [[EXIT]]:
375 ; SI: s_endpgm
363 ; GCN-LABEL: {{^}}divergent_if_uniform_if:
364 ; GCN: v_cmp_eq_i32_e32 vcc, 0, v0
365 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
366 ; GCN: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
367 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
368 ; GCN: buffer_store_dword [[ONE]]
369 ; GCN: s_or_b64 exec, exec, [[MASK]]
370 ; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
371 ; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
372 ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
373 ; GCN: buffer_store_dword [[TWO]]
374 ; GCN: [[EXIT]]:
375 ; GCN: s_endpgm
376376 define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
377377 entry:
378378 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
400400 ; the first, leaving an scc use in a different block than it was
401401 ; defed.
402402
403 ; SI-LABEL: {{^}}cse_uniform_condition_different_blocks:
404 ; SI: s_load_dword [[COND:s[0-9]+]]
405 ; SI: s_cmp_lt_i32 [[COND]], 1
406 ; SI: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
407
408 ; SI: BB#1:
409 ; SI-NOT: cmp
410 ; SI: buffer_load_dword
411 ; SI: buffer_store_dword
412 ; SI: s_cbranch_scc1 BB[[FNNUM]]_3
413
414 ; SI: BB[[FNNUM]]_3:
415 ; SI: s_endpgm
403 ; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks:
404 ; GCN: s_load_dword [[COND:s[0-9]+]]
405 ; GCN: s_cmp_lt_i32 [[COND]], 1
406 ; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
407
408 ; GCN: BB#1:
409 ; GCN-NOT: cmp
410 ; GCN: buffer_load_dword
411 ; GCN: buffer_store_dword
412 ; GCN: s_cbranch_scc1 BB[[FNNUM]]_3
413
414 ; GCN: BB[[FNNUM]]_3:
415 ; GCN: s_endpgm
416416 define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
417417 bb:
418418 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
433433 ret void
434434 }
435435
; Uniform i64 eq condition: VI branches directly on SCC via s_cmp_eq_u64,
; while SI goes v_cmp + s_and_b64 with exec + s_cbranch_vccnz.
; GCN-LABEL: {{^}}uniform_if_scc_i64_eq:
; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0

; SI: v_cmp_eq_i64_e64
; SI: s_and_b64 vcc, exec,
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: v_mov_b32_e32 [[STORE_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_scc_i64_eq(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
467
; Uniform i64 ne condition: VI branches on SCC via s_cmp_lg_u64; SI keeps the
; VALU compare + vcc branch sequence.
; GCN-LABEL: {{^}}uniform_if_scc_i64_ne:
; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0

; SI: v_cmp_ne_i64_e64
; SI: s_and_b64 vcc, exec,
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: v_mov_b32_e32 [[STORE_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_scc_i64_ne(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
499
; sgt is not eq/ne, so even VI has no scalar i64 compare for it: all targets
; use the VALU compare + vcc branch.
; NOTE(review): [[STORE_VAL]] is used below without a definition in this test
; block; FileCheck reuses the binding from the previous test - confirm that
; is intended (no --enable-var-scope here).
; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
; GCN: v_cmp_gt_i64_e64
; GCN: s_and_b64 vcc, exec,
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: v_mov_b32_e32 [[STORE_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp sgt i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
526
; The condition is loaded from LDS, so the compare starts on the SALU and is
; later moved to the VALU; VI uses the unsigned 64-bit VALU form.
; GCN-LABEL: {{^}}move_to_valu_i64_eq:
; SI: v_cmp_eq_i64_e32
; VI: v_cmp_eq_u64_e32
define void @move_to_valu_i64_eq(i32 addrspace(1)* %out) {
  %cond = load volatile i64, i64 addrspace(3)* undef
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
546
; Same as move_to_valu_i64_eq but for ne: the LDS-loaded condition forces a
; VALU compare; VI selects the unsigned 64-bit form.
; GCN-LABEL: {{^}}move_to_valu_i64_ne:
; SI: v_cmp_ne_i64_e32
; VI: v_cmp_ne_u64_e32
define void @move_to_valu_i64_ne(i32 addrspace(1)* %out) {
  %cond = load volatile i64, i64 addrspace(3)* undef
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
566
436567 declare i32 @llvm.amdgcn.workitem.id.x() #0
437568
438 attributes #0 = { readnone }
569 attributes #0 = { nounwind readnone }
None // RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN %s
1 // RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck --check-prefix=GCN %s
2 // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=GCN %s
0 // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s
1 // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
2 // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICI %s
33
44 //===----------------------------------------------------------------------===//
55 // SOPC Instructions
5555
5656 s_setvskip s3, s5
5757 // GCN: s_setvskip s3, s5 ; encoding: [0x03,0x05,0x10,0xbf]
58
59 s_cmp_eq_u64 s[0:1], s[2:3]
60 // VI: s_cmp_eq_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x12,0xbf]
61 // NOSICI: error: instruction not supported on this GPU
62
63 s_cmp_lg_u64 s[0:1], s[2:3]
64 // VI: s_cmp_lg_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x13,0xbf]
65 // NOSICI: error: instruction not supported on this GPU