llvm.org GIT mirror llvm / e2d9355
[AMDGPU] Combine and x, (sext cc from i1) => select cc, x, 0. Also factored out function to check if a boolean is an already deserialized value which does not require v_cndmask_b32 to be loaded. Added binary logical operators to its check. Differential Revision: https://reviews.llvm.org/D34500 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306439 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 2 years ago
3 changed file(s) with 75 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
43134313 return SDValue();
43144314 }
43154315
4316 // Returns true if argument is a boolean value which is not serialized into
4317 // memory or argument and does not require v_cndmask_b32 to be deserialized.
4318 static bool isBoolSGPR(SDValue V) {
4319 if (V.getValueType() != MVT::i1)
4320 return false;
4321 switch (V.getOpcode()) {
4322 default: break;
4323 case ISD::SETCC:
4324 case ISD::AND:
4325 case ISD::OR:
4326 case ISD::XOR:
4327 case AMDGPUISD::FP_CLASS:
4328 return true;
4329 }
4330 return false;
4331 }
4332
43164333 SDValue SITargetLowering::performAndCombine(SDNode *N,
43174334 DAGCombinerInfo &DCI) const {
43184335 if (DCI.isBeforeLegalize())
43994416 X, DAG.getConstant(Mask, DL, MVT::i32));
44004417 }
44014418 }
4419 }
4420
4421 if (VT == MVT::i32 &&
4422 (RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
4423 // and x, (sext cc from i1) => select cc, x, 0
4424 if (RHS.getOpcode() != ISD::SIGN_EXTEND)
4425 std::swap(LHS, RHS);
4426 if (isBoolSGPR(RHS.getOperand(0)))
4427 return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0),
4428 LHS, DAG.getConstant(0, SDLoc(N), MVT::i32));
44024429 }
44034430
44044431 return SDValue();
49404967 case ISD::SIGN_EXTEND:
49414968 case ISD::ANY_EXTEND: {
49424969 auto Cond = RHS.getOperand(0);
4943 if (Cond.getOpcode() != ISD::SETCC &&
4944 Cond.getOpcode() != AMDGPUISD::FP_CLASS)
4970 if (!isBoolSGPR(Cond))
49454971 break;
49464972 SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
49474973 SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1
2 ; GCN-LABEL: {{^}}and_i1_sext_bool:
3 ; GCN: v_cmp_{{gt|le}}_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
4 ; GCN: v_cndmask_b32_e{{32|64}} [[VAL:v[0-9]+]], 0, v{{[0-9]+}}, [[CC]]
5 ; GCN: store_dword {{.*}}[[VAL]]
6 ; GCN-NOT: v_cndmask_b32_e64 v{{[0-9]+}}, {{0|-1}}, {{0|-1}}
7 ; GCN-NOT: v_and_b32_e32
8
9 define amdgpu_kernel void @and_i1_sext_bool(i32 addrspace(1)* nocapture %arg) {
10 bb:
11 %x = tail call i32 @llvm.amdgcn.workitem.id.x()
12 %y = tail call i32 @llvm.amdgcn.workitem.id.y()
13 %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
14 %v = load i32, i32 addrspace(1)* %gep, align 4
15 %cmp = icmp ugt i32 %x, %y
16 %ext = sext i1 %cmp to i32
17 %and = and i32 %v, %ext
18 store i32 %and, i32 addrspace(1)* %gep, align 4
19 ret void
20 }
21
22 declare i32 @llvm.amdgcn.workitem.id.x() #0
23
24 declare i32 @llvm.amdgcn.workitem.id.y() #0
25
26 attributes #0 = { nounwind readnone speculatable }
149149 ret void
150150 }
151151
152 ; GCN-LABEL: {{^}}add_and:
153 ; GCN: s_and_b64 [[CC:[^,]+]],
154 ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
155 ; GCN-NOT: v_cndmask
156
157 define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) {
158 bb:
159 %x = tail call i32 @llvm.amdgcn.workitem.id.x()
160 %y = tail call i32 @llvm.amdgcn.workitem.id.y()
161 %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
162 %v = load i32, i32 addrspace(1)* %gep, align 4
163 %cmp1 = icmp ugt i32 %x, %y
164 %cmp2 = icmp ugt i32 %x, 1
165 %cmp = and i1 %cmp1, %cmp2
166 %ext = zext i1 %cmp to i32
167 %add = add i32 %v, %ext
168 store i32 %add, i32 addrspace(1)* %gep, align 4
169 ret void
170 }
171
152172 declare i1 @llvm.amdgcn.class.f32(float, i32) #0
153173
154174 declare i32 @llvm.amdgcn.workitem.id.x() #0