llvm.org GIT mirror llvm / 41cf920
AMDGPU: Handle alloca promoting with null operands

If the second pointer in a multi-pointer instruction is a constant, we can replace the type.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269945 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 3 years ago
4 changed file(s) with 128 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
509509 if (Val == OtherOp)
510510 OtherOp = Inst->getOperand(OpIdx1);
511511
512 if (isa<ConstantPointerNull>(OtherOp))
513 return true;
514
512515 Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
513516 if (!isa<AllocaInst>(OtherObj))
514517 return false;
572575 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
573576 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
574577 return false;
578
579 // May need to rewrite constant operands.
580 WorkList.push_back(ICmp);
575581 }
576582
577583 if (!User->getType()->isPointerTy())
712718 for (Value *V : WorkList) {
713719 CallInst *Call = dyn_cast<CallInst>(V);
714720 if (!Call) {
715 Type *EltTy = V->getType()->getPointerElementType();
716 PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
721 if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
722 Value *Src0 = CI->getOperand(0);
723 Type *EltTy = Src0->getType()->getPointerElementType();
724 PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
725
726 if (isa<ConstantPointerNull>(CI->getOperand(0)))
727 CI->setOperand(0, ConstantPointerNull::get(NewTy));
728
729 if (isa<ConstantPointerNull>(CI->getOperand(1)))
730 CI->setOperand(1, ConstantPointerNull::get(NewTy));
731
732 continue;
733 }
717734
718735 // The operand's value should be corrected on its own.
719736 if (isa<AddrSpaceCastInst>(V))
720737 continue;
721738
739 Type *EltTy = V->getType()->getPointerElementType();
740 PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
741
722742 // FIXME: It doesn't really make sense to try to do this for all
723743 // instructions.
724744 V->mutateType(NewTy);
745
746 // Adjust the types of any constant operands.
747 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
748 if (isa<ConstantPointerNull>(SI->getOperand(1)))
749 SI->setOperand(1, ConstantPointerNull::get(NewTy));
750
751 if (isa<ConstantPointerNull>(SI->getOperand(2)))
752 SI->setOperand(2, ConstantPointerNull::get(NewTy));
753 } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
754 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
755 if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
756 Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
757 }
758 }
759
725760 continue;
726761 }
727762
1212 %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
1313 %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
1414 %cmp = icmp eq i32* %ptr0, %ptr1
15 %zext = zext i1 %cmp to i32
16 store volatile i32 %zext, i32 addrspace(1)* %out
17 ret void
18 }
19
20 ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
21 ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 %{{[0-9]+}}
22 ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
23 ; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, null
24 define void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
25 %alloca = alloca [16 x i32], align 4
26 %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
27 %cmp = icmp eq i32* %ptr0, null
28 %zext = zext i1 %cmp to i32
29 store volatile i32 %zext, i32 addrspace(1)* %out
30 ret void
31 }
32
33 ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
34 ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 %{{[0-9]+}}
35 ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
36 ; CHECK: %cmp = icmp eq i32 addrspace(3)* null, %ptr0
37 define void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
38 %alloca = alloca [16 x i32], align 4
39 %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
40 %cmp = icmp eq i32* null, %ptr0
1541 %zext = zext i1 %cmp to i32
1642 store volatile i32 %zext, i32 addrspace(1)* %out
1743 ret void
2727
2828 endif:
2929 %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
30 store i32 0, i32* %phi.ptr, align 4
31 ret void
32 }
33
34 ; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
35 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
36 define void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
37 entry:
38 %alloca = alloca [64 x i32], align 4
39 br i1 undef, label %if, label %endif
40
41 if:
42 %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
43 br label %endif
44
45 endif:
46 %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
47 store i32 0, i32* %phi.ptr, align 4
48 ret void
49 }
50
51 ; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
52 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
53 define void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
54 entry:
55 %alloca = alloca [64 x i32], align 4
56 br i1 undef, label %if, label %endif
57
58 if:
59 %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
60 br label %endif
61
62 endif:
63 %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
3064 store i32 0, i32* %phi.ptr, align 4
3165 ret void
3266 }
9898 ret void
9999 }
100100
101 ; CHECK-LABEL: @select_null_rhs(
102 ; CHECK-NOT: alloca
103 ; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
104 define void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
105 bb:
106 %tmp = alloca double, align 8
107 store double 0.000000e+00, double* %tmp, align 8
108 %tmp2 = icmp eq i32 %arg1, 0
109 %tmp3 = select i1 %tmp2, double* %tmp, double* null
110 store double 1.000000e+00, double* %tmp3, align 8
111 %tmp4 = load double, double* %tmp, align 8
112 store double %tmp4, double addrspace(1)* %arg
113 ret void
114 }
115
116 ; CHECK-LABEL: @select_null_lhs(
117 ; CHECK-NOT: alloca
118 ; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
119 define void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
120 bb:
121 %tmp = alloca double, align 8
122 store double 0.000000e+00, double* %tmp, align 8
123 %tmp2 = icmp eq i32 %arg1, 0
124 %tmp3 = select i1 %tmp2, double* null, double* %tmp
125 store double 1.000000e+00, double* %tmp3, align 8
126 %tmp4 = load double, double* %tmp, align 8
127 store double %tmp4, double addrspace(1)* %arg
128 ret void
129 }
130
101131 attributes #0 = { norecurse nounwind "amdgpu-max-waves-per-eu"="1" }
132 attributes #1 = { norecurse nounwind }