llvm.org GIT mirror: llvm / 48a6409

[LV] Don't attempt to type-shrink scalarized instructions

After r288909, instructions feeding predicated instructions may be scalarized
if profitable. Since these instructions will remain scalar, we shouldn't
attempt to type-shrink them. We should only truncate vector types to their
minimal bit widths. This bug was exposed by enabling the vectorization of
loops containing conditional stores by default.

Author: Matthew Simpson

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289958 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed files with 74 additions and 5 deletions.
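The essence of the patch is a single new predicate: an instruction may be type-shrunk only if it will actually end up as a vector value. Below is a minimal, self-contained C++ sketch of that gating logic; the struct, its fields, and the set-based scalarization queries are illustrative stand-ins, not LLVM's real classes. The actual helper appears in the first hunk that follows.

#include <cassert>
#include <map>
#include <set>

// Stand-in for llvm::Instruction; illustrative only.
struct Instruction {};

struct CostModelSketch {
  // Instructions that could be computed in a narrower type, with that width.
  std::map<const Instruction *, unsigned> MinBWs;
  // Instructions the cost model chose to keep scalar at this VF.
  std::set<const Instruction *> ProfitableToScalarize;
  // Instructions legality forces to stay scalar after vectorization.
  std::set<const Instruction *> ScalarAfterVectorization;

  // Mirrors the patch's predicate: shrink only values that (a) are really
  // vectorized (VF > 1), (b) have a computed minimal width, and (c) will not
  // remain scalar for profitability or legality reasons.
  bool canTruncateToMinimalBitwidth(const Instruction *I, unsigned VF) const {
    return VF > 1 && MinBWs.count(I) && !ProfitableToScalarize.count(I) &&
           !ScalarAfterVectorization.count(I);
  }
};

int main() {
  CostModelSketch CM;
  Instruction Zext; // think: a zext feeding a predicated store
  CM.MinBWs[&Zext] = 8;                   // narrowable in principle...
  CM.ProfitableToScalarize.insert(&Zext); // ...but it will stay scalar,
  assert(!CM.canTruncateToMinimalBitwidth(&Zext, 2)); // so never shrink it.
  assert(!CM.canTruncateToMinimalBitwidth(&Zext, 1)); // scalar loop: no shrink.
  return 0;
}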
@@ -1916,6 +1916,13 @@
     return Scalars->second.count(I);
   }
 
+  /// \returns True if instruction \p I can be truncated to a smaller bitwidth
+  /// for vectorization factor \p VF.
+  bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
+    return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
+           !Legal->isScalarAfterVectorization(I);
+  }
+
 private:
   /// The vectorization cost is a combination of the cost itself and a boolean
   /// indicating whether any of the contributing operations will actually
@@ -3724,6 +3731,11 @@
   //
   SmallPtrSet<Value *, 4> Erased;
   for (const auto &KV : Cost->getMinimalBitwidths()) {
+    // If the value wasn't vectorized, we must maintain the original scalar
+    // type. The absence of the value from VectorLoopValueMap indicates that it
+    // wasn't vectorized.
+    if (!VectorLoopValueMap.hasVector(KV.first))
+      continue;
     VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
     for (Value *&I : Parts) {
       if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
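The guard just added (it reappears in the next hunk) encodes a simple invariant: only values present in VectorLoopValueMap were vectorized, so anything absent must keep its original scalar type and is skipped. A small standalone sketch of that skip-if-absent pattern, with ordinary standard-library maps as hypothetical stand-ins for LLVM's containers:

#include <iostream>
#include <map>
#include <string>

int main() {
  // Candidate values and their minimal bit widths.
  std::map<std::string, unsigned> MinBWs = {{"a", 8}, {"b", 16}};
  // Only "a" was vectorized; "b" remained scalar.
  std::map<std::string, int> VectorValues = {{"a", 42}};

  for (const auto &KV : MinBWs) {
    // Mirrors !VectorLoopValueMap.hasVector(KV.first): leave scalar
    // values untouched rather than rewriting them in a narrower type.
    if (!VectorValues.count(KV.first))
      continue;
    std::cout << KV.first << " can be shrunk to i" << KV.second << "\n";
  }
  return 0;
}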
@@ -3816,6 +3828,11 @@
 
   // We'll have created a bunch of ZExts that are now parentless. Clean up.
   for (const auto &KV : Cost->getMinimalBitwidths()) {
+    // If the value wasn't vectorized, we must maintain the original scalar
+    // type. The absence of the value from VectorLoopValueMap indicates that it
+    // wasn't vectorized.
+    if (!VectorLoopValueMap.hasVector(KV.first))
+      continue;
     VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
     for (Value *&I : Parts) {
       ZExtInst *Inst = dyn_cast<ZExtInst>(I);
@@ -6836,7 +6853,7 @@
                                                     unsigned VF,
                                                     Type *&VectorTy) {
   Type *RetTy = I->getType();
-  if (VF > 1 && MinBWs.count(I))
+  if (canTruncateToMinimalBitwidth(I, VF))
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
   VectorTy = ToVectorTy(RetTy, VF);
   auto SE = PSE.getSE();
@@ -6957,9 +6974,8 @@
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
     Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
-    auto It = MinBWs.find(Op0AsInstruction);
-    if (VF > 1 && It != MinBWs.end())
-      ValTy = IntegerType::get(ValTy->getContext(), It->second);
+    if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
+      ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
     VectorTy = ToVectorTy(ValTy, VF);
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
   }
@@ -7107,7 +7123,7 @@
 
   Type *SrcScalarTy = I->getOperand(0)->getType();
   Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
-  if (VF > 1 && MinBWs.count(I)) {
+  if (canTruncateToMinimalBitwidth(I, VF)) {
     // This cast is going to be shrunk. This may remove the cast or it might
     // turn it into slightly different cast. For example, if MinBW == 16,
     // "zext i8 %1 to i32" becomes "zext i8 %1 to i16".
@@ -130,3 +130,56 @@
   %iNewChunks.1.lcssa = phi i32 [ undef, %for.body9 ], [ %iNewChunks.2, %for.inc23 ]
   unreachable
 }
+
+; VEC-LABEL: @minimal_bit_widths(
+;
+; In the test below, it's more profitable for the expression feeding the
+; conditional store to remain scalar. Since we can only type-shrink vector
+; types, we shouldn't try to represent the expression in a smaller type.
+;
+; VEC: vector.body:
+; VEC: %wide.load = load <2 x i8>, <2 x i8>* {{.*}}, align 1
+; VEC: br i1 {{.*}}, label %[[IF0:.+]], label %[[CONT0:.+]]
+; VEC: [[IF0]]:
+; VEC: %[[E0:.+]] = extractelement <2 x i8> %wide.load, i32 0
+; VEC: %[[Z0:.+]] = zext i8 %[[E0]] to i32
+; VEC: %[[T0:.+]] = trunc i32 %[[Z0]] to i8
+; VEC: store i8 %[[T0]], i8* {{.*}}, align 1
+; VEC: br label %[[CONT0]]
+; VEC: [[CONT0]]:
+; VEC: br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
+; VEC: [[IF1]]:
+; VEC: %[[E1:.+]] = extractelement <2 x i8> %wide.load, i32 1
+; VEC: %[[Z1:.+]] = zext i8 %[[E1]] to i32
+; VEC: %[[T1:.+]] = trunc i32 %[[Z1]] to i8
+; VEC: store i8 %[[T1]], i8* {{.*}}, align 1
+; VEC: br label %[[CONT1]]
+; VEC: [[CONT1]]:
+; VEC: br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define void @minimal_bit_widths(i1 %c) {
+entry:
+  br label %for.body
+
+for.body:
+  %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
+  %tmp1 = phi i64 [ %tmp7, %for.inc ], [ undef, %entry ]
+  %tmp2 = getelementptr i8, i8* undef, i64 %tmp0
+  %tmp3 = load i8, i8* %tmp2, align 1
+  br i1 %c, label %if.then, label %for.inc
+
+if.then:
+  %tmp4 = zext i8 %tmp3 to i32
+  %tmp5 = trunc i32 %tmp4 to i8
+  store i8 %tmp5, i8* %tmp2, align 1
+  br label %for.inc
+
+for.inc:
+  %tmp6 = add nuw nsw i64 %tmp0, 1
+  %tmp7 = add i64 %tmp1, -1
+  %tmp8 = icmp eq i64 %tmp7, 0
+  br i1 %tmp8, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
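The file's RUN lines fall outside this excerpt; given the VEC check prefix and the <2 x i8> wide load, it is presumably driven by something like "opt < %s -loop-vectorize -force-vector-width=2 -S | FileCheck %s --check-prefix=VEC" (an assumption, not shown in the diff). The checks pin down exactly the fixed behavior: the zext/trunc pair feeding the conditional store is emitted per lane on the scalarized path and still goes through i32, instead of being rewritten in a narrower type that the scalar expression would never benefit from.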