llvm.org GIT mirror
[ConstantFold] Improve the bitcast folding logic for constant vectors. The constant folder didn't know how to always fold bitcasts of constant integer vectors. In particular, it was unable to handle the case where a constant vector had some undef elements, and the resulting (i.e. bitcasted) vector type had more elements than the original vector type. Example: %cast = bitcast <2 x i64><i64 undef, i64 2> to <4 x i32> On a little endian target, %cast could have been folded to: <4 x i32><i32 undef, i32 undef, i32 2, i32 0> This patch improves the folding logic by teaching how to correctly propagate undef elements in the folded vector. Differential Revision: https://reviews.llvm.org/D24301 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281343 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 3 years ago
3 changed file(s) with 35 addition(s) and 25 deletion(s).
 223 223 224 224 // Loop over each source value, expanding into multiple results. 225 225 for (unsigned i = 0; i != NumSrcElt; ++i) { 226 auto *Src = dyn_cast_or_null<ConstantInt>(C->getAggregateElement(i)); 227 if (!Src) // Reject constantexpr elements.⏎ 226 auto *Element = C->getAggregateElement(i);⏎ 227 228 if (!Element) // Reject constantexpr elements. 229 return ConstantExpr::getBitCast(C, DestTy); 230 231 if (isa<UndefValue>(Element)) { 232 // Correctly Propagate undef values. 233 Result.append(Ratio, UndefValue::get(DstEltTy)); 234 continue; 235 } 236 237 auto *Src = dyn_cast<ConstantInt>(Element); 238 if (!Src) 228 239 return ConstantExpr::getBitCast(C, DestTy); 229 240 230 241 unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
 56 56 57 57 define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) { 58 58 ; CHECK-LABEL: @test_extrq_call_constexpr( 59 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> to <16 x i8>)) 60 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]⏎ 59 ; CHECK-NEXT: ret <2 x i64> %x⏎ 61 60 ; 62 61 %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> to <16 x i8>)) 63 62 ret <2 x i64> %1 132 131 133 132 define <2 x i64> @test_extrqi_call_constexpr() { 134 133 ; CHECK-LABEL: @test_extrqi_call_constexpr( 135 ; CHECK-NEXT: ret <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>), i32 2), i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef> to <2 x i64>)⏎ 134 ; CHECK-NEXT: ret <2 x i64> zeroinitializer⏎ 136 135 ; 137 136 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16) 138 137 ret <2 x i64> %1 178 177 179 178 define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) { 180 179 ; CHECK-LABEL: @test_insertq_call_constexpr( 181 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>))⏎ 180 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> , i8 2, i8 0)⏎ 182 181 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 183 182 ; 184 183 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>)) 223 222 224 223 define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) { 225 224 ; CHECK-LABEL: @test_insertqi_call_constexpr( 226 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 
x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)⏎ 225 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> , i8 48, i8 3)⏎ 227 226 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 228 227 ; 229 228 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
 125 125 126 126 define <4 x i32> @bitcast_constexpr_4i32_2i64_u2() { 127 127 ; CHECK-LABEL: @bitcast_constexpr_4i32_2i64_u2( 128 ; CHECK-NEXT: ret <4 x i32> bitcast (<2 x i64> to <4 x i32>)⏎ 128 ; CHECK-NEXT: ret <4 x i32> ⏎ 129 129 ; 130 130 %cast = bitcast <2 x i64> to <4 x i32> 131 131 ret <4 x i32> %cast 133 133 134 134 define <4 x i32> @bitcast_constexpr_4i32_2i64_1u() { 135 135 ; CHECK-LABEL: @bitcast_constexpr_4i32_2i64_1u( 136 ; CHECK-NEXT: ret <4 x i32> bitcast (<2 x i64> to <4 x i32>)⏎ 136 ; CHECK-NEXT: ret <4 x i32> ⏎ 137 137 ; 138 138 %cast = bitcast <2 x i64> to <4 x i32> 139 139 ret <4 x i32> %cast 141 141 142 142 define <4 x i32> @bitcast_constexpr_4i32_2i64() { 143 143 ; CHECK-LABEL: @bitcast_constexpr_4i32_2i64( 144 ; CHECK-NEXT: ret <4 x i32> bitcast (<2 x i64> to <4 x i32>)⏎ 144 ; CHECK-NEXT: ret <4 x i32> ⏎ 145 145 ; 146 146 %cast = bitcast <2 x i64> to <4 x i32> 147 147 ret <4 x i32> %cast 149 149 150 150 define <8 x i16> @bitcast_constexpr_8i16_2i64_u2() { 151 151 ; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_u2( 152 ; CHECK-NEXT: ret <8 x i16> bitcast (<2 x i64> to <8 x i16>)⏎ 152 ; CHECK-NEXT: ret <8 x i16> ⏎ 153 153 ; 154 154 %cast = bitcast <2 x i64> to <8 x i16> 155 155 ret <8 x i16> %cast 157 157 158 158 define <8 x i16> @bitcast_constexpr_8i16_2i64_1u() { 159 159 ; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_1u( 160 ; CHECK-NEXT: ret <8 x i16> bitcast (<2 x i64> to <8 x i16>)⏎ 160 ; CHECK-NEXT: ret <8 x i16> ⏎ 161 161 ; 162 162 %cast = bitcast <2 x i64> to <8 x i16> 163 163 ret <8 x i16> %cast 165 165 166 166 define <8 x i16> @bitcast_constexpr_8i16_2i64_u65536() { 167 167 ; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_u65536( 168 ; CHECK-NEXT: ret <8 x i16> bitcast (<2 x i64> to <8 x i16>)⏎ 168 ; CHECK-NEXT: ret <8 x i16> ⏎ 169 169 ; 170 170 %cast = bitcast <2 x i64> to <8 x i16> 171 171 ret <8 x i16> %cast 173 173 174 174 define <16 x i8> @bitcast_constexpr_16i8_2i64_u2() { 175 175 ; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_u2( 176 ; 
CHECK-NEXT: ret <16 x i8> bitcast (<2 x i64> to <16 x i8>)⏎ 176 ; CHECK-NEXT: ret <16 x i8> ⏎ 177 177 ; 178 178 %cast = bitcast <2 x i64> to <16 x i8> 179 179 ret <16 x i8> %cast 181 181 182 182 define <16 x i8> @bitcast_constexpr_16i8_2i64_256u() { 183 183 ; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_256u( 184 ; CHECK-NEXT: ret <16 x i8> bitcast (<2 x i64> to <16 x i8>)⏎ 184 ; CHECK-NEXT: ret <16 x i8> ⏎ 185 185 ; 186 186 %cast = bitcast <2 x i64> to <16 x i8> 187 187 ret <16 x i8> %cast 189 189 190 190 define <16 x i8> @bitcast_constexpr_16i8_2i64_u256() { 191 191 ; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_u256( 192 ; CHECK-NEXT: ret <16 x i8> bitcast (<2 x i64> to <16 x i8>)⏎ 192 ; CHECK-NEXT: ret <16 x i8> ⏎ 193 193 ; 194 194 %cast = bitcast <2 x i64> to <16 x i8> 195 195 ret <16 x i8> %cast 197 197 198 198 define <8 x i16> @bitcast_constexpr_8i16_4i32_uu22() { 199 199 ; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_uu22( 200 ; CHECK-NEXT: ret <8 x i16> bitcast (<4 x i32> to <8 x i16>)⏎ 200 ; CHECK-NEXT: ret <8 x i16> ⏎ 201 201 ; 202 202 %cast = bitcast <4 x i32> to <8 x i16> 203 203 ret <8 x i16> %cast 205 205 206 206 define <8 x i16> @bitcast_constexpr_8i16_4i32_10uu() { 207 207 ; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_10uu( 208 ; CHECK-NEXT: ret <8 x i16> bitcast (<4 x i32> to <8 x i16>)⏎ 208 ; CHECK-NEXT: ret <8 x i16> ⏎ 209 209 ; 210 210 %cast = bitcast <4 x i32> to <8 x i16> 211 211 ret <8 x i16> %cast 213 213 214 214 define <8 x i16> @bitcast_constexpr_8i16_4i32_u257u256() { 215 215 ; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_u257u256( 216 ; CHECK-NEXT: ret <8 x i16> bitcast (<4 x i32> to <8 x i16>)⏎ 216 ; CHECK-NEXT: ret <8 x i16> ⏎ 217 217 ; 218 218 %cast = bitcast <4 x i32> to <8 x i16> 219 219 ret <8 x i16> %cast 221 221 222 222 define <16 x i8> @bitcast_constexpr_16i8_4i32_u2u2() { 223 223 ; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_u2u2( 224 ; CHECK-NEXT: ret <16 x i8> bitcast (<4 x i32> to <16 x i8>)⏎ 224 ; CHECK-NEXT: ret <16 x i8> ⏎ 225 225 ; 226 
226 %cast = bitcast <4 x i32> to <16 x i8> 227 227 ret <16 x i8> %cast 229 229 230 230 define <16 x i8> @bitcast_constexpr_16i8_4i32_1u1u() { 231 231 ; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_1u1u( 232 ; CHECK-NEXT: ret <16 x i8> bitcast (<4 x i32> to <16 x i8>)⏎ 232 ; CHECK-NEXT: ret <16 x i8> ⏎ 233 233 ; 234 234 %cast = bitcast <4 x i32> to <16 x i8> 235 235 ret <16 x i8> %cast 237 237 238 238 define <16 x i8> @bitcast_constexpr_16i8_4i32_u256uu() { 239 239 ; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_u256uu( 240 ; CHECK-NEXT: ret <16 x i8> bitcast (<4 x i32> to <16 x i8>)⏎ 240 ; CHECK-NEXT: ret <16 x i8> ⏎ 241 241 ; 242 242 %cast = bitcast <4 x i32> to <16 x i8> 243 243 ret <16 x i8> %cast 245 245 246 246 define <16 x i8> @bitcast_constexpr_16i8_8i16_u2u2u2u2() { 247 247 ; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_u2u2u2u2( 248 ; CHECK-NEXT: ret <16 x i8> bitcast (<8 x i16> to <16 x i8>)⏎ 248 ; CHECK-NEXT: ret <16 x i8> ⏎ 249 249 ; 250 250 %cast = bitcast <8 x i16> to <16 x i8> 251 251 ret <16 x i8> %cast 253 253 254 254 define <16 x i8> @bitcast_constexpr_16i8_8i16_1u1u1u1u() { 255 255 ; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_1u1u1u1u( 256 ; CHECK-NEXT: ret <16 x i8> bitcast (<8 x i16> to <16 x i8>)⏎ 256 ; CHECK-NEXT: ret <16 x i8> ⏎ 257 257 ; 258 258 %cast = bitcast <8 x i16> to <16 x i8> 259 259 ret <16 x i8> %cast 261 261 262 262 define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() { 263 263 ; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_u256uuu256uu( 264 ; CHECK-NEXT: ret <16 x i8> bitcast (<8 x i16> to <16 x i8>)⏎ 264 ; CHECK-NEXT: ret <16 x i8> ⏎ 265 265 ; 266 266 %cast = bitcast <8 x i16> to <16 x i8> 267 267 ret <16 x i8> %cast