llvm.org GIT mirror llvm / da77e83
Improve pattern match from v1i8 to v1i32 for AArch64 Neon. (git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200119 91177308-0d34-0410-b5e6-96231b3b80d8) — Jiangning Liu, 6 years ago
2 changed file(s) with 23 addition(s) and 14 deletion(s). Raw diff Collapse all Expand all
62326232 (v8i16 (!cast<Instruction>(prefix # "_8B")
62336233 (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
62346234 sub_16)>;
6235
6236 // v1i8 -> v1i32
6237 def : Pat<(v1i32 (ExtOp (v1i8 FPR8:$Rn))),
6238 (EXTRACT_SUBREG
6239 (v4i32 (!cast<Instruction>(prefix # "_4H")
6240 (v4i16 (SUBREG_TO_REG (i64 0),
6241 (v1i16 (EXTRACT_SUBREG
6242 (v8i16 (!cast<Instruction>(prefix # "_8B")
6243 (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
6244 sub_16)),
6245 sub_16)), 0)),
6246 sub_32)>;
62476235 }
62486236
62496237 defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
62506238 defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
6239
6240 // zext v1i8 -> v1i32
6241 def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))),
6242 (v1i32 (EXTRACT_SUBREG
6243 (v1i64 (SUBREG_TO_REG (i64 0),
6244 (v1i8 (DUPbv_B
6245 (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
6246 0)),
6247 sub_8)),
6248 sub_32))>;
62516249
62526250 // zext v1i8 -> v1i64
62536251 def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
62656263 0)),
62666264 sub_16))>;
62676265
6266 // sext v1i8 -> v1i32
6267 def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))),
6268 (EXTRACT_SUBREG
6269 (v4i32 (SSHLLvvi_4H
6270 (v4i16 (SUBREG_TO_REG (i64 0),
6271 (v1i16 (EXTRACT_SUBREG
6272 (v8i16 (SSHLLvvi_8B
6273 (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
6274 sub_16)),
6275 sub_16)), 0)),
6276 sub_32)>;
6277
62686278 // sext v1i8 -> v1i64
62696279 def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
62706280 (EXTRACT_SUBREG
2828
2929 define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
3030 ; CHECK-LABEL: test_zext_v1i8_v1i32:
31 ; CHECK: ushll v0.8h, v0.8b, #0
32 ; CHECK: ushll v0.4s, v0.4h, #0
31 ; CHECK: dup b0, v0.b[0]
3332 %1 = extractelement <8 x i8> %v, i32 0
3433 %2 = insertelement <1 x i8> undef, i8 %1, i32 0
3534 %3 = zext <1 x i8> %2 to <1 x i32>