llvm.org GIT mirror llvm / a53bf06
Implemented aarch64 Neon scalar vmulx_lane intrinsics. Implemented aarch64 Neon scalar vfma_lane intrinsics. Implemented aarch64 Neon scalar vfms_lane intrinsics. Implemented legacy vmul_n_f64, vmul_lane_f64, vmul_laneq_f64 intrinsics (v1f64 parameter type) using Neon scalar instructions. Implemented legacy vfma_lane_f64, vfms_lane_f64, vfma_laneq_f64, vfms_laneq_f64 intrinsics (v1f64 parameter type) using Neon scalar instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194888 91177308-0d34-0410-b5e6-96231b3b80d8 Ana Pazos 6 years ago
5 changed file(s) with 410 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
6666 // Vector Pairwise minNum (Floating Point)
6767 def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
6868
69 // Vector Multiply Extended (Floating Point)
70 def int_aarch64_neon_vmulx : Neon_2Arg_Intrinsic;
69 // Vector Multiply Extended and Scalar Multiply Extended (Floating Point)
70 def int_aarch64_neon_vmulx :
71 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;
7172
7273 class Neon_N2V_Intrinsic
7374 : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
36523652 // End of post-index vector load/store multiple N-element structure
36533653 // (class SIMD lselem-post)
36543654
3655
3656 // Neon Scalar instructions implementation
36553657 // Scalar Three Same
36563658
36573659 class NeonI_Scalar3Same_size size, bits<5> opcode, string asmop,
43594361
43604362 // Patterns to match llvm.aarch64.* intrinsic for
43614363 // Scalar Floating-point Multiply Extended,
4362 defm : Neon_Scalar3Same_SD_size_patterns
4363 FMULXddd>;
// Selects INSTS for the f32 form and INSTD for the f64 form of the scalar
// two-operand node `opnode`.
4364 multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
4365 Instruction INSTS,
4366 Instruction INSTD> {
4367 def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
4368 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4369 def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
4370 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4371 }
4372
// Scalar f32/f64 forms of the vmulx intrinsic map to FMULXsss / FMULXddd.
4373 defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
4374 FMULXsss,FMULXddd>;
43644375
43654376 // Scalar Integer Shift Left (Signed, Unsigned)
43664377 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
47934804 let Inst{20-16} = MRm;
47944805 }
47954806
// Patterns selecting INST for a two-operand node `opnode` where one operand is
// a scalar in FPRC and the other is a lane extracted from a vector register.
//   OpTy / OpImm   : vector type and lane-index operand for a VPR128 source.
//   OpNTy / OpNImm : vector type and lane-index operand for a VPR64 source;
//                    the VPR64 value is wrapped in SUBREG_TO_REG (sub_64) and
//                    typed as ExTy before being handed to INST.
// Each source width is matched in both operand orders.
4807 multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
4808 SDPatternOperator opnode,
4809 Instruction INST,
4810 ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
4811 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
4812
// scalar * lane, lane taken from a VPR128 vector
4813 def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
4814 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
4815 (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4816
// scalar * lane, lane taken from a VPR64 vector
4817 def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
4818 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
4819 (ResTy (INST (ResTy FPRC:$Rn),
4820 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4821 OpNImm:$Imm))>;
4822
4823 // swapped operands
4824 def : Pat<(ResTy (opnode
4825 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
4826 (ResTy FPRC:$Rn))),
4827 (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4828
4829 def : Pat<(ResTy (opnode
4830 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
4831 (ResTy FPRC:$Rn))),
4832 (ResTy (INST (ResTy FPRC:$Rn),
4833 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4834 OpNImm:$Imm))>;
4835 }
4836
4837 // Patterns for Scalar Floating Point multiply (scalar, by element)
// NOTE(review): the leading "<fmul, FMUL..." arguments were lost in extraction;
// the instruction names follow the FMULXssv_4S / FMULXddv_2D naming used by the
// multiply-extended instantiations. Confirm against the upstream commit.
4838 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
4839 f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
4840 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
4841 f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
4842
4843 // Patterns for Scalar Floating Point multiply extended (scalar, by element)
// The pattern operator is the vmulx intrinsic; the f32 and f64 forms select
// FMULXssv_4S and FMULXddv_2D respectively.
4844 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
4845 FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
4846 v2f32, v4f32, neon_uimm1_bare>;
4847 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
4848 FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
4849 v1f64, v2f64, neon_uimm0_bare>;
4850
4851
47964852 // Scalar Floating Point fused multiply-add (scalar, by element)
47974853 def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
47984854 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
48204876 let Inst{21} = 0b0; // l
48214877 let Inst{20-16} = MRm;
48224878 }
4879 // We are allowed to match the fma instruction regardless of compile options.
// Patterns mapping a scalar `fma` node with one multiplicand extracted from a
// vector lane onto the by-element instructions:
//   FMLAI when the lane is used directly,
//   FMLSI when the lane is wrapped in fneg.
// In every output pattern the addend $Ra is passed as the first instruction
// operand, followed by the scalar multiplicand $Rn, the vector and the lane.
//   OpTy / OpImm   : vector type and lane-index operand for a VPR128 source.
//   OpNTy / OpNImm : vector type and lane-index operand for a VPR64 source;
//                    the VPR64 value is wrapped in SUBREG_TO_REG (sub_64) and
//                    typed as ExTy before being handed to the instruction.
// Each form is matched with the lane as either the first or the second
// multiplicand of the fma.
4880 multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
4881 Instruction FMLAI, Instruction FMLSI,
4882 ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
4883 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
4884 // fmla
4885 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
4886 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
4887 (ResTy FPRC:$Ra))),
4888 (ResTy (FMLAI (ResTy FPRC:$Ra),
4889 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4890
4891 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
4892 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
4893 (ResTy FPRC:$Ra))),
4894 (ResTy (FMLAI (ResTy FPRC:$Ra),
4895 (ResTy FPRC:$Rn),
4896 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4897 OpNImm:$Imm))>;
4898
4899 // swapped fmla operands
4900 def : Pat<(ResTy (fma
4901 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
4902 (ResTy FPRC:$Rn),
4903 (ResTy FPRC:$Ra))),
4904 (ResTy (FMLAI (ResTy FPRC:$Ra),
4905 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4906
4907 def : Pat<(ResTy (fma
4908 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
4909 (ResTy FPRC:$Rn),
4910 (ResTy FPRC:$Ra))),
4911 (ResTy (FMLAI (ResTy FPRC:$Ra),
4912 (ResTy FPRC:$Rn),
4913 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4914 OpNImm:$Imm))>;
4915
4916 // fmls
4917 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
4918 (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
4919 (ResTy FPRC:$Ra))),
4920 (ResTy (FMLSI (ResTy FPRC:$Ra),
4921 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4922
4923 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
4924 (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
4925 (ResTy FPRC:$Ra))),
4926 (ResTy (FMLSI (ResTy FPRC:$Ra),
4927 (ResTy FPRC:$Rn),
4928 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4929 OpNImm:$Imm))>;
4930
4931 // swapped fmls operands
4932 def : Pat<(ResTy (fma
4933 (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
4934 (ResTy FPRC:$Rn),
4935 (ResTy FPRC:$Ra))),
4936 (ResTy (FMLSI (ResTy FPRC:$Ra),
4937 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4938
4939 def : Pat<(ResTy (fma
4940 (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
4941 (ResTy FPRC:$Rn),
4942 (ResTy FPRC:$Ra))),
4943 (ResTy (FMLSI (ResTy FPRC:$Ra),
4944 (ResTy FPRC:$Rn),
4945 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4946 OpNImm:$Imm))>;
4947 }
4948
4949 // Scalar Floating Point fused multiply-add and multiply-subtract (scalar, by element)
// One instantiation per element size. The f64 instantiation was previously
// listed twice verbatim; the redundant copy only produced duplicate patterns.
// NOTE(review): the leading instruction arguments were lost in extraction;
// FMLAssv_4S is defined nearby, the other three names follow the same naming
// scheme. Confirm against the upstream commit.
4950 defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
4951 f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
4952 defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
4953 f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
48234956
48244957 // Scalar Signed saturating doubling multiply-add long (scalar, by element)
48254958 def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
49885121 def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
49895122 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
49905123 }
5124
// Selects the scalar DUP instruction DUPI for a vector_extract producing ResTy.
//   OpTy / OpImm   : vector type and lane-index operand for a VPR128 source.
//   OpNTy / OpNImm : vector type and lane-index operand for a VPR64 source;
//                    the VPR64 value is wrapped in SUBREG_TO_REG (sub_64) and
//                    typed as ExTy before being handed to DUPI.
5125 multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
5126 ValueType OpTy, Operand OpImm,
5127 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
5128
5129 def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
5130 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
5131
5132 def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
5133 (ResTy (DUPI
5134 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5135 OpNImm:$Imm))>;
5136 }
5137
5138 // Patterns for vector extract of FP data using scalar DUP instructions
// f32 lanes are extracted with DUPsv_S, f64 lanes with DUPdv_D.
5139 defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
5140 v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
5141 defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
5142 v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5143
// Defines an assembler alias for the scalar DUP instruction DUPI.
//   asmop / asmlane : mnemonic and lane suffix used to build the alias string.
//   OpImm / ResRC   : lane-index operand and scalar result register class.
// NOTE(review): the two string parameters and the alias string were lost in
// extraction. The parameters are implied by the instantiations, which pass
// ("mov", ".b", ...); the exact alias string text is reconstructed and must be
// confirmed against the upstream commit.
5144 multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
5145 Instruction DUPI, Operand OpImm,
5146 RegisterClass ResRC> {
5147 def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
5148 (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
5149 }
5150
5151 // Aliases for Scalar copy - DUP element (scalar)
5152 // FIXME: This is actually the preferred syntax but TableGen can't deal with
5153 // custom printing of aliases.
// One "mov" alias per element size (.b, .h, .s, .d); each passes the matching
// DUP instruction, its lane-index operand and the scalar register class.
5154 defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
5155 defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
5156 defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
5157 defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
49915158
49925159
49935160 //===----------------------------------------------------------------------===//
0 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
1
2 declare float @llvm.fma.f32(float, float, float)
3 declare double @llvm.fma.f64(double, double, double)
4
; fma(%b, lane 3 of a <4 x float>, %a) must select the by-element FMLA.
5 define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
6 ; CHECK: test_fmla_ss4S
7 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
8 %tmp1 = extractelement <4 x float> %v, i32 3
9 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
10 ret float %tmp2
11 }
12
; Lane as the first multiplicand: fma(lane 3, %b, %a) must still select FMLA.
13 define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
14 ; CHECK: test_fmla_ss4S_swap
15 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
16 %tmp1 = extractelement <4 x float> %v, i32 3
17 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
18 ret float %tmp2
19 }
20
; fma(%b, lane 1 of a <2 x float>, %a) must select the by-element FMLA.
21 define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
22 ; CHECK: test_fmla_ss2S
23 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
24 %tmp1 = extractelement <2 x float> %v, i32 1
25 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
26 ret float %tmp2
27 }
28
; fma(%b, lane 0 of a <1 x double>, %a) must select the by-element FMLA.
29 define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
30 ; CHECK: test_fmla_ddD
31 ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
32 %tmp1 = extractelement <1 x double> %v, i32 0
33 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
34 ret double %tmp2
35 }
36
; fma(%b, lane 1 of a <2 x double>, %a) must select the by-element FMLA.
37 define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
38 ; CHECK: test_fmla_dd2D
39 ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
40 %tmp1 = extractelement <2 x double> %v, i32 1
41 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
42 ret double %tmp2
43 }
44
; Lane as the first multiplicand: fma(lane 1, %b, %a) must still select FMLA.
45 define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) {
46 ; CHECK: test_fmla_dd2D_swap
47 ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
48 %tmp1 = extractelement <2 x double> %v, i32 1
49 %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
50 ret double %tmp2
51 }
52
; %tmp2 is the negated lane (fsub from -0.0), so the fma must select FMLS.
53 define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) {
54 ; CHECK: test_fmls_ss4S
55 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
56 %tmp1 = extractelement <4 x float> %v, i32 3
57 %tmp2 = fsub float -0.0, %tmp1
58 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
59 ret float %tmp3
60 }
61
; Negated lane as the second multiplicand; the fma must still select FMLS.
62 define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) {
63 ; CHECK: test_fmls_ss4S_swap
64 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
65 %tmp1 = extractelement <4 x float> %v, i32 3
66 %tmp2 = fsub float -0.0, %tmp1
67 %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a)
68 ret float %tmp3
69 }
70
71
; Negated lane 1 of a <2 x float>; the fma must select the by-element FMLS.
72 define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) {
73 ; CHECK: test_fmls_ss2S
74 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
75 %tmp1 = extractelement <2 x float> %v, i32 1
76 %tmp2 = fsub float -0.0, %tmp1
77 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
78 ret float %tmp3
79 }
80
; Negated lane 0 of a <1 x double>; the fma must select the by-element FMLS.
81 define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) {
82 ; CHECK: test_fmls_ddD
83 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
84 %tmp1 = extractelement <1 x double> %v, i32 0
85 %tmp2 = fsub double -0.0, %tmp1
86 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
87 ret double %tmp3
88 }
89
; Negated lane 1 of a <2 x double>; the fma must select the by-element FMLS.
90 define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) {
91 ; CHECK: test_fmls_dd2D
92 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
93 %tmp1 = extractelement <2 x double> %v, i32 1
94 %tmp2 = fsub double -0.0, %tmp1
95 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
96 ret double %tmp3
97 }
98
; Negated lane as the second multiplicand; the fma must still select FMLS.
99 define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) {
100 ; CHECK: test_fmls_dd2D_swap
101 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
102 %tmp1 = extractelement <2 x double> %v, i32 1
103 %tmp2 = fsub double -0.0, %tmp1
104 %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a)
105 ret double %tmp3
106 }
107
0 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
1
; %a * lane 1 of a <2 x float> must select the by-element FMUL.
2 define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) {
3 ; CHECK: test_fmul_lane_ss2S
4 ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
5 %tmp1 = extractelement <2 x float> %v, i32 1
6 %tmp2 = fmul float %a, %tmp1
7 ret float %tmp2
8 }
9
; Lane as the first operand: lane 1 * %a must still select the by-element FMUL.
10 define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) {
11 ; CHECK: test_fmul_lane_ss2S_swap
12 ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
13 %tmp1 = extractelement <2 x float> %v, i32 1
14 %tmp2 = fmul float %tmp1, %a
15 ret float %tmp2
16 }
17
18
; %a * lane 3 of a <4 x float> must select the by-element FMUL.
19 define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) {
20 ; CHECK: test_fmul_lane_ss4S
21 ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
22 %tmp1 = extractelement <4 x float> %v, i32 3
23 %tmp2 = fmul float %a, %tmp1
24 ret float %tmp2
25 }
26
; Lane as the first operand: lane 3 * %a must still select the by-element FMUL.
27 define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) {
28 ; CHECK: test_fmul_lane_ss4S_swap
29 ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
30 %tmp1 = extractelement <4 x float> %v, i32 3
31 %tmp2 = fmul float %tmp1, %a
32 ret float %tmp2
33 }
34
35
; %a * lane 0 of a <1 x double> must select the by-element FMUL.
36 define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
37 ; CHECK: test_fmul_lane_ddD
38 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
39 %tmp1 = extractelement <1 x double> %v, i32 0
40 %tmp2 = fmul double %a, %tmp1
41 ret double %tmp2
42 }
43
44
45
; %a * lane 1 of a <2 x double> must select the by-element FMUL.
46 define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) {
47 ; CHECK: test_fmul_lane_dd2D
48 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
49 %tmp1 = extractelement <2 x double> %v, i32 1
50 %tmp2 = fmul double %a, %tmp1
51 ret double %tmp2
52 }
53
54
; Lane as the first operand: lane 1 * %a must still select the by-element FMUL.
55 define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) {
56 ; CHECK: test_fmul_lane_dd2D_swap
57 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
58 %tmp1 = extractelement <2 x double> %v, i32 1
59 %tmp2 = fmul double %tmp1, %a
60 ret double %tmp2
61 }
62
63 declare float @llvm.aarch64.neon.vmulx.f32(float, float)
64
; vmulx(%a, lane 1 of a <2 x float>) must select the by-element FMULX.
65 define float @test_fmulx_lane_f32(float %a, <2 x float> %v) {
66 ; CHECK: test_fmulx_lane_f32
67 ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
68 %tmp1 = extractelement <2 x float> %v, i32 1
69 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1)
70 ret float %tmp2
71 }
72
; vmulx(%a, lane 3 of a <4 x float>) must select the by-element FMULX.
73 define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) {
74 ; CHECK: test_fmulx_laneq_f32
75 ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
76 %tmp1 = extractelement <4 x float> %v, i32 3
77 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1)
78 ret float %tmp2
79 }
80
; Lane as the first argument: vmulx(lane 3, %a) must still select FMULX.
81 define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) {
82 ; CHECK: test_fmulx_laneq_f32_swap
83 ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
84 %tmp1 = extractelement <4 x float> %v, i32 3
85 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a)
86 ret float %tmp2
87 }
88
89 declare double @llvm.aarch64.neon.vmulx.f64(double, double)
90
; vmulx(%a, lane 0 of a <1 x double>) must select the by-element FMULX.
91 define double @test_fmulx_lane_f64(double %a, <1 x double> %v) {
92 ; CHECK: test_fmulx_lane_f64
93 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
94 %tmp1 = extractelement <1 x double> %v, i32 0
95 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
96 ret double %tmp2
97 }
98
; vmulx(%a, lane 0 of a <2 x double>) must select the by-element FMULX.
99 define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
100 ; CHECK: test_fmulx_laneq_f64_0
101 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
102 %tmp1 = extractelement <2 x double> %v, i32 0
103 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
104 ret double %tmp2
105 }
106
107
; vmulx(%a, lane 1 of a <2 x double>) must select the by-element FMULX.
108 define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) {
109 ; CHECK: test_fmulx_laneq_f64_1
110 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
111 %tmp1 = extractelement <2 x double> %v, i32 1
112 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
113 ret double %tmp2
114 }
115
; Lane as the first argument: vmulx(lane 1, %a) must still select FMULX.
116 define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
117 ; CHECK: test_fmulx_laneq_f64_1_swap
118 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
119 %tmp1 = extractelement <2 x double> %v, i32 1
120 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a)
121 ret double %tmp2
122 }
123
4848 define float @test_vmulxs_f32(float %a, float %b) {
4949 ; CHECK: test_vmulxs_f32
5050 ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
51 %1 = insertelement <1 x float> undef, float %a, i32 0
52 %2 = insertelement <1 x float> undef, float %b, i32 0
53 %3 = call <1 x float> @llvm.aarch64.neon.vmulx.v1f32(<1 x float> %1, <1 x float> %2)
54 %4 = extractelement <1 x float> %3, i32 0
55 ret float %4
51 %1 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %b)
52 ret float %1
5653 }
5754
5855 define double @test_vmulxd_f64(double %a, double %b) {
5956 ; CHECK: test_vmulxd_f64
6057 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
61 %1 = insertelement <1 x double> undef, double %a, i32 0
62 %2 = insertelement <1 x double> undef, double %b, i32 0
63 %3 = call <1 x double> @llvm.aarch64.neon.vmulx.v1f64(<1 x double> %1, <1 x double> %2)
64 %4 = extractelement <1 x double> %3, i32 0
65 ret double %4
58 %1 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %b)
59 ret double %1
6660 }
6761
68 declare <1 x float> @llvm.aarch64.neon.vmulx.v1f32(<1 x float>, <1 x float>)
69 declare <1 x double> @llvm.aarch64.neon.vmulx.v1f64(<1 x double>, <1 x double>)
62 declare float @llvm.aarch64.neon.vmulx.f32(float, float)
63 declare double @llvm.aarch64.neon.vmulx.f64(double, double)
7064
7165 define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) {
7266 ; CHECK: test_vqdmlalh_s16