llvm.org GIT mirror: llvm @ 9b379dc
Convert more tests to FileCheck.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@81915 91177308-0d34-0410-b5e6-96231b3b80d8
Bob Wilson, 10 years ago
6 changed files with 153 additions and 45 deletions.
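Every file gets the same conversion: the old RUN lines piped llc output into a temporary file and counted opcodes with grep, while the new single RUN line pipes the output straight into FileCheck, which matches the ;CHECK patterns embedded next to each function, in order (CHECK-NEXT additionally requires its match on the immediately following line). A minimal sketch of the idiom, using an illustrative function rather than one of the tests below:

; Old style: dump to a file and count opcodes.
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep veor %t | count 8

; New style: match patterns against the piped output.
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

define <8 x i8> @example(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: example:
;CHECK: veor
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = xor <8 x i8> %tmp1, %tmp2
 ret <8 x i8> %tmp3
}

The six diffs below apply this to what appear to be the ARM veor, NEON fcmp, VFP, vget_lane, vhadd, and vhsub tests; removed lines are marked with -, added lines with +, unchanged context with a leading space.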
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep veor %t | count 8
-; Note: function names do not include "veor" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_eori8:
+;CHECK: veor
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = xor <8 x i8> %tmp1, %tmp2
 ret <8 x i8> %tmp3
 }

 define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_eori16:
+;CHECK: veor
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = xor <4 x i16> %tmp1, %tmp2
 ret <4 x i16> %tmp3
 }

 define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_eori32:
+;CHECK: veor
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = xor <2 x i32> %tmp1, %tmp2
 ret <2 x i32> %tmp3
 }

 define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_eori64:
+;CHECK: veor
 %tmp1 = load <1 x i64>* %A
 %tmp2 = load <1 x i64>* %B
 %tmp3 = xor <1 x i64> %tmp1, %tmp2
 ret <1 x i64> %tmp3
 }

 define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_eorQi8:
+;CHECK: veor
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = xor <16 x i8> %tmp1, %tmp2
 ret <16 x i8> %tmp3
 }

 define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_eorQi16:
+;CHECK: veor
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = xor <8 x i16> %tmp1, %tmp2
 ret <8 x i16> %tmp3
 }

 define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_eorQi32:
+;CHECK: veor
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = xor <4 x i32> %tmp1, %tmp2
 ret <4 x i32> %tmp3
 }

 define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_eorQi64:
+;CHECK: veor
 %tmp1 = load <2 x i64>* %A
 %tmp2 = load <2 x i64>* %B
 %tmp3 = xor <2 x i64> %tmp1, %tmp2
 ret <2 x i64> %tmp3
 }
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.f32} %t | count 1
-; RUN: grep {vcgt\\.f32} %t | count 9
-; RUN: grep {vcge\\.f32} %t | count 5
-; RUN: grep vorr %t | count 4
-; RUN: grep vmvn %t | count 7
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 ; This tests fcmp operations that do not map directly to NEON instructions.

 ; une is implemented with VCEQ/VMVN
 define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunef32:
+;CHECK: vceq.f32
+;CHECK-NEXT: vmvn
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp une <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; olt is implemented with VCGT
 define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcoltf32:
+;CHECK: vcgt.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; ole is implemented with VCGE
 define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcolef32:
+;CHECK: vcge.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; uge is implemented with VCGT/VMVN
 define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; ule is implemented with VCGT/VMVN
 define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vculef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; ugt is implemented with VCGE/VMVN
 define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugtf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; ult is implemented with VCGE/VMVN
 define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcultf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; ueq is implemented with VCGT/VCGT/VORR/VMVN
 define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcueqf32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; one is implemented with VCGT/VCGT/VORR
 define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vconef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp one <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; uno is implemented with VCGT/VCGE/VORR/VMVN
 define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunof32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }

 ; ord is implemented with VCGT/VCGE/VORR
 define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcordf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 ret <2 x i32> %tmp4
 }
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fabs | count 2
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fmscs | count 1
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fcvt | count 2
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fuito | count 2
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fto.i | count 4
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep bmi | count 1
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep bgt | count 1
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fcmpezs | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s

 define void @test(float* %P, double* %D) {
 %A = load float* %P ; [#uses=1]

 declare float @fabsf(float)

 declare double @fabs(double)

 define void @test_abs(float* %P, double* %D) {
+;CHECK: test_abs:
 %a = load float* %P ; [#uses=1]
+;CHECK: fabss
 %b = call float @fabsf( float %a ) ; [#uses=1]
 store float %b, float* %P
 %A = load double* %D ; [#uses=1]
+;CHECK: fabsd
 %B = call double @fabs( double %A ) ; [#uses=1]
 store double %B, double* %D
 ret void
 }

 define void @test_add(float* %P, double* %D) {
+;CHECK: test_add:
 %a = load float* %P ; [#uses=2]
 %b = fadd float %a, %a ; [#uses=1]
 store float %b, float* %P
 %A = load double* %D ; [#uses=2]
 %B = fadd double %A, %A ; [#uses=1]
 store double %B, double* %D
 ret void
 }

 define void @test_ext_round(float* %P, double* %D) {
+;CHECK: test_ext_round:
 %a = load float* %P ; [#uses=1]
+;CHECK: fcvtds
 %b = fpext float %a to double ; [#uses=1]
 %A = load double* %D ; [#uses=1]
+;CHECK: fcvtsd
 %B = fptrunc double %A to float ; [#uses=1]
 store double %b, double* %D
 store float %B, float* %P
 ret void
 }

 define void @test_fma(float* %P1, float* %P2, float* %P3) {
+;CHECK: test_fma:
 %a1 = load float* %P1 ; [#uses=1]
 %a2 = load float* %P2 ; [#uses=1]
 %a3 = load float* %P3 ; [#uses=1]
+;CHECK: fmscs
 %X = fmul float %a1, %a2 ; [#uses=1]
 %Y = fsub float %X, %a3 ; [#uses=1]
 store float %Y, float* %P1
 ret void
 }

 define i32 @test_ftoi(float* %P1) {
+;CHECK: test_ftoi:
 %a1 = load float* %P1 ; [#uses=1]
+;CHECK: ftosizs
 %b1 = fptosi float %a1 to i32 ; [#uses=1]
 ret i32 %b1
 }

 define i32 @test_ftou(float* %P1) {
+;CHECK: test_ftou:
 %a1 = load float* %P1 ; [#uses=1]
+;CHECK: ftouizs
 %b1 = fptoui float %a1 to i32 ; [#uses=1]
 ret i32 %b1
 }

 define i32 @test_dtoi(double* %P1) {
+;CHECK: test_dtoi:
 %a1 = load double* %P1 ; [#uses=1]
+;CHECK: ftosizd
 %b1 = fptosi double %a1 to i32 ; [#uses=1]
 ret i32 %b1
 }

 define i32 @test_dtou(double* %P1) {
+;CHECK: test_dtou:
 %a1 = load double* %P1 ; [#uses=1]
+;CHECK: ftouizd
 %b1 = fptoui double %a1 to i32 ; [#uses=1]
 ret i32 %b1
 }

 define void @test_utod(double* %P1, i32 %X) {
+;CHECK: test_utod:
+;CHECK: fuitod
 %b1 = uitofp i32 %X to double ; [#uses=1]
 store double %b1, double* %P1
 ret void
 }

 define void @test_utod2(double* %P1, i8 %X) {
+;CHECK: test_utod2:
+;CHECK: fuitod
 %b1 = uitofp i8 %X to double ; [#uses=1]
 store double %b1, double* %P1
 ret void
 }

 define void @test_cmp(float* %glob, i32 %X) {
+;CHECK: test_cmp:
 entry:
 %tmp = load float* %glob ; [#uses=2]
 %tmp3 = getelementptr float* %glob, i32 2 ; [#uses=1]
 %tmp4 = load float* %tmp3 ; [#uses=2]
 %tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; [#uses=1]
 %tmp5 = fcmp uno float %tmp, %tmp4 ; [#uses=1]
 %tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; [#uses=1]
+;CHECK: bmi
+;CHECK-NEXT: bgt
 br i1 %tmp6, label %cond_true, label %cond_false

 cond_true: ; preds = %entry

 declare i32 @baz(...)

 define void @test_cmpfp0(float* %glob, i32 %X) {
+;CHECK: test_cmpfp0:
 entry:
 %tmp = load float* %glob ; [#uses=1]
+;CHECK: fcmpezs
 %tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; [#uses=1]
 br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vmov\\.s8} %t | count 2
-; RUN: grep {vmov\\.s16} %t | count 2
-; RUN: grep {vmov\\.u8} %t | count 2
-; RUN: grep {vmov\\.u16} %t | count 2
-; RUN: grep {vmov\\.32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
+;CHECK: vget_lanes8:
+;CHECK: vmov.s8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = extractelement <8 x i8> %tmp1, i32 1
 %tmp3 = sext i8 %tmp2 to i32
 ret i32 %tmp3
 }

 define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
+;CHECK: vget_lanes16:
+;CHECK: vmov.s16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = extractelement <4 x i16> %tmp1, i32 1
 %tmp3 = sext i16 %tmp2 to i32
 ret i32 %tmp3
 }

 define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
+;CHECK: vget_laneu8:
+;CHECK: vmov.u8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = extractelement <8 x i8> %tmp1, i32 1
 %tmp3 = zext i8 %tmp2 to i32
 ret i32 %tmp3
 }

 define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
+;CHECK: vget_laneu16:
+;CHECK: vmov.u16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = extractelement <4 x i16> %tmp1, i32 1
 %tmp3 = zext i16 %tmp2 to i32
 ret i32 %tmp3
 }

 ; Do a vector add to keep the extraction from being done directly from memory.
 define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
+;CHECK: vget_lanei32:
+;CHECK: vmov.32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = add <2 x i32> %tmp1, %tmp1
 %tmp3 = extractelement <2 x i32> %tmp2, i32 1
 ret i32 %tmp3
 }

 define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_lanes8:
+;CHECK: vmov.s8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = extractelement <16 x i8> %tmp1, i32 1
 %tmp3 = sext i8 %tmp2 to i32
 ret i32 %tmp3
 }

 define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_lanes16:
+;CHECK: vmov.s16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = extractelement <8 x i16> %tmp1, i32 1
 %tmp3 = sext i16 %tmp2 to i32
 ret i32 %tmp3
 }

 define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_laneu8:
+;CHECK: vmov.u8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = extractelement <16 x i8> %tmp1, i32 1
 %tmp3 = zext i8 %tmp2 to i32
 ret i32 %tmp3
 }

 define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_laneu16:
+;CHECK: vmov.u16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = extractelement <8 x i16> %tmp1, i32 1
 %tmp3 = zext i16 %tmp2 to i32
 ret i32 %tmp3
 }

 ; Do a vector add to keep the extraction from being done directly from memory.
 define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
+;CHECK: vgetQ_lanei32:
+;CHECK: vmov.32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = add <4 x i32> %tmp1, %tmp1
 %tmp3 = extractelement <4 x i32> %tmp2, i32 1
 ret i32 %tmp3
 }
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vhadd\\.s8} %t | count 2
-; RUN: grep {vhadd\\.s16} %t | count 2
-; RUN: grep {vhadd\\.s32} %t | count 2
-; RUN: grep {vhadd\\.u8} %t | count 2
-; RUN: grep {vhadd\\.u16} %t | count 2
-; RUN: grep {vhadd\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhadds8:
+;CHECK: vhadd.s8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 ret <8 x i8> %tmp3
 }

 define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhadds16:
+;CHECK: vhadd.s16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 ret <4 x i16> %tmp3
 }

 define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhadds32:
+;CHECK: vhadd.s32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 ret <2 x i32> %tmp3
 }

 define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhaddu8:
+;CHECK: vhadd.u8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 ret <8 x i8> %tmp3
 }

 define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhaddu16:
+;CHECK: vhadd.u16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 ret <4 x i16> %tmp3
 }

 define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhaddu32:
+;CHECK: vhadd.u32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 ret <2 x i32> %tmp3
 }

 define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQs8:
+;CHECK: vhadd.s8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 ret <16 x i8> %tmp3
 }

 define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQs16:
+;CHECK: vhadd.s16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 ret <8 x i16> %tmp3
 }

 define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQs32:
+;CHECK: vhadd.s32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 ret <4 x i32> %tmp3
 }

 define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQu8:
+;CHECK: vhadd.u8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 ret <16 x i8> %tmp3
 }

 define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQu16:
+;CHECK: vhadd.u16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 ret <8 x i16> %tmp3
 }

 define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQu32:
+;CHECK: vhadd.u32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 ret <4 x i32> %tmp3
 }
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vhsub\\.s8} %t | count 2
-; RUN: grep {vhsub\\.s16} %t | count 2
-; RUN: grep {vhsub\\.s32} %t | count 2
-; RUN: grep {vhsub\\.u8} %t | count 2
-; RUN: grep {vhsub\\.u16} %t | count 2
-; RUN: grep {vhsub\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubs8:
+;CHECK: vhsub.s8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 ret <8 x i8> %tmp3
 }

 define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubs16:
+;CHECK: vhsub.s16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 ret <4 x i16> %tmp3
 }

 define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubs32:
+;CHECK: vhsub.s32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 ret <2 x i32> %tmp3
 }

 define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubu8:
+;CHECK: vhsub.u8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 ret <8 x i8> %tmp3
 }

 define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubu16:
+;CHECK: vhsub.u16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 ret <4 x i16> %tmp3
 }

 define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubu32:
+;CHECK: vhsub.u32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 ret <2 x i32> %tmp3
 }

 define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQs8:
+;CHECK: vhsub.s8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 ret <16 x i8> %tmp3
 }

 define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQs16:
+;CHECK: vhsub.s16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 ret <8 x i16> %tmp3
 }

 define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQs32:
+;CHECK: vhsub.s32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 ret <4 x i32> %tmp3
 }

 define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQu8:
+;CHECK: vhsub.u8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 ret <16 x i8> %tmp3
 }

 define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQu16:
+;CHECK: vhsub.u16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 ret <8 x i16> %tmp3
 }

 define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQu32:
+;CHECK: vhsub.u32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 ret <4 x i32> %tmp3
 }
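For reference, lit substitutes %s with the path of the test file itself, so each new RUN line expands to something like the following (the file name vhsub.ll is an assumption for illustration):

llc < vhsub.ll -march=arm -mattr=+neon | FileCheck vhsub.ll

FileCheck reads the ;CHECK lines out of the file named on its command line and matches them, in order, against the assembly arriving on stdin. A plain CHECK allows unrelated lines between matches, while CHECK-NEXT requires its match on the very next line, which is what pins down the exact multi-instruction sequences in the fcmp lowerings above.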