llvm.org GIT mirror llvm / ec1d81c
Update NEON struct names to match llvm-gcc changes. (This is not required for correctness but might help with sanity.)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@83415 91177308-0d34-0410-b5e6-96231b3b80d8
Bob Wilson, 10 years ago
8 changed file(s) with 165 addition(s) and 195 deletion(s).
Raw diff | Collapse all | Expand all
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
11
2 %struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
3 %struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> }
4 %struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> }
5 %struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> }
2 %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
3 %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
4 %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
5 %struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
66
77 define <8 x i8> @vld2i8(i8* %A) nounwind {
88 ;CHECK: vld2i8:
99 ;CHECK: vld2.8
10 %tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8* %A)
11 %tmp2 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 0
12 %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 1
10 %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A)
11 %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
12 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
1313 %tmp4 = add <8 x i8> %tmp2, %tmp3
1414 ret <8 x i8> %tmp4
1515 }
1717 define <4 x i16> @vld2i16(i16* %A) nounwind {
1818 ;CHECK: vld2i16:
1919 ;CHECK: vld2.16
20 %tmp1 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i16* %A)
21 %tmp2 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 0
22 %tmp3 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 1
20 %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i16* %A)
21 %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
22 %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
2323 %tmp4 = add <4 x i16> %tmp2, %tmp3
2424 ret <4 x i16> %tmp4
2525 }
2727 define <2 x i32> @vld2i32(i32* %A) nounwind {
2828 ;CHECK: vld2i32:
2929 ;CHECK: vld2.32
30 %tmp1 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i32* %A)
31 %tmp2 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 0
32 %tmp3 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 1
30 %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i32* %A)
31 %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
32 %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
3333 %tmp4 = add <2 x i32> %tmp2, %tmp3
3434 ret <2 x i32> %tmp4
3535 }
3737 define <2 x float> @vld2f(float* %A) nounwind {
3838 ;CHECK: vld2f:
3939 ;CHECK: vld2.32
40 %tmp1 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(float* %A)
41 %tmp2 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 0
42 %tmp3 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 1
40 %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(float* %A)
41 %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
42 %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
4343 %tmp4 = add <2 x float> %tmp2, %tmp3
4444 ret <2 x float> %tmp4
4545 }
4646
47 declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
48 declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
49 declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
50 declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
47 declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
48 declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
49 declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
50 declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
11
2 %struct.__builtin_neon_v8qi3 = type { <8 x i8>, <8 x i8>, <8 x i8> }
3 %struct.__builtin_neon_v4hi3 = type { <4 x i16>, <4 x i16>, <4 x i16> }
4 %struct.__builtin_neon_v2si3 = type { <2 x i32>, <2 x i32>, <2 x i32> }
5 %struct.__builtin_neon_v2sf3 = type { <2 x float>, <2 x float>, <2 x float> }
2 %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
3 %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
4 %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
5 %struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
66
77 define <8 x i8> @vld3i8(i8* %A) nounwind {
88 ;CHECK: vld3i8:
99 ;CHECK: vld3.8
10 %tmp1 = call %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3.v8i8(i8* %A)
11 %tmp2 = extractvalue %struct.__builtin_neon_v8qi3 %tmp1, 0
12 %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp1, 2
10 %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A)
11 %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
12 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
1313 %tmp4 = add <8 x i8> %tmp2, %tmp3
1414 ret <8 x i8> %tmp4
1515 }
1717 define <4 x i16> @vld3i16(i16* %A) nounwind {
1818 ;CHECK: vld3i16:
1919 ;CHECK: vld3.16
20 %tmp1 = call %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3.v4i16(i16* %A)
21 %tmp2 = extractvalue %struct.__builtin_neon_v4hi3 %tmp1, 0
22 %tmp3 = extractvalue %struct.__builtin_neon_v4hi3 %tmp1, 2
20 %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i16* %A)
21 %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
22 %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
2323 %tmp4 = add <4 x i16> %tmp2, %tmp3
2424 ret <4 x i16> %tmp4
2525 }
2727 define <2 x i32> @vld3i32(i32* %A) nounwind {
2828 ;CHECK: vld3i32:
2929 ;CHECK: vld3.32
30 %tmp1 = call %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3.v2i32(i32* %A)
31 %tmp2 = extractvalue %struct.__builtin_neon_v2si3 %tmp1, 0
32 %tmp3 = extractvalue %struct.__builtin_neon_v2si3 %tmp1, 2
30 %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i32* %A)
31 %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
32 %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
3333 %tmp4 = add <2 x i32> %tmp2, %tmp3
3434 ret <2 x i32> %tmp4
3535 }
3737 define <2 x float> @vld3f(float* %A) nounwind {
3838 ;CHECK: vld3f:
3939 ;CHECK: vld3.32
40 %tmp1 = call %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3.v2f32(float* %A)
41 %tmp2 = extractvalue %struct.__builtin_neon_v2sf3 %tmp1, 0
42 %tmp3 = extractvalue %struct.__builtin_neon_v2sf3 %tmp1, 2
40 %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(float* %A)
41 %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0
42 %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2
4343 %tmp4 = add <2 x float> %tmp2, %tmp3
4444 ret <2 x float> %tmp4
4545 }
4646
47 declare %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
48 declare %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
49 declare %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
50 declare %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
47 declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
48 declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
49 declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
50 declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
11
2 %struct.__builtin_neon_v8qi4 = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
3 %struct.__builtin_neon_v4hi4 = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
4 %struct.__builtin_neon_v2si4 = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
5 %struct.__builtin_neon_v2sf4 = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
2 %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
3 %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
4 %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
5 %struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
66
77 define <8 x i8> @vld4i8(i8* %A) nounwind {
88 ;CHECK: vld4i8:
99 ;CHECK: vld4.8
10 %tmp1 = call %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4.v8i8(i8* %A)
11 %tmp2 = extractvalue %struct.__builtin_neon_v8qi4 %tmp1, 0
12 %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp1, 2
10 %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A)
11 %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
12 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
1313 %tmp4 = add <8 x i8> %tmp2, %tmp3
1414 ret <8 x i8> %tmp4
1515 }
1717 define <4 x i16> @vld4i16(i16* %A) nounwind {
1818 ;CHECK: vld4i16:
1919 ;CHECK: vld4.16
20 %tmp1 = call %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4.v4i16(i16* %A)
21 %tmp2 = extractvalue %struct.__builtin_neon_v4hi4 %tmp1, 0
22 %tmp3 = extractvalue %struct.__builtin_neon_v4hi4 %tmp1, 2
20 %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i16* %A)
21 %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
22 %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
2323 %tmp4 = add <4 x i16> %tmp2, %tmp3
2424 ret <4 x i16> %tmp4
2525 }
2727 define <2 x i32> @vld4i32(i32* %A) nounwind {
2828 ;CHECK: vld4i32:
2929 ;CHECK: vld4.32
30 %tmp1 = call %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4.v2i32(i32* %A)
31 %tmp2 = extractvalue %struct.__builtin_neon_v2si4 %tmp1, 0
32 %tmp3 = extractvalue %struct.__builtin_neon_v2si4 %tmp1, 2
30 %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i32* %A)
31 %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
32 %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
3333 %tmp4 = add <2 x i32> %tmp2, %tmp3
3434 ret <2 x i32> %tmp4
3535 }
3737 define <2 x float> @vld4f(float* %A) nounwind {
3838 ;CHECK: vld4f:
3939 ;CHECK: vld4.32
40 %tmp1 = call %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4.v2f32(float* %A)
41 %tmp2 = extractvalue %struct.__builtin_neon_v2sf4 %tmp1, 0
42 %tmp3 = extractvalue %struct.__builtin_neon_v2sf4 %tmp1, 2
40 %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(float* %A)
41 %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0
42 %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2
4343 %tmp4 = add <2 x float> %tmp2, %tmp3
4444 ret <2 x float> %tmp4
4545 }
4646
47 declare %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
48 declare %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
49 declare %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
50 declare %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
47 declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
48 declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
49 declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
50 declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
11
2 %struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
3 %struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> }
4 %struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> }
5 %struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> }
2 %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
3 %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
4 %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
5 %struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
66
77 define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
88 ;CHECK: vld2lanei8:
99 ;CHECK: vld2.8
1010 %tmp1 = load <8 x i8>* %B
11 %tmp2 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
12 %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 0
13 %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 1
11 %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
12 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
13 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
1414 %tmp5 = add <8 x i8> %tmp3, %tmp4
1515 ret <8 x i8> %tmp5
1616 }
1919 ;CHECK: vld2lanei16:
2020 ;CHECK: vld2.16
2121 %tmp1 = load <4 x i16>* %B
22 %tmp2 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
23 %tmp3 = extractvalue %struct.__builtin_neon_v4hi2 %tmp2, 0
24 %tmp4 = extractvalue %struct.__builtin_neon_v4hi2 %tmp2, 1
22 %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
23 %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
24 %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
2525 %tmp5 = add <4 x i16> %tmp3, %tmp4
2626 ret <4 x i16> %tmp5
2727 }
3030 ;CHECK: vld2lanei32:
3131 ;CHECK: vld2.32
3232 %tmp1 = load <2 x i32>* %B
33 %tmp2 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
34 %tmp3 = extractvalue %struct.__builtin_neon_v2si2 %tmp2, 0
35 %tmp4 = extractvalue %struct.__builtin_neon_v2si2 %tmp2, 1
33 %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
34 %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
35 %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
3636 %tmp5 = add <2 x i32> %tmp3, %tmp4
3737 ret <2 x i32> %tmp5
3838 }
4141 ;CHECK: vld2lanef:
4242 ;CHECK: vld2.32
4343 %tmp1 = load <2 x float>* %B
44 %tmp2 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
45 %tmp3 = extractvalue %struct.__builtin_neon_v2sf2 %tmp2, 0
46 %tmp4 = extractvalue %struct.__builtin_neon_v2sf2 %tmp2, 1
44 %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
45 %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
46 %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
4747 %tmp5 = add <2 x float> %tmp3, %tmp4
4848 ret <2 x float> %tmp5
4949 }
5050
51 declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
52 declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
53 declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
54 declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly
51 declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
52 declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
53 declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
54 declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly
5555
56 %struct.__builtin_neon_v8qi3 = type { <8 x i8>, <8 x i8>, <8 x i8> }
57 %struct.__builtin_neon_v4hi3 = type { <4 x i16>, <4 x i16>, <4 x i16> }
58 %struct.__builtin_neon_v2si3 = type { <2 x i32>, <2 x i32>, <2 x i32> }
59 %struct.__builtin_neon_v2sf3 = type { <2 x float>, <2 x float>, <2 x float> }
56 %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
57 %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
58 %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
59 %struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
6060
6161 define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
6262 ;CHECK: vld3lanei8:
6363 ;CHECK: vld3.8
6464 %tmp1 = load <8 x i8>* %B
65 %tmp2 = call %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
66 %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 0
67 %tmp4 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 1
68 %tmp5 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 2
65 %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
66 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
67 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
68 %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
6969 %tmp6 = add <8 x i8> %tmp3, %tmp4
7070 %tmp7 = add <8 x i8> %tmp5, %tmp6
7171 ret <8 x i8> %tmp7
7575 ;CHECK: vld3lanei16:
7676 ;CHECK: vld3.16
7777 %tmp1 = load <4 x i16>* %B
78 %tmp2 = call %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
79 %tmp3 = extractvalue %struct.__builtin_neon_v4hi3 %tmp2, 0
80 %tmp4 = extractvalue %struct.__builtin_neon_v4hi3 %tmp2, 1
81 %tmp5 = extractvalue %struct.__builtin_neon_v4hi3 %tmp2, 2
78 %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
79 %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
80 %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
81 %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
8282 %tmp6 = add <4 x i16> %tmp3, %tmp4
8383 %tmp7 = add <4 x i16> %tmp5, %tmp6
8484 ret <4 x i16> %tmp7
8888 ;CHECK: vld3lanei32:
8989 ;CHECK: vld3.32
9090 %tmp1 = load <2 x i32>* %B
91 %tmp2 = call %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
92 %tmp3 = extractvalue %struct.__builtin_neon_v2si3 %tmp2, 0
93 %tmp4 = extractvalue %struct.__builtin_neon_v2si3 %tmp2, 1
94 %tmp5 = extractvalue %struct.__builtin_neon_v2si3 %tmp2, 2
91 %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
92 %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
93 %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
94 %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
9595 %tmp6 = add <2 x i32> %tmp3, %tmp4
9696 %tmp7 = add <2 x i32> %tmp5, %tmp6
9797 ret <2 x i32> %tmp7
101101 ;CHECK: vld3lanef:
102102 ;CHECK: vld3.32
103103 %tmp1 = load <2 x float>* %B
104 %tmp2 = call %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
105 %tmp3 = extractvalue %struct.__builtin_neon_v2sf3 %tmp2, 0
106 %tmp4 = extractvalue %struct.__builtin_neon_v2sf3 %tmp2, 1
107 %tmp5 = extractvalue %struct.__builtin_neon_v2sf3 %tmp2, 2
104 %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
105 %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
106 %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
107 %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
108108 %tmp6 = add <2 x float> %tmp3, %tmp4
109109 %tmp7 = add <2 x float> %tmp5, %tmp6
110110 ret <2 x float> %tmp7
111111 }
112112
113 declare %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
114 declare %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
115 declare %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
116 declare %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
113 declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
114 declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
115 declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
116 declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
117117
118 %struct.__builtin_neon_v8qi4 = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
119 %struct.__builtin_neon_v4hi4 = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
120 %struct.__builtin_neon_v2si4 = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
121 %struct.__builtin_neon_v2sf4 = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
118 %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
119 %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
120 %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
121 %struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
122122
123123 define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
124124 ;CHECK: vld4lanei8:
125125 ;CHECK: vld4.8
126126 %tmp1 = load <8 x i8>* %B
127 %tmp2 = call %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
128 %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 0
129 %tmp4 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 1
130 %tmp5 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 2
131 %tmp6 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 3
127 %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
128 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
129 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
130 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
131 %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
132132 %tmp7 = add <8 x i8> %tmp3, %tmp4
133133 %tmp8 = add <8 x i8> %tmp5, %tmp6
134134 %tmp9 = add <8 x i8> %tmp7, %tmp8
139139 ;CHECK: vld4lanei16:
140140 ;CHECK: vld4.16
141141 %tmp1 = load <4 x i16>* %B
142 %tmp2 = call %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
143 %tmp3 = extractvalue %struct.__builtin_neon_v4hi4 %tmp2, 0
144 %tmp4 = extractvalue %struct.__builtin_neon_v4hi4 %tmp2, 1
145 %tmp5 = extractvalue %struct.__builtin_neon_v4hi4 %tmp2, 2
146 %tmp6 = extractvalue %struct.__builtin_neon_v4hi4 %tmp2, 3
142 %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
143 %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
144 %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
145 %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
146 %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
147147 %tmp7 = add <4 x i16> %tmp3, %tmp4
148148 %tmp8 = add <4 x i16> %tmp5, %tmp6
149149 %tmp9 = add <4 x i16> %tmp7, %tmp8
154154 ;CHECK: vld4lanei32:
155155 ;CHECK: vld4.32
156156 %tmp1 = load <2 x i32>* %B
157 %tmp2 = call %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
158 %tmp3 = extractvalue %struct.__builtin_neon_v2si4 %tmp2, 0
159 %tmp4 = extractvalue %struct.__builtin_neon_v2si4 %tmp2, 1
160 %tmp5 = extractvalue %struct.__builtin_neon_v2si4 %tmp2, 2
161 %tmp6 = extractvalue %struct.__builtin_neon_v2si4 %tmp2, 3
157 %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
158 %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
159 %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
160 %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
161 %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
162162 %tmp7 = add <2 x i32> %tmp3, %tmp4
163163 %tmp8 = add <2 x i32> %tmp5, %tmp6
164164 %tmp9 = add <2 x i32> %tmp7, %tmp8
169169 ;CHECK: vld4lanef:
170170 ;CHECK: vld4.32
171171 %tmp1 = load <2 x float>* %B
172 %tmp2 = call %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
173 %tmp3 = extractvalue %struct.__builtin_neon_v2sf4 %tmp2, 0
174 %tmp4 = extractvalue %struct.__builtin_neon_v2sf4 %tmp2, 1
175 %tmp5 = extractvalue %struct.__builtin_neon_v2sf4 %tmp2, 2
176 %tmp6 = extractvalue %struct.__builtin_neon_v2sf4 %tmp2, 3
172 %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
173 %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
174 %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
175 %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
176 %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
177177 %tmp7 = add <2 x float> %tmp3, %tmp4
178178 %tmp8 = add <2 x float> %tmp5, %tmp6
179179 %tmp9 = add <2 x float> %tmp7, %tmp8
180180 ret <2 x float> %tmp9
181181 }
182182
183 declare %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
184 declare %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
185 declare %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
186 declare %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
183 declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
184 declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
185 declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
186 declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
11
2 %struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
3 %struct.__builtin_neon_v8qi3 = type { <8 x i8>, <8 x i8>, <8 x i8> }
4 %struct.__builtin_neon_v8qi4 = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
2 %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
3 %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
4 %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
55
66 define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
77 ;CHECK: vtbl1:
1212 ret <8 x i8> %tmp3
1313 }
1414
15 define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__builtin_neon_v8qi2* %B) nounwind {
15 define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
1616 ;CHECK: vtbl2:
1717 ;CHECK: vtbl.8
1818 %tmp1 = load <8 x i8>* %A
19 %tmp2 = load %struct.__builtin_neon_v8qi2* %B
20 %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 0
21 %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 1
19 %tmp2 = load %struct.__neon_int8x8x2_t* %B
20 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
21 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
2222 %tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4)
2323 ret <8 x i8> %tmp5
2424 }
2525
26 define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__builtin_neon_v8qi3* %B) nounwind {
26 define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
2727 ;CHECK: vtbl3:
2828 ;CHECK: vtbl.8
2929 %tmp1 = load <8 x i8>* %A
30 %tmp2 = load %struct.__builtin_neon_v8qi3* %B
31 %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 0
32 %tmp4 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 1
33 %tmp5 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 2
30 %tmp2 = load %struct.__neon_int8x8x3_t* %B
31 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
32 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
33 %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
3434 %tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
3535 ret <8 x i8> %tmp6
3636 }
3737
38 define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__builtin_neon_v8qi4* %B) nounwind {
38 define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
3939 ;CHECK: vtbl4:
4040 ;CHECK: vtbl.8
4141 %tmp1 = load <8 x i8>* %A
42 %tmp2 = load %struct.__builtin_neon_v8qi4* %B
43 %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 0
44 %tmp4 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 1
45 %tmp5 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 2
46 %tmp6 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 3
42 %tmp2 = load %struct.__neon_int8x8x4_t* %B
43 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
44 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
45 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
46 %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
4747 %tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
4848 ret <8 x i8> %tmp7
4949 }
5858 ret <8 x i8> %tmp4
5959 }
6060
61 define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__builtin_neon_v8qi2* %B, <8 x i8>* %C) nounwind {
61 define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind {
6262 ;CHECK: vtbx2:
6363 ;CHECK: vtbx.8
6464 %tmp1 = load <8 x i8>* %A
65 %tmp2 = load %struct.__builtin_neon_v8qi2* %B
66 %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 0
67 %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 1
65 %tmp2 = load %struct.__neon_int8x8x2_t* %B
66 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
67 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
6868 %tmp5 = load <8 x i8>* %C
6969 %tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
7070 ret <8 x i8> %tmp6
7171 }
7272
73 define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__builtin_neon_v8qi3* %B, <8 x i8>* %C) nounwind {
73 define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
7474 ;CHECK: vtbx3:
7575 ;CHECK: vtbx.8
7676 %tmp1 = load <8 x i8>* %A
77 %tmp2 = load %struct.__builtin_neon_v8qi3* %B
78 %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 0
79 %tmp4 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 1
80 %tmp5 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 2
77 %tmp2 = load %struct.__neon_int8x8x3_t* %B
78 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
79 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
80 %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
8181 %tmp6 = load <8 x i8>* %C
8282 %tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
8383 ret <8 x i8> %tmp7
8484 }
8585
86 define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__builtin_neon_v8qi4* %B, <8 x i8>* %C) nounwind {
86 define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
8787 ;CHECK: vtbx4:
8888 ;CHECK: vtbx.8
8989 %tmp1 = load <8 x i8>* %A
90 %tmp2 = load %struct.__builtin_neon_v8qi4* %B
91 %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 0
92 %tmp4 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 1
93 %tmp5 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 2
94 %tmp6 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 3
90 %tmp2 = load %struct.__neon_int8x8x4_t* %B
91 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
92 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
93 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
94 %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
9595 %tmp7 = load <8 x i8>* %C
9696 %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
9797 ret <8 x i8> %tmp8
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 %struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
3 %struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> }
4 %struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> }
5 %struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> }
6
7 %struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x i8> }
8 %struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 x i16> }
9 %struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> }
10 %struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> }
111
122 define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
133 ;CHECK: vtrni8:
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 %struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
3 %struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> }
4 %struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> }
5 %struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> }
6
7 %struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x i8> }
8 %struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 x i16> }
9 %struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> }
10 %struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> }
111
122 define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
133 ;CHECK: vuzpi8:
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 %struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
3 %struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> }
4 %struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> }
5 %struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> }
6
7 %struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x i8> }
8 %struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 x i16> }
9 %struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> }
10 %struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> }
111
122 define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
133 ;CHECK: vzipi8: