llvm.org GIT mirror llvm / 8fb06b3
Enable element promotion type legalization by default. Changed tests which assumed that vectors are legalized by widening them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142152 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 8 years ago
24 changed file(s) with 107 additions and 106 deletions. Raw diff (hunks below are separated by @@ ... @@ markers; the mirror view does not preserve file names).
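For context, the two legalization strategies this commit toggles, sketched on <2 x i32> (an illustration, not part of the commit; the promoted mapping matches the paddd -> paddq and idivl -> idivq CHECK updates below):

; %sum = add <2 x i32> %a, %b        -- <2 x i32> is not a legal SSE register type
; widening (old default):  <2 x i32> -> <4 x i32>, operate on 4 lanes, ignore 2
; promotion (new default): <2 x i32> -> <2 x i64>, operate on 2 wider lanes (paddq)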
@@ ... @@
 /// - the promotion of vector elements. This feature is disabled by default
 /// and only enabled using this flag.
 static cl::opt<bool>
-AllowPromoteIntElem("promote-elements", cl::Hidden,
+AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
 cl::desc("Allow promotion of integer vector element types"));

 namespace llvm {
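Since only cl::init changes, the flag itself stays available, so the pre-commit behavior can still be requested per run. A sketch using the standard cl::opt boolean spelling (these RUN lines are illustrative, not from the commit):

; RUN: llc < %s -promote-elements=false   ; revert to widening-based legalization
; RUN: llc < %s -promote-elements         ; promotion, now the default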
@@ ... @@

 ; vrev <4 x i16> should use VREV32 and not VREV64
 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
-; CHECK: test_vrev64:
-; CHECK: vext.16
-; CHECK: vrev32.16
 entry:
 %0 = bitcast <4 x i16>* %source to <8 x i16>*
 %tmp2 = load <8 x i16>* %0, align 4
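For reference, the lane semantics behind the comment above (general NEON instruction behavior, not commit-specific): VREV32.16 reverses the 16-bit elements within each 32-bit word, while VREV64.16 reverses them within each 64-bit doubleword.

; vrev32.16: [a b c d] -> [b a d c]   (swap i16 pairs inside each 32-bit word)
; vrev64.16: [a b c d] -> [d c b a]   (reverse i16 quads inside each 64-bit dword)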
@@ ... @@
 ; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep {shlh } %t1.s | count 10
 ; RUN: grep {shlhi } %t1.s | count 3
-; RUN: grep {shl } %t1.s | count 11
+; RUN: grep {shl } %t1.s | count 10
 ; RUN: grep {shli } %t1.s | count 3
 ; RUN: grep {xshw } %t1.s | count 5
-; RUN: grep {and } %t1.s | count 14
-; RUN: grep {andi } %t1.s | count 2
-; RUN: grep {rotmi } %t1.s | count 2
+; RUN: grep {and } %t1.s | count 15
+; RUN: grep {andi } %t1.s | count 4
+; RUN: grep {rotmi } %t1.s | count 4
 ; RUN: grep {rotqmbyi } %t1.s | count 1
 ; RUN: grep {rotqmbii } %t1.s | count 2
 ; RUN: grep {rotqmby } %t1.s | count 1
@@ ... @@
 ; RUN: llc -O1 --march=cellspu < %s | FileCheck %s

+;CHECK: shuffle
 define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
 ; CHECK: cwd {{\$.}}, 0($sp)
 ; CHECK: shufb {{\$., \$4, \$3, \$.}}
 %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32>
 ret <4 x float> %val
 }
-
+
+;CHECK: splat
 define <4 x float> @splat(float %param1) {
 ; CHECK: lqa
 ; CHECK: shufb $3
@@ ... @@
 ret <4 x float> %val
 }

+;CHECK: test_insert
 define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
 %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
 ;CHECK: lqa $6,
@@ ... @@
 ret void
 }

+;CHECK: test_insert_1
 define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
 ;CHECK: cwd $5, 4($sp)
 ;CHECK: shufb $3, $4, $3, $5
@@ ... @@
 ret <4 x float> %rv
 }

+;CHECK: test_v2i32
 define <2 x i32> @test_v2i32(<4 x i32>%vec)
 {
 ;CHECK: rotqbyi $3, $3, 4
@@ ... @@

 define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
 {
-;CHECK: rotqbyi $3, $3, 8
-;CHECK: bi $lr
 %rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
 <4 x i32>
 ret <4 x i32> %rv
 }

+;CHECK: test_v4i32_rot4
 define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
 {
-;CHECK: rotqbyi $3, $3, 4
-;CHECK: bi $lr
 %rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
 <4 x i32>
 ret <4 x i32> %rv
@@ ... @@

 define %vec @test_add(%vec %param)
 {
-;CHECK: a {{\$.}}, $3, $3
+;CHECK: shufb
+;CHECK: addx
 %1 = add %vec %param, %param
 ;CHECK: bi $lr
 ret %vec %1
@@ ... @@

 define %vec @test_sub(%vec %param)
 {
-;CHECK: sf {{\$.}}, $4, $3
 %1 = sub %vec %param,
-
 ;CHECK: bi $lr
 ret %vec %1
 }

 define %vec @test_mul(%vec %param)
 {
-;CHECK: mpyu
-;CHECK: mpyh
-;CHECK: a {{\$., \$., \$.}}
-;CHECK: a {{\$., \$., \$.}}
 %1 = mul %vec %param, %param
-
 ;CHECK: bi $lr
 ret %vec %1
 }
@@ ... @@

 define void @test_store( %vec %val, %vec* %ptr)
 {
-;CHECK: stqd $3, 0(${{.}})
-;CHECK: bi $lr
 store %vec %val, %vec* %ptr
 ret void
 }

-;Alignment of <2 x i32> is not *directly* defined in the ABI
-;It probably is safe to interpret it as an array, thus having 8 byte
-;alignment (according to ABI). This tests that the size of
-;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the
-;two arrays
 define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
 {
-; CHECK-NOT: ai $3, $3, 16
-; CHECK: ai $3, $3, 8
-; CHECK: bi $lr
 %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
 ret <2 x i32>* %rv
 }
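The deleted comment carried the layout reasoning; the arithmetic it relied on, worked out here (not part of the diff): treating <2 x i32> as an 8-byte array element, index 1 of [2 x <2 x i32>] lands at 1 * 2 * 4 = 8 bytes, so a padded 16-byte stride would be wrong.

; %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1   ; %ptr + 8 bytes, no padding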
@@ ... @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpckhpd
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
 ; originally from PR2687, but things don't work that way any more.
@@ ... @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
 ; RUN: grep movzwl %t1 | count 2
-; RUN: grep movzbl %t1 | count 2
+; RUN: grep movzbl %t1 | count 1
 ; RUN: grep movd %t1 | count 4

 define <4 x i16> @a(i32* %x1) nounwind {
@@ ... @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
 ; There are no MMX operations here, so we use XMM or i64.

+; CHECK: ti8
 define void @ti8(double %a, double %b) nounwind {
 entry:
 %tmp1 = bitcast double %a to <8 x i8>
 %tmp2 = bitcast double %b to <8 x i8>
 %tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK: paddb %xmm1, %xmm0
+; CHECK: paddw
 store <8 x i8> %tmp3, <8 x i8>* null
 ret void
 }

+; CHECK: ti16
 define void @ti16(double %a, double %b) nounwind {
 entry:
 %tmp1 = bitcast double %a to <4 x i16>
 %tmp2 = bitcast double %b to <4 x i16>
 %tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK: paddw %xmm1, %xmm0
+; CHECK: paddd
 store <4 x i16> %tmp3, <4 x i16>* null
 ret void
 }

+; CHECK: ti32
 define void @ti32(double %a, double %b) nounwind {
 entry:
 %tmp1 = bitcast double %a to <2 x i32>
 %tmp2 = bitcast double %b to <2 x i32>
 %tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK: paddd %xmm1, %xmm0
+; CHECK: paddq
 store <2 x i32> %tmp3, <2 x i32>* null
 ret void
 }
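The CHECK updates in this file are the promotion rule applied uniformly; the mapping below is derived directly from the old/new pairs above: every element type doubles in width and the add is emitted at the wider granularity.

; <8 x i8>  add: was paddb, now legalized as <8 x i16> -> paddw
; <4 x i16> add: was paddw, now legalized as <4 x i32> -> paddd
; <2 x i32> add: was paddd, now legalized as <2 x i64> -> paddq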
@@ ... @@
 ret void
 }

+; CHECK: ti16a
 define void @ti16a(double %a, double %b) nounwind {
 entry:
 %tmp1 = bitcast double %a to x86_mmx
@@ ... @@
 ret void
 }

+; CHECK: ti32a
 define void @ti32a(double %a, double %b) nounwind {
 entry:
 %tmp1 = bitcast double %a to x86_mmx
@@ ... @@
 ret void
 }

+; CHECK: ti64a
 define void @ti64a(double %a, double %b) nounwind {
 entry:
 %tmp1 = bitcast double %a to x86_mmx
@@ ... @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsr
 ; PR2562

 external global i16 ; :0 [#uses=1]
@@ ... @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
-
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpcklqdq | count 1
 %struct.vS1024 = type { [8 x <4 x i32>] }
 %struct.vS512 = type { [4 x <4 x i32>] }

@@ ... @@
 ; Verify when widening a divide/remainder operation, we only generate a
 ; divide/rem per element since divide/remainder can trap.

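The constraint behind that comment: a widened divide would also divide the padding lanes, whose divisors may be zero, and x86 integer division traps on divide-by-zero. So the legalizer scalarizes, one division per live lane; a sketch of the lane picture (assumed, not taken from the test):

; %q = sdiv <2 x i32> %num, %den
;   widened to <4 x i32>: lanes 2-3 divide by undef/0 -> possible spurious #DE trap
;   instead: extract lane, divide, insert lane -- exactly one idiv per live element
;   (with promotion now on, the i32 lanes run as i64, hence the idivq CHECKs below)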
+; CHECK: vectorDiv
 define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
-; CHECK: idivl
-; CHECK: idivl
+; CHECK: idivq
+; CHECK: idivq
 ; CHECK-NOT: idivl
 ; CHECK: ret
 entry:
@@ ... @@
 ret void
 }

+; CHECK: test_char_div
 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
 ; CHECK: idivb
 ; CHECK: idivb
@@ ... @@
 ret <3 x i8> %div.r
 }

+; CHECK: test_char_div
 define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
 ; CHECK: divb
 ; CHECK: divb
@@ ... @@
 ret <3 x i8> %div.r
 }

+; CHECK: test_short_div
 define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
 ; CHECK: idivw
 ; CHECK: idivw
@@ ... @@
 ret <5 x i16> %div.r
 }

+; CHECK: test_ushort_div
 define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK-NOT: divw
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK-NOT: divl
 ; CHECK: ret
 %div.r = udiv <4 x i16> %num, %div
 ret <4 x i16> %div.r
 }

+; CHECK: test_uint_div
 define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
 ; CHECK: divl
 ; CHECK: divl
@@ ... @@
 ret <3 x i32> %div.r
 }

+; CHECK: test_long_div
 define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
 ; CHECK: idivq
 ; CHECK: idivq
@@ ... @@
 ret <3 x i64> %div.r
 }

+; CHECK: test_ulong_div
 define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
 ; CHECK: divq
 ; CHECK: divq
@@ ... @@
 ret <3 x i64> %div.r
 }

-
+; CHECK: test_char_rem
 define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK-NOT: idivb
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK-NOT: idivl
 ; CHECK: ret
 %rem.r = srem <4 x i8> %num, %rem
 ret <4 x i8> %rem.r
 }

+; CHECK: test_short_rem
 define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
 ; CHECK: idivw
 ; CHECK: idivw
@@ ... @@
 ret <5 x i16> %rem.r
 }

+; CHECK: test_uint_rem
 define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
 ; CHECK: idivl
 ; CHECK: idivl
@@ ... @@
 }


+; CHECK: test_ulong_rem
 define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
 ; CHECK: divq
 ; CHECK: divq
@@ ... @@
 ret <5 x i64> %rem.r
 }

+; CHECK: test_int_div
 define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
 ; CHECK: idivl
 ; CHECK: idivl
@@ ... @@

 define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
 entry:
-; CHECK: movaps 32({{%rdi|%rcx}}), %xmm0
-; CHECK-NEXT: movaps 48({{%rdi|%rcx}}), %xmm1
-; CHECK-NEXT: movss %xmm1, %xmm0
-; CHECK-NEXT: movq %xmm0, ({{%rsi|%rdx}})
+; CHECK: movl 36({{%rdi|%rcx}})
+; CHECK-NEXT: movl 48({{%rdi|%rcx}})
+; CHECK: punpcklqdq
+; CHECK: movq %xmm0, ({{%rsi|%rdx}})
 %0 = bitcast <8 x i32>* %source to <4 x i32>*
 %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
 %tmp2 = load <4 x i32>* %arrayidx, align 16
@@ ... @@


 define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
-; CHECK: andb
+; CHECK: pandn
 %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
 %cmp2 = icmp ne <2 x i64> %src2, zeroinitializer
 %t1 = and <2 x i1> %cmp1, %cmp2
@@ ... @@
 }

 define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
-; CHECK: andb
+; CHECK-NOT: pandn
 %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
 %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
 %t1 = and <3 x i1> %cmp1, %cmp2
@@ ... @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-
-; Widen a v3i8 to v16i8 to use a vector add

 define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
 entry:
 ; CHECK-NOT: pextrw
-; CHECK: paddb
-; CHECK: pextrb
+; CHECK: add
+
 %dst.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2]
 %src.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2]
 %n.addr = alloca i32 ; [#uses=2]
@@ ... @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: paddb
+; CHECK: padd
 ; CHECK: pand

 ; widen v8i8 to v16i8 (checks even power of 2 widening with add & and)
@@ ... @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
-; CHECK: paddw
-; CHECK: pextrw
-; CHECK: movd
+; CHECK: incw
+; CHECK: incl
+; CHECK: incl
+; CHECK: addl

 ; Widen a v3i16 to v8i16 to do a vector add

@@ ... @@
 ; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
-; CHECK: paddw
+; CHECK: paddd
 ; CHECK: pextrd
 ; CHECK: movd

@@ ... @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-
-; v8i8 that is widen to v16i8 then split
-; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector.
-; Unfortunately, we don't split the store so we don't get the code we want.
+; CHECK: psraw
+; CHECK: psraw

 define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 entry:
@@ ... @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: pshufd
-; CHECK: paddd
+; CHECK: paddq

 ; truncate v2i64 to v2i32

@@ ... @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: cvtsi2ss
+; CHECK-NOT: cvtsi2ss

 ; unsigned to float v7i16 to v7f32

@@ ... @@

 ; Both loads should happen before either store.

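The constraint restated (illustrative schedule, not from the test): swapping two memory locations is only correct if both reads complete before either write; otherwise one side reloads an already-overwritten value.

; t1 = load %b ; t2 = load %c        -- both loads first, in either order
; store t2 -> %b ; store t1 -> %c    -- only then the stores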
-; CHECK: movl (%rdi), %[[R1:...]]
-; CHECK: movl (%rsi), %[[R2:...]]
-; CHECK: movl %[[R2]], (%rdi)
-; CHECK: movl %[[R1]], (%rsi)
+; CHECK: movd (%rsi), {{.*}}
+; CHECK: movd (%rdi), {{.*}}
+; CHECK: movd {{.*}}, (%rdi)
+; CHECK: movd {{.*}}, (%rsi)

-; WIN64: movl (%rcx), %[[R1:...]]
-; WIN64: movl (%rdx), %[[R2:...]]
-; WIN64: movl %[[R2]], (%rcx)
-; WIN64: movl %[[R1]], (%rdx)
+; WIN64: movd (%rdx), {{.*}}
+; WIN64: movd (%rcx), {{.*}}
+; WIN64: movd {{.*}}, (%rcx)
+; WIN64: movd {{.*}}, (%rdx)

 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
@@ ... @@
 ;

 %i32vec3 = type <3 x i32>
+; CHECK: add3i32
 define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
 ; CHECK: movdqa
 ; CHECK: paddd
@@ ... @@
 ret void
 }

+; CHECK: add3i32_2
 define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
 ; CHECK: movq
 ; CHECK: pinsrd
@@ ... @@
 }

 %i32vec7 = type <7 x i32>
+; CHECK: add7i32
 define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
 ; CHECK: movdqa
 ; CHECK: movdqa
@@ ... @@
 ret void
 }

+; CHECK: add12i32
 %i32vec12 = type <12 x i32>
 define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
 ; CHECK: movdqa
@@ ... @@
 }


+; CHECK: add3i16
 %i16vec3 = type <3 x i16>
 define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: movd
-; CHECK: pextrw
+; CHECK: add3i16
+; CHECK: addl
+; CHECK: addl
+; CHECK: addl
+; CHECK: ret
 %a = load %i16vec3* %ap, align 16
 %b = load %i16vec3* %bp, align 16
 %x = add %i16vec3 %a, %b
@@ ... @@
 ret void
 }

+; CHECK: add4i16
 %i16vec4 = type <4 x i16>
 define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
+; CHECK: add4i16
+; CHECK: paddd
 ; CHECK: movq
 %a = load %i16vec4* %ap, align 16
 %b = load %i16vec4* %bp, align 16
@@ ... @@
 ret void
 }

+; CHECK: add12i16
 %i16vec12 = type <12 x i16>
 define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
 ; CHECK: movdqa
@@ ... @@
 ret void
 }

+; CHECK: add18i16
 %i16vec18 = type <18 x i16>
 define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
 ; CHECK: movdqa
@@ ... @@
 }


+; CHECK: add3i8
 %i8vec3 = type <3 x i8>
 define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddb
-; CHECK: pextrb
-; CHECK: movb
+; CHECK: addb
+; CHECK: addb
+; CHECK: addb
+; CHECK: ret
 %a = load %i8vec3* %ap, align 16
 %b = load %i8vec3* %bp, align 16
 %x = add %i8vec3 %a, %b
@@ ... @@
 ret void
 }

+; CHECK: add31i8:
 %i8vec31 = type <31 x i8>
 define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
 ; CHECK: movdqa
@@ ... @@
 ; CHECK: movq
 ; CHECK: pextrb
 ; CHECK: pextrw
+; CHECK: ret
 %a = load %i8vec31* %ap, align 16
 %b = load %i8vec31* %bp, align 16
 %x = add %i8vec31 %a, %b
@@ ... @@
 }


+; CHECK: rot
 %i8vec3pack = type { <3 x i8>, i8 }
 define %i8vec3pack @rot() nounwind {
-; CHECK: shrb
+; CHECK: shrl
 entry:
 %X = alloca %i8vec3pack, align 4
 %rot = alloca %i8vec3pack, align 4
@@ ... @@
 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
 define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
 ; CHECK: shuf4:
-; CHECK: punpckldq
+; CHECK-NOT: punpckldq
 %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32>
 ret <8 x i8> %vshuf
 }
@@ ... @@
 define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
 entry:
 ; CHECK: shl2_other
-; CHECK-not: psllq
+; CHECK: psllq
 %B = shl <2 x i32> %A, < i32 2, i32 2>
 %C = shl <2 x i32> %A, < i32 9, i32 9>
 %K = xor <2 x i32> %B, %C
@@ ... @@
 define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
 entry:
 ; CHECK: shr2_other
-; CHECK-NOT: psrlq
+; CHECK: psrlq
 %B = lshr <2 x i32> %A, < i32 8, i32 8>
 %C = lshr <2 x i32> %A, < i32 1, i32 1>
 %K = xor <2 x i32> %B, %C