llvm.org GIT mirror llvm / d9a18c3
[SLPVectorizer][X86] Add vectorization tests for vXi64/vXi32/vXi16/VXi8 shifts git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303069 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 3 years ago
3 changed file(s) with 2589 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP
7
8 @a64 = common global [8 x i64] zeroinitializer, align 64
9 @b64 = common global [8 x i64] zeroinitializer, align 64
10 @c64 = common global [8 x i64] zeroinitializer, align 64
11 @a32 = common global [16 x i32] zeroinitializer, align 64
12 @b32 = common global [16 x i32] zeroinitializer, align 64
13 @c32 = common global [16 x i32] zeroinitializer, align 64
14 @a16 = common global [32 x i16] zeroinitializer, align 64
15 @b16 = common global [32 x i16] zeroinitializer, align 64
16 @c16 = common global [32 x i16] zeroinitializer, align 64
17 @a8 = common global [64 x i8] zeroinitializer, align 64
18 @b8 = common global [64 x i8] zeroinitializer, align 64
19 @c8 = common global [64 x i8] zeroinitializer, align 64
20
21 define void @ashr_v8i64() {
22 ; SSE-LABEL: @ashr_v8i64(
23 ; SSE-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
24 ; SSE-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
25 ; SSE-NEXT: [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
26 ; SSE-NEXT: [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
27 ; SSE-NEXT: [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
28 ; SSE-NEXT: [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
29 ; SSE-NEXT: [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
30 ; SSE-NEXT: [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
31 ; SSE-NEXT: [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
32 ; SSE-NEXT: [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
33 ; SSE-NEXT: [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
34 ; SSE-NEXT: [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
35 ; SSE-NEXT: [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
36 ; SSE-NEXT: [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
37 ; SSE-NEXT: [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
38 ; SSE-NEXT: [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
39 ; SSE-NEXT: [[R0:%.*]] = ashr i64 [[A0]], [[B0]]
40 ; SSE-NEXT: [[R1:%.*]] = ashr i64 [[A1]], [[B1]]
41 ; SSE-NEXT: [[R2:%.*]] = ashr i64 [[A2]], [[B2]]
42 ; SSE-NEXT: [[R3:%.*]] = ashr i64 [[A3]], [[B3]]
43 ; SSE-NEXT: [[R4:%.*]] = ashr i64 [[A4]], [[B4]]
44 ; SSE-NEXT: [[R5:%.*]] = ashr i64 [[A5]], [[B5]]
45 ; SSE-NEXT: [[R6:%.*]] = ashr i64 [[A6]], [[B6]]
46 ; SSE-NEXT: [[R7:%.*]] = ashr i64 [[A7]], [[B7]]
47 ; SSE-NEXT: store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
48 ; SSE-NEXT: store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
49 ; SSE-NEXT: store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
50 ; SSE-NEXT: store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
51 ; SSE-NEXT: store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
52 ; SSE-NEXT: store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
53 ; SSE-NEXT: store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
54 ; SSE-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
55 ; SSE-NEXT: ret void
56 ;
57 ; AVX1-LABEL: @ashr_v8i64(
58 ; AVX1-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
59 ; AVX1-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
60 ; AVX1-NEXT: [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
61 ; AVX1-NEXT: [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
62 ; AVX1-NEXT: [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
63 ; AVX1-NEXT: [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
64 ; AVX1-NEXT: [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
65 ; AVX1-NEXT: [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
66 ; AVX1-NEXT: [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
67 ; AVX1-NEXT: [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
68 ; AVX1-NEXT: [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
69 ; AVX1-NEXT: [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
70 ; AVX1-NEXT: [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
71 ; AVX1-NEXT: [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
72 ; AVX1-NEXT: [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
73 ; AVX1-NEXT: [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
74 ; AVX1-NEXT: [[R0:%.*]] = ashr i64 [[A0]], [[B0]]
75 ; AVX1-NEXT: [[R1:%.*]] = ashr i64 [[A1]], [[B1]]
76 ; AVX1-NEXT: [[R2:%.*]] = ashr i64 [[A2]], [[B2]]
77 ; AVX1-NEXT: [[R3:%.*]] = ashr i64 [[A3]], [[B3]]
78 ; AVX1-NEXT: [[R4:%.*]] = ashr i64 [[A4]], [[B4]]
79 ; AVX1-NEXT: [[R5:%.*]] = ashr i64 [[A5]], [[B5]]
80 ; AVX1-NEXT: [[R6:%.*]] = ashr i64 [[A6]], [[B6]]
81 ; AVX1-NEXT: [[R7:%.*]] = ashr i64 [[A7]], [[B7]]
82 ; AVX1-NEXT: store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
83 ; AVX1-NEXT: store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
84 ; AVX1-NEXT: store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
85 ; AVX1-NEXT: store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
86 ; AVX1-NEXT: store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
87 ; AVX1-NEXT: store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
88 ; AVX1-NEXT: store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
89 ; AVX1-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
90 ; AVX1-NEXT: ret void
91 ;
92 ; AVX2-LABEL: @ashr_v8i64(
93 ; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
94 ; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
95 ; AVX2-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
96 ; AVX2-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
97 ; AVX2-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP1]], [[TMP3]]
98 ; AVX2-NEXT: [[TMP6:%.*]] = ashr <4 x i64> [[TMP2]], [[TMP4]]
99 ; AVX2-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
100 ; AVX2-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
101 ; AVX2-NEXT: ret void
102 ;
103 ; AVX512-LABEL: @ashr_v8i64(
104 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
105 ; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
106 ; AVX512-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[TMP1]], [[TMP2]]
107 ; AVX512-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
108 ; AVX512-NEXT: ret void
109 ;
110 ; XOP-LABEL: @ashr_v8i64(
111 ; XOP-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
112 ; XOP-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
113 ; XOP-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
114 ; XOP-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
115 ; XOP-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP1]], [[TMP3]]
116 ; XOP-NEXT: [[TMP6:%.*]] = ashr <4 x i64> [[TMP2]], [[TMP4]]
117 ; XOP-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
118 ; XOP-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
119 ; XOP-NEXT: ret void
120 ;
121 %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
122 %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
123 %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
124 %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
125 %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
126 %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
127 %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
128 %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
129 %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
130 %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
131 %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
132 %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
133 %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
134 %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
135 %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
136 %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
137 %r0 = ashr i64 %a0, %b0
138 %r1 = ashr i64 %a1, %b1
139 %r2 = ashr i64 %a2, %b2
140 %r3 = ashr i64 %a3, %b3
141 %r4 = ashr i64 %a4, %b4
142 %r5 = ashr i64 %a5, %b5
143 %r6 = ashr i64 %a6, %b6
144 %r7 = ashr i64 %a7, %b7
145 store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
146 store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
147 store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
148 store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
149 store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
150 store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
151 store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
152 store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
153 ret void
154 }
155
156 define void @ashr_v16i32() {
157 ; SSE-LABEL: @ashr_v16i32(
158 ; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
159 ; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
160 ; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
161 ; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
162 ; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
163 ; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
164 ; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
165 ; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
166 ; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
167 ; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
168 ; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
169 ; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
170 ; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
171 ; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
172 ; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
173 ; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
174 ; SSE-NEXT: [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
175 ; SSE-NEXT: [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
176 ; SSE-NEXT: [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
177 ; SSE-NEXT: [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
178 ; SSE-NEXT: [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
179 ; SSE-NEXT: [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
180 ; SSE-NEXT: [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
181 ; SSE-NEXT: [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
182 ; SSE-NEXT: [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
183 ; SSE-NEXT: [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
184 ; SSE-NEXT: [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
185 ; SSE-NEXT: [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
186 ; SSE-NEXT: [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
187 ; SSE-NEXT: [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
188 ; SSE-NEXT: [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
189 ; SSE-NEXT: [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
190 ; SSE-NEXT: [[R0:%.*]] = ashr i32 [[A0]], [[B0]]
191 ; SSE-NEXT: [[R1:%.*]] = ashr i32 [[A1]], [[B1]]
192 ; SSE-NEXT: [[R2:%.*]] = ashr i32 [[A2]], [[B2]]
193 ; SSE-NEXT: [[R3:%.*]] = ashr i32 [[A3]], [[B3]]
194 ; SSE-NEXT: [[R4:%.*]] = ashr i32 [[A4]], [[B4]]
195 ; SSE-NEXT: [[R5:%.*]] = ashr i32 [[A5]], [[B5]]
196 ; SSE-NEXT: [[R6:%.*]] = ashr i32 [[A6]], [[B6]]
197 ; SSE-NEXT: [[R7:%.*]] = ashr i32 [[A7]], [[B7]]
198 ; SSE-NEXT: [[R8:%.*]] = ashr i32 [[A8]], [[B8]]
199 ; SSE-NEXT: [[R9:%.*]] = ashr i32 [[A9]], [[B9]]
200 ; SSE-NEXT: [[R10:%.*]] = ashr i32 [[A10]], [[B10]]
201 ; SSE-NEXT: [[R11:%.*]] = ashr i32 [[A11]], [[B11]]
202 ; SSE-NEXT: [[R12:%.*]] = ashr i32 [[A12]], [[B12]]
203 ; SSE-NEXT: [[R13:%.*]] = ashr i32 [[A13]], [[B13]]
204 ; SSE-NEXT: [[R14:%.*]] = ashr i32 [[A14]], [[B14]]
205 ; SSE-NEXT: [[R15:%.*]] = ashr i32 [[A15]], [[B15]]
206 ; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
207 ; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
208 ; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
209 ; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
210 ; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
211 ; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
212 ; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
213 ; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
214 ; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
215 ; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
216 ; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
217 ; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
218 ; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
219 ; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
220 ; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
221 ; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
222 ; SSE-NEXT: ret void
223 ;
224 ; AVX1-LABEL: @ashr_v16i32(
225 ; AVX1-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
226 ; AVX1-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
227 ; AVX1-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
228 ; AVX1-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
229 ; AVX1-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
230 ; AVX1-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
231 ; AVX1-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
232 ; AVX1-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
233 ; AVX1-NEXT: [[TMP9:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP5]]
234 ; AVX1-NEXT: [[TMP10:%.*]] = ashr <4 x i32> [[TMP2]], [[TMP6]]
235 ; AVX1-NEXT: [[TMP11:%.*]] = ashr <4 x i32> [[TMP3]], [[TMP7]]
236 ; AVX1-NEXT: [[TMP12:%.*]] = ashr <4 x i32> [[TMP4]], [[TMP8]]
237 ; AVX1-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
238 ; AVX1-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
239 ; AVX1-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
240 ; AVX1-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
241 ; AVX1-NEXT: ret void
242 ;
243 ; AVX2-LABEL: @ashr_v16i32(
244 ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
245 ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
246 ; AVX2-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
247 ; AVX2-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
248 ; AVX2-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP1]], [[TMP3]]
249 ; AVX2-NEXT: [[TMP6:%.*]] = ashr <8 x i32> [[TMP2]], [[TMP4]]
250 ; AVX2-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
251 ; AVX2-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
252 ; AVX2-NEXT: ret void
253 ;
254 ; AVX512-LABEL: @ashr_v16i32(
255 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
256 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
257 ; AVX512-NEXT: [[TMP3:%.*]] = ashr <16 x i32> [[TMP1]], [[TMP2]]
258 ; AVX512-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
259 ; AVX512-NEXT: ret void
260 ;
261 ; XOP-LABEL: @ashr_v16i32(
262 ; XOP-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
263 ; XOP-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
264 ; XOP-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
265 ; XOP-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
266 ; XOP-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP1]], [[TMP3]]
267 ; XOP-NEXT: [[TMP6:%.*]] = ashr <8 x i32> [[TMP2]], [[TMP4]]
268 ; XOP-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
269 ; XOP-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
270 ; XOP-NEXT: ret void
271 ;
272 %a0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
273 %a1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
274 %a2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
275 %a3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
276 %a4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
277 %a5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
278 %a6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
279 %a7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
280 %a8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
281 %a9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
282 %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
283 %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
284 %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
285 %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
286 %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
287 %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
288 %b0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
289 %b1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
290 %b2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
291 %b3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
292 %b4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
293 %b5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
294 %b6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
295 %b7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
296 %b8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
297 %b9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
298 %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
299 %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
300 %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
301 %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
302 %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
303 %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
304 %r0 = ashr i32 %a0 , %b0
305 %r1 = ashr i32 %a1 , %b1
306 %r2 = ashr i32 %a2 , %b2
307 %r3 = ashr i32 %a3 , %b3
308 %r4 = ashr i32 %a4 , %b4
309 %r5 = ashr i32 %a5 , %b5
310 %r6 = ashr i32 %a6 , %b6
311 %r7 = ashr i32 %a7 , %b7
312 %r8 = ashr i32 %a8 , %b8
313 %r9 = ashr i32 %a9 , %b9
314 %r10 = ashr i32 %a10, %b10
315 %r11 = ashr i32 %a11, %b11
316 %r12 = ashr i32 %a12, %b12
317 %r13 = ashr i32 %a13, %b13
318 %r14 = ashr i32 %a14, %b14
319 %r15 = ashr i32 %a15, %b15
320 store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
321 store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
322 store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
323 store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
324 store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
325 store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
326 store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
327 store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
328 store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
329 store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
330 store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
331 store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
332 store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
333 store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
334 store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
335 store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
336 ret void
337 }
338
339 define void @ashr_v32i16() {
340 ; SSE-LABEL: @ashr_v32i16(
341 ; SSE-NEXT: [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
342 ; SSE-NEXT: [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
343 ; SSE-NEXT: [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
344 ; SSE-NEXT: [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
345 ; SSE-NEXT: [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
346 ; SSE-NEXT: [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
347 ; SSE-NEXT: [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
348 ; SSE-NEXT: [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
349 ; SSE-NEXT: [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
350 ; SSE-NEXT: [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
351 ; SSE-NEXT: [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
352 ; SSE-NEXT: [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
353 ; SSE-NEXT: [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
354 ; SSE-NEXT: [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
355 ; SSE-NEXT: [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
356 ; SSE-NEXT: [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
357 ; SSE-NEXT: [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
358 ; SSE-NEXT: [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
359 ; SSE-NEXT: [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
360 ; SSE-NEXT: [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
361 ; SSE-NEXT: [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
362 ; SSE-NEXT: [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
363 ; SSE-NEXT: [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
364 ; SSE-NEXT: [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
365 ; SSE-NEXT: [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
366 ; SSE-NEXT: [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
367 ; SSE-NEXT: [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
368 ; SSE-NEXT: [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
369 ; SSE-NEXT: [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
370 ; SSE-NEXT: [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
371 ; SSE-NEXT: [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
372 ; SSE-NEXT: [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
373 ; SSE-NEXT: [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
374 ; SSE-NEXT: [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
375 ; SSE-NEXT: [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
376 ; SSE-NEXT: [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
377 ; SSE-NEXT: [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
378 ; SSE-NEXT: [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
379 ; SSE-NEXT: [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
380 ; SSE-NEXT: [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
381 ; SSE-NEXT: [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
382 ; SSE-NEXT: [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
383 ; SSE-NEXT: [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
384 ; SSE-NEXT: [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
385 ; SSE-NEXT: [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
386 ; SSE-NEXT: [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
387 ; SSE-NEXT: [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
388 ; SSE-NEXT: [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
389 ; SSE-NEXT: [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
390 ; SSE-NEXT: [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
391 ; SSE-NEXT: [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
392 ; SSE-NEXT: [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
393 ; SSE-NEXT: [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
394 ; SSE-NEXT: [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
395 ; SSE-NEXT: [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
396 ; SSE-NEXT: [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
397 ; SSE-NEXT: [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
398 ; SSE-NEXT: [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
399 ; SSE-NEXT: [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
400 ; SSE-NEXT: [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
401 ; SSE-NEXT: [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
402 ; SSE-NEXT: [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
403 ; SSE-NEXT: [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
404 ; SSE-NEXT: [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
405 ; SSE-NEXT: [[R0:%.*]] = ashr i16 [[A0]], [[B0]]
406 ; SSE-NEXT: [[R1:%.*]] = ashr i16 [[A1]], [[B1]]
407 ; SSE-NEXT: [[R2:%.*]] = ashr i16 [[A2]], [[B2]]
408 ; SSE-NEXT: [[R3:%.*]] = ashr i16 [[A3]], [[B3]]
409 ; SSE-NEXT: [[R4:%.*]] = ashr i16 [[A4]], [[B4]]
410 ; SSE-NEXT: [[R5:%.*]] = ashr i16 [[A5]], [[B5]]
411 ; SSE-NEXT: [[R6:%.*]] = ashr i16 [[A6]], [[B6]]
412 ; SSE-NEXT: [[R7:%.*]] = ashr i16 [[A7]], [[B7]]
413 ; SSE-NEXT: [[R8:%.*]] = ashr i16 [[A8]], [[B8]]
414 ; SSE-NEXT: [[R9:%.*]] = ashr i16 [[A9]], [[B9]]
415 ; SSE-NEXT: [[R10:%.*]] = ashr i16 [[A10]], [[B10]]
416 ; SSE-NEXT: [[R11:%.*]] = ashr i16 [[A11]], [[B11]]
417 ; SSE-NEXT: [[R12:%.*]] = ashr i16 [[A12]], [[B12]]
418 ; SSE-NEXT: [[R13:%.*]] = ashr i16 [[A13]], [[B13]]
419 ; SSE-NEXT: [[R14:%.*]] = ashr i16 [[A14]], [[B14]]
420 ; SSE-NEXT: [[R15:%.*]] = ashr i16 [[A15]], [[B15]]
421 ; SSE-NEXT: [[R16:%.*]] = ashr i16 [[A16]], [[B16]]
422 ; SSE-NEXT: [[R17:%.*]] = ashr i16 [[A17]], [[B17]]
423 ; SSE-NEXT: [[R18:%.*]] = ashr i16 [[A18]], [[B18]]
424 ; SSE-NEXT: [[R19:%.*]] = ashr i16 [[A19]], [[B19]]
425 ; SSE-NEXT: [[R20:%.*]] = ashr i16 [[A20]], [[B20]]
426 ; SSE-NEXT: [[R21:%.*]] = ashr i16 [[A21]], [[B21]]
427 ; SSE-NEXT: [[R22:%.*]] = ashr i16 [[A22]], [[B22]]
428 ; SSE-NEXT: [[R23:%.*]] = ashr i16 [[A23]], [[B23]]
429 ; SSE-NEXT: [[R24:%.*]] = ashr i16 [[A24]], [[B24]]
430 ; SSE-NEXT: [[R25:%.*]] = ashr i16 [[A25]], [[B25]]
431 ; SSE-NEXT: [[R26:%.*]] = ashr i16 [[A26]], [[B26]]
432 ; SSE-NEXT: [[R27:%.*]] = ashr i16 [[A27]], [[B27]]
433 ; SSE-NEXT: [[R28:%.*]] = ashr i16 [[A28]], [[B28]]
434 ; SSE-NEXT: [[R29:%.*]] = ashr i16 [[A29]], [[B29]]
435 ; SSE-NEXT: [[R30:%.*]] = ashr i16 [[A30]], [[B30]]
436 ; SSE-NEXT: [[R31:%.*]] = ashr i16 [[A31]], [[B31]]
437 ; SSE-NEXT: store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
438 ; SSE-NEXT: store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
439 ; SSE-NEXT: store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
440 ; SSE-NEXT: store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
441 ; SSE-NEXT: store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
442 ; SSE-NEXT: store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
443 ; SSE-NEXT: store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
444 ; SSE-NEXT: store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
445 ; SSE-NEXT: store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
446 ; SSE-NEXT: store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
447 ; SSE-NEXT: store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
448 ; SSE-NEXT: store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
449 ; SSE-NEXT: store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
450 ; SSE-NEXT: store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
451 ; SSE-NEXT: store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
452 ; SSE-NEXT: store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
453 ; SSE-NEXT: store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
454 ; SSE-NEXT: store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
455 ; SSE-NEXT: store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
456 ; SSE-NEXT: store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
457 ; SSE-NEXT: store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
458 ; SSE-NEXT: store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
459 ; SSE-NEXT: store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
460 ; SSE-NEXT: store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
461 ; SSE-NEXT: store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
462 ; SSE-NEXT: store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
463 ; SSE-NEXT: store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
464 ; SSE-NEXT: store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
465 ; SSE-NEXT: store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
466 ; SSE-NEXT: store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
467 ; SSE-NEXT: store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
468 ; SSE-NEXT: store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
469 ; SSE-NEXT: ret void
470 ;
471 ; AVX-LABEL: @ashr_v32i16(
472 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
473 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
474 ; AVX-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
475 ; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
476 ; AVX-NEXT: [[TMP5:%.*]] = ashr <16 x i16> [[TMP1]], [[TMP3]]
477 ; AVX-NEXT: [[TMP6:%.*]] = ashr <16 x i16> [[TMP2]], [[TMP4]]
478 ; AVX-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
479 ; AVX-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
480 ; AVX-NEXT: ret void
481 ;
482 ; AVX512-LABEL: @ashr_v32i16(
483 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
484 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
485 ; AVX512-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
486 ; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
487 ; AVX512-NEXT: [[TMP5:%.*]] = ashr <16 x i16> [[TMP1]], [[TMP3]]
488 ; AVX512-NEXT: [[TMP6:%.*]] = ashr <16 x i16> [[TMP2]], [[TMP4]]
489 ; AVX512-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
490 ; AVX512-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
491 ; AVX512-NEXT: ret void
492 ;
493 ; XOP-LABEL: @ashr_v32i16(
494 ; XOP-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
495 ; XOP-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
496 ; XOP-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
497 ; XOP-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
498 ; XOP-NEXT: [[TMP5:%.*]] = ashr <16 x i16> [[TMP1]], [[TMP3]]
499 ; XOP-NEXT: [[TMP6:%.*]] = ashr <16 x i16> [[TMP2]], [[TMP4]]
500 ; XOP-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
501 ; XOP-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
502 ; XOP-NEXT: ret void
503 ;
504 %a0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
505 %a1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
506 %a2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
507 %a3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
508 %a4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
509 %a5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
510 %a6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
511 %a7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
512 %a8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
513 %a9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
514 %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
515 %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
516 %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
517 %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
518 %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
519 %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
520 %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
521 %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
522 %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
523 %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
524 %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
525 %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
526 %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
527 %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
528 %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
529 %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
530 %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
531 %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
532 %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
533 %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
534 %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
535 %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
536 %b0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
537 %b1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
538 %b2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
539 %b3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
540 %b4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
541 %b5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
542 %b6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
543 %b7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
544 %b8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
545 %b9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
546 %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
547 %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
548 %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
549 %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
550 %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
551 %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
552 %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
553 %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
554 %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
555 %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
556 %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
557 %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
558 %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
559 %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
560 %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
561 %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
562 %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
563 %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
564 %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
565 %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
566 %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
567 %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
568 %r0 = ashr i16 %a0 , %b0
569 %r1 = ashr i16 %a1 , %b1
570 %r2 = ashr i16 %a2 , %b2
571 %r3 = ashr i16 %a3 , %b3
572 %r4 = ashr i16 %a4 , %b4
573 %r5 = ashr i16 %a5 , %b5
574 %r6 = ashr i16 %a6 , %b6
575 %r7 = ashr i16 %a7 , %b7
576 %r8 = ashr i16 %a8 , %b8
577 %r9 = ashr i16 %a9 , %b9
578 %r10 = ashr i16 %a10, %b10
579 %r11 = ashr i16 %a11, %b11
580 %r12 = ashr i16 %a12, %b12
581 %r13 = ashr i16 %a13, %b13
582 %r14 = ashr i16 %a14, %b14
583 %r15 = ashr i16 %a15, %b15
584 %r16 = ashr i16 %a16, %b16
585 %r17 = ashr i16 %a17, %b17
586 %r18 = ashr i16 %a18, %b18
587 %r19 = ashr i16 %a19, %b19
588 %r20 = ashr i16 %a20, %b20
589 %r21 = ashr i16 %a21, %b21
590 %r22 = ashr i16 %a22, %b22
591 %r23 = ashr i16 %a23, %b23
592 %r24 = ashr i16 %a24, %b24
593 %r25 = ashr i16 %a25, %b25
594 %r26 = ashr i16 %a26, %b26
595 %r27 = ashr i16 %a27, %b27
596 %r28 = ashr i16 %a28, %b28
597 %r29 = ashr i16 %a29, %b29
598 %r30 = ashr i16 %a30, %b30
599 %r31 = ashr i16 %a31, %b31
600 store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
601 store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
602 store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
603 store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
604 store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
605 store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
606 store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
607 store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
608 store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
609 store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
610 store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
611 store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
612 store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
613 store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
614 store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
615 store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
616 store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
617 store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
618 store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
619 store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
620 store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
621 store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
622 store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
623 store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
624 store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
625 store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
626 store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
627 store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
628 store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
629 store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
630 store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
631 store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
632 ret void
633 }
634
635 define void @ashr_v64i8() {
636 ; CHECK-LABEL: @ashr_v64i8(
637 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
638 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
639 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
640 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
641 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
642 ; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
643 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
644 ; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
645 ; CHECK-NEXT: [[TMP9:%.*]] = ashr <16 x i8> [[TMP1]], [[TMP5]]
646 ; CHECK-NEXT: [[TMP10:%.*]] = ashr <16 x i8> [[TMP2]], [[TMP6]]
647 ; CHECK-NEXT: [[TMP11:%.*]] = ashr <16 x i8> [[TMP3]], [[TMP7]]
648 ; CHECK-NEXT: [[TMP12:%.*]] = ashr <16 x i8> [[TMP4]], [[TMP8]]
649 ; CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
650 ; CHECK-NEXT: store <16 x i8> [[TMP10]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
651 ; CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
652 ; CHECK-NEXT: store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
653 ; CHECK-NEXT: ret void
654 ;
655 %a0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
656 %a1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
657 %a2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
658 %a3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
659 %a4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
660 %a5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
661 %a6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
662 %a7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
663 %a8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
664 %a9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
665 %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
666 %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
667 %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
668 %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
669 %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
670 %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
671 %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
672 %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
673 %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
674 %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
675 %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
676 %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
677 %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
678 %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
679 %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
680 %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
681 %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
682 %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
683 %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
684 %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
685 %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
686 %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
687 %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
688 %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
689 %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
690 %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
691 %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
692 %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
693 %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
694 %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
695 %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
696 %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
697 %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
698 %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
699 %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
700 %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
701 %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
702 %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
703 %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
704 %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
705 %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
706 %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
707 %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
708 %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
709 %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
710 %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
711 %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
712 %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
713 %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
714 %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
715 %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
716 %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
717 %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
718 %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
719 %b0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
720 %b1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
721 %b2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
722 %b3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
723 %b4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
724 %b5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
725 %b6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
726 %b7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
727 %b8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
728 %b9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
729 %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
730 %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
731 %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
732 %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
733 %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
734 %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
735 %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
736 %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
737 %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
738 %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
739 %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
740 %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
741 %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
742 %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
743 %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
744 %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
745 %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
746 %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
747 %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
748 %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
749 %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
750 %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
751 %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
752 %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
753 %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
754 %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
755 %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
756 %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
757 %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
758 %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
759 %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
760 %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
761 %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
762 %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
763 %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
764 %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
765 %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
766 %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
767 %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
768 %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
769 %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
770 %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
771 %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
772 %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
773 %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
774 %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
775 %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
776 %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
777 %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
778 %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
779 %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
780 %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
781 %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
782 %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
783 %r0 = ashr i8 %a0 , %b0
784 %r1 = ashr i8 %a1 , %b1
785 %r2 = ashr i8 %a2 , %b2
786 %r3 = ashr i8 %a3 , %b3
787 %r4 = ashr i8 %a4 , %b4
788 %r5 = ashr i8 %a5 , %b5
789 %r6 = ashr i8 %a6 , %b6
790 %r7 = ashr i8 %a7 , %b7
791 %r8 = ashr i8 %a8 , %b8
792 %r9 = ashr i8 %a9 , %b9
793 %r10 = ashr i8 %a10, %b10
794 %r11 = ashr i8 %a11, %b11
795 %r12 = ashr i8 %a12, %b12
796 %r13 = ashr i8 %a13, %b13
797 %r14 = ashr i8 %a14, %b14
798 %r15 = ashr i8 %a15, %b15
799 %r16 = ashr i8 %a16, %b16
800 %r17 = ashr i8 %a17, %b17
801 %r18 = ashr i8 %a18, %b18
802 %r19 = ashr i8 %a19, %b19
803 %r20 = ashr i8 %a20, %b20
804 %r21 = ashr i8 %a21, %b21
805 %r22 = ashr i8 %a22, %b22
806 %r23 = ashr i8 %a23, %b23
807 %r24 = ashr i8 %a24, %b24
808 %r25 = ashr i8 %a25, %b25
809 %r26 = ashr i8 %a26, %b26
810 %r27 = ashr i8 %a27, %b27
811 %r28 = ashr i8 %a28, %b28
812 %r29 = ashr i8 %a29, %b29
813 %r30 = ashr i8 %a30, %b30
814 %r31 = ashr i8 %a31, %b31
815 %r32 = ashr i8 %a32, %b32
816 %r33 = ashr i8 %a33, %b33
817 %r34 = ashr i8 %a34, %b34
818 %r35 = ashr i8 %a35, %b35
819 %r36 = ashr i8 %a36, %b36
820 %r37 = ashr i8 %a37, %b37
821 %r38 = ashr i8 %a38, %b38
822 %r39 = ashr i8 %a39, %b39
823 %r40 = ashr i8 %a40, %b40
824 %r41 = ashr i8 %a41, %b41
825 %r42 = ashr i8 %a42, %b42
826 %r43 = ashr i8 %a43, %b43
827 %r44 = ashr i8 %a44, %b44
828 %r45 = ashr i8 %a45, %b45
829 %r46 = ashr i8 %a46, %b46
830 %r47 = ashr i8 %a47, %b47
831 %r48 = ashr i8 %a48, %b48
832 %r49 = ashr i8 %a49, %b49
833 %r50 = ashr i8 %a50, %b50
834 %r51 = ashr i8 %a51, %b51
835 %r52 = ashr i8 %a52, %b52
836 %r53 = ashr i8 %a53, %b53
837 %r54 = ashr i8 %a54, %b54
838 %r55 = ashr i8 %a55, %b55
839 %r56 = ashr i8 %a56, %b56
840 %r57 = ashr i8 %a57, %b57
841 %r58 = ashr i8 %a58, %b58
842 %r59 = ashr i8 %a59, %b59
843 %r60 = ashr i8 %a60, %b60
844 %r61 = ashr i8 %a61, %b61
845 %r62 = ashr i8 %a62, %b62
846 %r63 = ashr i8 %a63, %b63
847 store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
848 store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
849 store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
850 store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
851 store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
852 store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
853 store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
854 store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
855 store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
856 store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
857 store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
858 store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
859 store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
860 store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
861 store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
862 store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
863 store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
864 store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
865 store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
866 store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
867 store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
868 store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
869 store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
870 store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
871 store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
872 store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
873 store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
874 store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
875 store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
876 store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
877 store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
878 store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
879 store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
880 store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
881 store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
882 store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
883 store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
884 store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
885 store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
886 store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
887 store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
888 store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
889 store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
890 store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
891 store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
892 store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
893 store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
894 store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
895 store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
896 store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
897 store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
898 store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
899 store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
900 store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
901 store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
902 store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
903 store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
904 store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
905 store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
906 store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
907 store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
908 store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
909 store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
910 store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
911 ret void
912 }
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP
7
8 @a64 = common global [8 x i64] zeroinitializer, align 64
9 @b64 = common global [8 x i64] zeroinitializer, align 64
10 @c64 = common global [8 x i64] zeroinitializer, align 64
11 @a32 = common global [16 x i32] zeroinitializer, align 64
12 @b32 = common global [16 x i32] zeroinitializer, align 64
13 @c32 = common global [16 x i32] zeroinitializer, align 64
14 @a16 = common global [32 x i16] zeroinitializer, align 64
15 @b16 = common global [32 x i16] zeroinitializer, align 64
16 @c16 = common global [32 x i16] zeroinitializer, align 64
17 @a8 = common global [64 x i8] zeroinitializer, align 64
18 @b8 = common global [64 x i8] zeroinitializer, align 64
19 @c8 = common global [64 x i8] zeroinitializer, align 64
20
21 define void @lshr_v8i64() {
22 ; SSE-LABEL: @lshr_v8i64(
23 ; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
24 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
25 ; SSE-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
26 ; SSE-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
27 ; SSE-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
28 ; SSE-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
29 ; SSE-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
30 ; SSE-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
31 ; SSE-NEXT: [[TMP9:%.*]] = lshr <2 x i64> [[TMP1]], [[TMP5]]
32 ; SSE-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP2]], [[TMP6]]
33 ; SSE-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP3]], [[TMP7]]
34 ; SSE-NEXT: [[TMP12:%.*]] = lshr <2 x i64> [[TMP4]], [[TMP8]]
35 ; SSE-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
36 ; SSE-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
37 ; SSE-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
38 ; SSE-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
39 ; SSE-NEXT: ret void
40 ;
41 ; AVX1-LABEL: @lshr_v8i64(
42 ; AVX1-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
43 ; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
44 ; AVX1-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
45 ; AVX1-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
46 ; AVX1-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
47 ; AVX1-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
48 ; AVX1-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
49 ; AVX1-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
50 ; AVX1-NEXT: [[TMP9:%.*]] = lshr <2 x i64> [[TMP1]], [[TMP5]]
51 ; AVX1-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP2]], [[TMP6]]
52 ; AVX1-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP3]], [[TMP7]]
53 ; AVX1-NEXT: [[TMP12:%.*]] = lshr <2 x i64> [[TMP4]], [[TMP8]]
54 ; AVX1-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
55 ; AVX1-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
56 ; AVX1-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
57 ; AVX1-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
58 ; AVX1-NEXT: ret void
59 ;
60 ; AVX2-LABEL: @lshr_v8i64(
61 ; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
62 ; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
63 ; AVX2-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
64 ; AVX2-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
65 ; AVX2-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP1]], [[TMP3]]
66 ; AVX2-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[TMP2]], [[TMP4]]
67 ; AVX2-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
68 ; AVX2-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
69 ; AVX2-NEXT: ret void
70 ;
71 ; AVX512-LABEL: @lshr_v8i64(
72 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
73 ; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
74 ; AVX512-NEXT: [[TMP3:%.*]] = lshr <8 x i64> [[TMP1]], [[TMP2]]
75 ; AVX512-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
76 ; AVX512-NEXT: ret void
77 ;
78 ; XOP-LABEL: @lshr_v8i64(
79 ; XOP-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
80 ; XOP-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
81 ; XOP-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
82 ; XOP-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
83 ; XOP-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP1]], [[TMP3]]
84 ; XOP-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[TMP2]], [[TMP4]]
85 ; XOP-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
86 ; XOP-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
87 ; XOP-NEXT: ret void
88 ;
89 %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
90 %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
91 %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
92 %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
93 %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
94 %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
95 %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
96 %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
97 %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
98 %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
99 %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
100 %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
101 %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
102 %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
103 %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
104 %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
105 %r0 = lshr i64 %a0, %b0
106 %r1 = lshr i64 %a1, %b1
107 %r2 = lshr i64 %a2, %b2
108 %r3 = lshr i64 %a3, %b3
109 %r4 = lshr i64 %a4, %b4
110 %r5 = lshr i64 %a5, %b5
111 %r6 = lshr i64 %a6, %b6
112 %r7 = lshr i64 %a7, %b7
113 store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
114 store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
115 store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
116 store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
117 store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
118 store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
119 store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
120 store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
121 ret void
122 }
123
124 define void @lshr_v16i32() {
125 ; SSE-LABEL: @lshr_v16i32(
126 ; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
127 ; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
128 ; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
129 ; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
130 ; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
131 ; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
132 ; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
133 ; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
134 ; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
135 ; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
136 ; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
137 ; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
138 ; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
139 ; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
140 ; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
141 ; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
142 ; SSE-NEXT: [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
143 ; SSE-NEXT: [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
144 ; SSE-NEXT: [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
145 ; SSE-NEXT: [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
146 ; SSE-NEXT: [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
147 ; SSE-NEXT: [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
148 ; SSE-NEXT: [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
149 ; SSE-NEXT: [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
150 ; SSE-NEXT: [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
151 ; SSE-NEXT: [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
152 ; SSE-NEXT: [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
153 ; SSE-NEXT: [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
154 ; SSE-NEXT: [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
155 ; SSE-NEXT: [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
156 ; SSE-NEXT: [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
157 ; SSE-NEXT: [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
158 ; SSE-NEXT: [[R0:%.*]] = lshr i32 [[A0]], [[B0]]
159 ; SSE-NEXT: [[R1:%.*]] = lshr i32 [[A1]], [[B1]]
160 ; SSE-NEXT: [[R2:%.*]] = lshr i32 [[A2]], [[B2]]
161 ; SSE-NEXT: [[R3:%.*]] = lshr i32 [[A3]], [[B3]]
162 ; SSE-NEXT: [[R4:%.*]] = lshr i32 [[A4]], [[B4]]
163 ; SSE-NEXT: [[R5:%.*]] = lshr i32 [[A5]], [[B5]]
164 ; SSE-NEXT: [[R6:%.*]] = lshr i32 [[A6]], [[B6]]
165 ; SSE-NEXT: [[R7:%.*]] = lshr i32 [[A7]], [[B7]]
166 ; SSE-NEXT: [[R8:%.*]] = lshr i32 [[A8]], [[B8]]
167 ; SSE-NEXT: [[R9:%.*]] = lshr i32 [[A9]], [[B9]]
168 ; SSE-NEXT: [[R10:%.*]] = lshr i32 [[A10]], [[B10]]
169 ; SSE-NEXT: [[R11:%.*]] = lshr i32 [[A11]], [[B11]]
170 ; SSE-NEXT: [[R12:%.*]] = lshr i32 [[A12]], [[B12]]
171 ; SSE-NEXT: [[R13:%.*]] = lshr i32 [[A13]], [[B13]]
172 ; SSE-NEXT: [[R14:%.*]] = lshr i32 [[A14]], [[B14]]
173 ; SSE-NEXT: [[R15:%.*]] = lshr i32 [[A15]], [[B15]]
174 ; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
175 ; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
176 ; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
177 ; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
178 ; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
179 ; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
180 ; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
181 ; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
182 ; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
183 ; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
184 ; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
185 ; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
186 ; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
187 ; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
188 ; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
189 ; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
190 ; SSE-NEXT: ret void
191 ;
192 ; AVX-LABEL: @lshr_v16i32(
193 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
194 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
195 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
196 ; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
197 ; AVX-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[TMP1]], [[TMP3]]
198 ; AVX-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[TMP2]], [[TMP4]]
199 ; AVX-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
200 ; AVX-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
201 ; AVX-NEXT: ret void
202 ;
203 ; AVX512-LABEL: @lshr_v16i32(
204 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
205 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
206 ; AVX512-NEXT: [[TMP3:%.*]] = lshr <16 x i32> [[TMP1]], [[TMP2]]
207 ; AVX512-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
208 ; AVX512-NEXT: ret void
209 ;
210 ; XOP-LABEL: @lshr_v16i32(
211 ; XOP-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
212 ; XOP-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
213 ; XOP-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
214 ; XOP-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
215 ; XOP-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[TMP1]], [[TMP3]]
216 ; XOP-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[TMP2]], [[TMP4]]
217 ; XOP-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
218 ; XOP-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
219 ; XOP-NEXT: ret void
220 ;
221 %a0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
222 %a1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
223 %a2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
224 %a3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
225 %a4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
226 %a5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
227 %a6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
228 %a7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
229 %a8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
230 %a9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
231 %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
232 %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
233 %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
234 %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
235 %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
236 %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
237 %b0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
238 %b1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
239 %b2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
240 %b3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
241 %b4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
242 %b5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
243 %b6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
244 %b7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
245 %b8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
246 %b9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
247 %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
248 %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
249 %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
250 %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
251 %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
252 %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
253 %r0 = lshr i32 %a0 , %b0
254 %r1 = lshr i32 %a1 , %b1
255 %r2 = lshr i32 %a2 , %b2
256 %r3 = lshr i32 %a3 , %b3
257 %r4 = lshr i32 %a4 , %b4
258 %r5 = lshr i32 %a5 , %b5
259 %r6 = lshr i32 %a6 , %b6
260 %r7 = lshr i32 %a7 , %b7
261 %r8 = lshr i32 %a8 , %b8
262 %r9 = lshr i32 %a9 , %b9
263 %r10 = lshr i32 %a10, %b10
264 %r11 = lshr i32 %a11, %b11
265 %r12 = lshr i32 %a12, %b12
266 %r13 = lshr i32 %a13, %b13
267 %r14 = lshr i32 %a14, %b14
268 %r15 = lshr i32 %a15, %b15
269 store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
270 store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
271 store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
272 store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
273 store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
274 store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
275 store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
276 store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
277 store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
278 store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
279 store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
280 store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
281 store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
282 store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
283 store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
284 store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
285 ret void
286 }
287
288 define void @lshr_v32i16() {
289 ; SSE-LABEL: @lshr_v32i16(
290 ; SSE-NEXT: [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
291 ; SSE-NEXT: [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
292 ; SSE-NEXT: [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
293 ; SSE-NEXT: [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
294 ; SSE-NEXT: [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
295 ; SSE-NEXT: [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
296 ; SSE-NEXT: [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
297 ; SSE-NEXT: [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
298 ; SSE-NEXT: [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
299 ; SSE-NEXT: [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
300 ; SSE-NEXT: [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
301 ; SSE-NEXT: [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
302 ; SSE-NEXT: [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
303 ; SSE-NEXT: [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
304 ; SSE-NEXT: [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
305 ; SSE-NEXT: [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
306 ; SSE-NEXT: [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
307 ; SSE-NEXT: [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
308 ; SSE-NEXT: [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
309 ; SSE-NEXT: [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
310 ; SSE-NEXT: [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
311 ; SSE-NEXT: [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
312 ; SSE-NEXT: [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
313 ; SSE-NEXT: [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
314 ; SSE-NEXT: [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
315 ; SSE-NEXT: [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
316 ; SSE-NEXT: [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
317 ; SSE-NEXT: [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
318 ; SSE-NEXT: [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
319 ; SSE-NEXT: [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
320 ; SSE-NEXT: [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
321 ; SSE-NEXT: [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
322 ; SSE-NEXT: [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
323 ; SSE-NEXT: [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
324 ; SSE-NEXT: [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
325 ; SSE-NEXT: [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
326 ; SSE-NEXT: [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
327 ; SSE-NEXT: [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
328 ; SSE-NEXT: [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
329 ; SSE-NEXT: [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
330 ; SSE-NEXT: [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
331 ; SSE-NEXT: [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
332 ; SSE-NEXT: [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
333 ; SSE-NEXT: [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
334 ; SSE-NEXT: [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
335 ; SSE-NEXT: [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
336 ; SSE-NEXT: [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
337 ; SSE-NEXT: [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
338 ; SSE-NEXT: [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
339 ; SSE-NEXT: [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
340 ; SSE-NEXT: [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
341 ; SSE-NEXT: [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
342 ; SSE-NEXT: [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
343 ; SSE-NEXT: [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
344 ; SSE-NEXT: [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
345 ; SSE-NEXT: [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
346 ; SSE-NEXT: [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
347 ; SSE-NEXT: [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
348 ; SSE-NEXT: [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
349 ; SSE-NEXT: [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
350 ; SSE-NEXT: [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
351 ; SSE-NEXT: [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
352 ; SSE-NEXT: [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
353 ; SSE-NEXT: [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
354 ; SSE-NEXT: [[R0:%.*]] = lshr i16 [[A0]], [[B0]]
355 ; SSE-NEXT: [[R1:%.*]] = lshr i16 [[A1]], [[B1]]
356 ; SSE-NEXT: [[R2:%.*]] = lshr i16 [[A2]], [[B2]]
357 ; SSE-NEXT: [[R3:%.*]] = lshr i16 [[A3]], [[B3]]
358 ; SSE-NEXT: [[R4:%.*]] = lshr i16 [[A4]], [[B4]]
359 ; SSE-NEXT: [[R5:%.*]] = lshr i16 [[A5]], [[B5]]
360 ; SSE-NEXT: [[R6:%.*]] = lshr i16 [[A6]], [[B6]]
361 ; SSE-NEXT: [[R7:%.*]] = lshr i16 [[A7]], [[B7]]
362 ; SSE-NEXT: [[R8:%.*]] = lshr i16 [[A8]], [[B8]]
363 ; SSE-NEXT: [[R9:%.*]] = lshr i16 [[A9]], [[B9]]
364 ; SSE-NEXT: [[R10:%.*]] = lshr i16 [[A10]], [[B10]]
365 ; SSE-NEXT: [[R11:%.*]] = lshr i16 [[A11]], [[B11]]
366 ; SSE-NEXT: [[R12:%.*]] = lshr i16 [[A12]], [[B12]]
367 ; SSE-NEXT: [[R13:%.*]] = lshr i16 [[A13]], [[B13]]
368 ; SSE-NEXT: [[R14:%.*]] = lshr i16 [[A14]], [[B14]]
369 ; SSE-NEXT: [[R15:%.*]] = lshr i16 [[A15]], [[B15]]
370 ; SSE-NEXT: [[R16:%.*]] = lshr i16 [[A16]], [[B16]]
371 ; SSE-NEXT: [[R17:%.*]] = lshr i16 [[A17]], [[B17]]
372 ; SSE-NEXT: [[R18:%.*]] = lshr i16 [[A18]], [[B18]]
373 ; SSE-NEXT: [[R19:%.*]] = lshr i16 [[A19]], [[B19]]
374 ; SSE-NEXT: [[R20:%.*]] = lshr i16 [[A20]], [[B20]]
375 ; SSE-NEXT: [[R21:%.*]] = lshr i16 [[A21]], [[B21]]
376 ; SSE-NEXT: [[R22:%.*]] = lshr i16 [[A22]], [[B22]]
377 ; SSE-NEXT: [[R23:%.*]] = lshr i16 [[A23]], [[B23]]
378 ; SSE-NEXT: [[R24:%.*]] = lshr i16 [[A24]], [[B24]]
379 ; SSE-NEXT: [[R25:%.*]] = lshr i16 [[A25]], [[B25]]
380 ; SSE-NEXT: [[R26:%.*]] = lshr i16 [[A26]], [[B26]]
381 ; SSE-NEXT: [[R27:%.*]] = lshr i16 [[A27]], [[B27]]
382 ; SSE-NEXT: [[R28:%.*]] = lshr i16 [[A28]], [[B28]]
383 ; SSE-NEXT: [[R29:%.*]] = lshr i16 [[A29]], [[B29]]
384 ; SSE-NEXT: [[R30:%.*]] = lshr i16 [[A30]], [[B30]]
385 ; SSE-NEXT: [[R31:%.*]] = lshr i16 [[A31]], [[B31]]
386 ; SSE-NEXT: store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
387 ; SSE-NEXT: store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
388 ; SSE-NEXT: store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
389 ; SSE-NEXT: store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
390 ; SSE-NEXT: store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
391 ; SSE-NEXT: store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
392 ; SSE-NEXT: store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
393 ; SSE-NEXT: store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
394 ; SSE-NEXT: store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
395 ; SSE-NEXT: store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
396 ; SSE-NEXT: store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
397 ; SSE-NEXT: store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
398 ; SSE-NEXT: store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
399 ; SSE-NEXT: store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
400 ; SSE-NEXT: store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
401 ; SSE-NEXT: store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
402 ; SSE-NEXT: store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
403 ; SSE-NEXT: store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
404 ; SSE-NEXT: store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
405 ; SSE-NEXT: store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
406 ; SSE-NEXT: store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
407 ; SSE-NEXT: store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
408 ; SSE-NEXT: store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
409 ; SSE-NEXT: store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
410 ; SSE-NEXT: store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
411 ; SSE-NEXT: store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
412 ; SSE-NEXT: store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
413 ; SSE-NEXT: store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
414 ; SSE-NEXT: store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
415 ; SSE-NEXT: store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
416 ; SSE-NEXT: store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
417 ; SSE-NEXT: store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
418 ; SSE-NEXT: ret void
419 ;
420 ; AVX-LABEL: @lshr_v32i16(
421 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
422 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
423 ; AVX-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
424 ; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
425 ; AVX-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP1]], [[TMP3]]
426 ; AVX-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP2]], [[TMP4]]
427 ; AVX-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
428 ; AVX-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
429 ; AVX-NEXT: ret void
430 ;
431 ; AVX512-LABEL: @lshr_v32i16(
432 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
433 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
434 ; AVX512-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
435 ; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
436 ; AVX512-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP1]], [[TMP3]]
437 ; AVX512-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP2]], [[TMP4]]
438 ; AVX512-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
439 ; AVX512-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
440 ; AVX512-NEXT: ret void
441 ;
442 ; XOP-LABEL: @lshr_v32i16(
443 ; XOP-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
444 ; XOP-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
445 ; XOP-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
446 ; XOP-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
447 ; XOP-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP1]], [[TMP3]]
448 ; XOP-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP2]], [[TMP4]]
449 ; XOP-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
450 ; XOP-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
451 ; XOP-NEXT: ret void
452 ;
453 %a0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
454 %a1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
455 %a2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
456 %a3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
457 %a4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
458 %a5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
459 %a6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
460 %a7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
461 %a8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
462 %a9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
463 %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
464 %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
465 %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
466 %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
467 %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
468 %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
469 %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
470 %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
471 %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
472 %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
473 %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
474 %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
475 %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
476 %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
477 %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
478 %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
479 %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
480 %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
481 %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
482 %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
483 %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
484 %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
485 %b0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
486 %b1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
487 %b2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
488 %b3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
489 %b4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
490 %b5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
491 %b6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
492 %b7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
493 %b8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
494 %b9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
495 %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
496 %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
497 %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
498 %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
499 %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
500 %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
501 %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
502 %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
503 %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
504 %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
505 %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
506 %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
507 %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
508 %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
509 %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
510 %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
511 %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
512 %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
513 %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
514 %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
515 %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
516 %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
517 %r0 = lshr i16 %a0 , %b0
518 %r1 = lshr i16 %a1 , %b1
519 %r2 = lshr i16 %a2 , %b2
520 %r3 = lshr i16 %a3 , %b3
521 %r4 = lshr i16 %a4 , %b4
522 %r5 = lshr i16 %a5 , %b5
523 %r6 = lshr i16 %a6 , %b6
524 %r7 = lshr i16 %a7 , %b7
525 %r8 = lshr i16 %a8 , %b8
526 %r9 = lshr i16 %a9 , %b9
527 %r10 = lshr i16 %a10, %b10
528 %r11 = lshr i16 %a11, %b11
529 %r12 = lshr i16 %a12, %b12
530 %r13 = lshr i16 %a13, %b13
531 %r14 = lshr i16 %a14, %b14
532 %r15 = lshr i16 %a15, %b15
533 %r16 = lshr i16 %a16, %b16
534 %r17 = lshr i16 %a17, %b17
535 %r18 = lshr i16 %a18, %b18
536 %r19 = lshr i16 %a19, %b19
537 %r20 = lshr i16 %a20, %b20
538 %r21 = lshr i16 %a21, %b21
539 %r22 = lshr i16 %a22, %b22
540 %r23 = lshr i16 %a23, %b23
541 %r24 = lshr i16 %a24, %b24
542 %r25 = lshr i16 %a25, %b25
543 %r26 = lshr i16 %a26, %b26
544 %r27 = lshr i16 %a27, %b27
545 %r28 = lshr i16 %a28, %b28
546 %r29 = lshr i16 %a29, %b29
547 %r30 = lshr i16 %a30, %b30
548 %r31 = lshr i16 %a31, %b31
549 store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
550 store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
551 store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
552 store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
553 store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
554 store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
555 store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
556 store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
557 store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
558 store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
559 store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
560 store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
561 store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
562 store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
563 store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
564 store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
565 store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
566 store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
567 store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
568 store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
569 store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
570 store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
571 store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
572 store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
573 store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
574 store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
575 store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
576 store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
577 store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
578 store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
579 store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
580 store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
581 ret void
582 }
583
584 define void @lshr_v64i8() {
585 ; CHECK-LABEL: @lshr_v64i8(
586 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
587 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
588 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
589 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
590 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
591 ; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
592 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
593 ; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
594 ; CHECK-NEXT: [[TMP9:%.*]] = lshr <16 x i8> [[TMP1]], [[TMP5]]
595 ; CHECK-NEXT: [[TMP10:%.*]] = lshr <16 x i8> [[TMP2]], [[TMP6]]
596 ; CHECK-NEXT: [[TMP11:%.*]] = lshr <16 x i8> [[TMP3]], [[TMP7]]
597 ; CHECK-NEXT: [[TMP12:%.*]] = lshr <16 x i8> [[TMP4]], [[TMP8]]
598 ; CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
599 ; CHECK-NEXT: store <16 x i8> [[TMP10]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
600 ; CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
601 ; CHECK-NEXT: store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
602 ; CHECK-NEXT: ret void
603 ;
604 %a0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
605 %a1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
606 %a2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
607 %a3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
608 %a4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
609 %a5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
610 %a6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
611 %a7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
612 %a8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
613 %a9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
614 %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
615 %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
616 %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
617 %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
618 %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
619 %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
620 %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
621 %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
622 %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
623 %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
624 %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
625 %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
626 %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
627 %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
628 %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
629 %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
630 %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
631 %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
632 %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
633 %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
634 %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
635 %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
636 %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
637 %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
638 %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
639 %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
640 %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
641 %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
642 %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
643 %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
644 %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
645 %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
646 %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
647 %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
648 %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
649 %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
650 %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
651 %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
652 %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
653 %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
654 %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
655 %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
656 %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
657 %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
658 %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
659 %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
660 %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
661 %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
662 %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
663 %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
664 %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
665 %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
666 %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
667 %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
668 %b0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
669 %b1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
670 %b2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
671 %b3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
672 %b4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
673 %b5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
674 %b6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
675 %b7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
676 %b8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
677 %b9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
678 %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
679 %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
680 %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
681 %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
682 %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
683 %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
684 %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
685 %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
686 %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
687 %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
688 %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
689 %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
690 %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
691 %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
692 %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
693 %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
694 %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
695 %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
696 %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
697 %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
698 %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
699 %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
700 %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
701 %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
702 %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
703 %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
704 %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
705 %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
706 %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
707 %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
708 %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
709 %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
710 %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
711 %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
712 %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
713 %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
714 %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
715 %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
716 %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
717 %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
718 %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
719 %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
720 %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
721 %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
722 %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
723 %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
724 %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
725 %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
726 %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
727 %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
728 %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
729 %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
730 %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
731 %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
732 %r0 = lshr i8 %a0 , %b0
733 %r1 = lshr i8 %a1 , %b1
734 %r2 = lshr i8 %a2 , %b2
735 %r3 = lshr i8 %a3 , %b3
736 %r4 = lshr i8 %a4 , %b4
737 %r5 = lshr i8 %a5 , %b5
738 %r6 = lshr i8 %a6 , %b6
739 %r7 = lshr i8 %a7 , %b7
740 %r8 = lshr i8 %a8 , %b8
741 %r9 = lshr i8 %a9 , %b9
742 %r10 = lshr i8 %a10, %b10
743 %r11 = lshr i8 %a11, %b11
744 %r12 = lshr i8 %a12, %b12
745 %r13 = lshr i8 %a13, %b13
746 %r14 = lshr i8 %a14, %b14
747 %r15 = lshr i8 %a15, %b15
748 %r16 = lshr i8 %a16, %b16
749 %r17 = lshr i8 %a17, %b17
750 %r18 = lshr i8 %a18, %b18
751 %r19 = lshr i8 %a19, %b19
752 %r20 = lshr i8 %a20, %b20
753 %r21 = lshr i8 %a21, %b21
754 %r22 = lshr i8 %a22, %b22
755 %r23 = lshr i8 %a23, %b23
756 %r24 = lshr i8 %a24, %b24
757 %r25 = lshr i8 %a25, %b25
758 %r26 = lshr i8 %a26, %b26
759 %r27 = lshr i8 %a27, %b27
760 %r28 = lshr i8 %a28, %b28
761 %r29 = lshr i8 %a29, %b29
762 %r30 = lshr i8 %a30, %b30
763 %r31 = lshr i8 %a31, %b31
764 %r32 = lshr i8 %a32, %b32
765 %r33 = lshr i8 %a33, %b33
766 %r34 = lshr i8 %a34, %b34
767 %r35 = lshr i8 %a35, %b35
768 %r36 = lshr i8 %a36, %b36
769 %r37 = lshr i8 %a37, %b37
770 %r38 = lshr i8 %a38, %b38
771 %r39 = lshr i8 %a39, %b39
772 %r40 = lshr i8 %a40, %b40
773 %r41 = lshr i8 %a41, %b41
774 %r42 = lshr i8 %a42, %b42
775 %r43 = lshr i8 %a43, %b43
776 %r44 = lshr i8 %a44, %b44
777 %r45 = lshr i8 %a45, %b45
778 %r46 = lshr i8 %a46, %b46
779 %r47 = lshr i8 %a47, %b47
780 %r48 = lshr i8 %a48, %b48
781 %r49 = lshr i8 %a49, %b49
782 %r50 = lshr i8 %a50, %b50
783 %r51 = lshr i8 %a51, %b51
784 %r52 = lshr i8 %a52, %b52
785 %r53 = lshr i8 %a53, %b53
786 %r54 = lshr i8 %a54, %b54
787 %r55 = lshr i8 %a55, %b55
788 %r56 = lshr i8 %a56, %b56
789 %r57 = lshr i8 %a57, %b57
790 %r58 = lshr i8 %a58, %b58
791 %r59 = lshr i8 %a59, %b59
792 %r60 = lshr i8 %a60, %b60
793 %r61 = lshr i8 %a61, %b61
794 %r62 = lshr i8 %a62, %b62
795 %r63 = lshr i8 %a63, %b63
796 store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
797 store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
798 store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
799 store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
800 store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
801 store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
802 store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
803 store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
804 store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
805 store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
806 store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
807 store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
808 store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
809 store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
810 store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
811 store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
812 store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
813 store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
814 store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
815 store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
816 store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
817 store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
818 store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
819 store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
820 store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
821 store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
822 store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
823 store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
824 store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
825 store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
826 store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
827 store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
828 store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
829 store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
830 store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
831 store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
832 store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
833 store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
834 store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
835 store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
836 store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
837 store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
838 store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
839 store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
840 store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
841 store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
842 store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
843 store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
844 store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
845 store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
846 store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
847 store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
848 store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
849 store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
850 store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
851 store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
852 store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
853 store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
854 store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
855 store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
856 store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
857 store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
858 store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
859 store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
860 ret void
861 }
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP
7
8 @a64 = common global [8 x i64] zeroinitializer, align 64
9 @b64 = common global [8 x i64] zeroinitializer, align 64
10 @c64 = common global [8 x i64] zeroinitializer, align 64
11 @a32 = common global [16 x i32] zeroinitializer, align 64
12 @b32 = common global [16 x i32] zeroinitializer, align 64
13 @c32 = common global [16 x i32] zeroinitializer, align 64
14 @a16 = common global [32 x i16] zeroinitializer, align 64
15 @b16 = common global [32 x i16] zeroinitializer, align 64
16 @c16 = common global [32 x i16] zeroinitializer, align 64
17 @a8 = common global [64 x i8] zeroinitializer, align 64
18 @b8 = common global [64 x i8] zeroinitializer, align 64
19 @c8 = common global [64 x i8] zeroinitializer, align 64
20
21 define void @shl_v8i64() {
22 ; SSE-LABEL: @shl_v8i64(
23 ; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
24 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
25 ; SSE-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
26 ; SSE-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
27 ; SSE-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
28 ; SSE-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
29 ; SSE-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
30 ; SSE-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
31 ; SSE-NEXT: [[TMP9:%.*]] = shl <2 x i64> [[TMP1]], [[TMP5]]
32 ; SSE-NEXT: [[TMP10:%.*]] = shl <2 x i64> [[TMP2]], [[TMP6]]
33 ; SSE-NEXT: [[TMP11:%.*]] = shl <2 x i64> [[TMP3]], [[TMP7]]
34 ; SSE-NEXT: [[TMP12:%.*]] = shl <2 x i64> [[TMP4]], [[TMP8]]
35 ; SSE-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
36 ; SSE-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
37 ; SSE-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
38 ; SSE-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
39 ; SSE-NEXT: ret void
40 ;
41 ; AVX1-LABEL: @shl_v8i64(
42 ; AVX1-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
43 ; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
44 ; AVX1-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
45 ; AVX1-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
46 ; AVX1-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
47 ; AVX1-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
48 ; AVX1-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
49 ; AVX1-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
50 ; AVX1-NEXT: [[TMP9:%.*]] = shl <2 x i64> [[TMP1]], [[TMP5]]
51 ; AVX1-NEXT: [[TMP10:%.*]] = shl <2 x i64> [[TMP2]], [[TMP6]]
52 ; AVX1-NEXT: [[TMP11:%.*]] = shl <2 x i64> [[TMP3]], [[TMP7]]
53 ; AVX1-NEXT: [[TMP12:%.*]] = shl <2 x i64> [[TMP4]], [[TMP8]]
54 ; AVX1-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
55 ; AVX1-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
56 ; AVX1-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
57 ; AVX1-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
58 ; AVX1-NEXT: ret void
59 ;
60 ; AVX2-LABEL: @shl_v8i64(
61 ; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
62 ; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
63 ; AVX2-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
64 ; AVX2-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
65 ; AVX2-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
66 ; AVX2-NEXT: [[TMP6:%.*]] = shl <4 x i64> [[TMP2]], [[TMP4]]
67 ; AVX2-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
68 ; AVX2-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
69 ; AVX2-NEXT: ret void
70 ;
71 ; AVX512-LABEL: @shl_v8i64(
72 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
73 ; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
74 ; AVX512-NEXT: [[TMP3:%.*]] = shl <8 x i64> [[TMP1]], [[TMP2]]
75 ; AVX512-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
76 ; AVX512-NEXT: ret void
77 ;
78 ; XOP-LABEL: @shl_v8i64(
79 ; XOP-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
80 ; XOP-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
81 ; XOP-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
82 ; XOP-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
83 ; XOP-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
84 ; XOP-NEXT: [[TMP6:%.*]] = shl <4 x i64> [[TMP2]], [[TMP4]]
85 ; XOP-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
86 ; XOP-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
87 ; XOP-NEXT: ret void
88 ;
89 %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
90 %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
91 %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
92 %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
93 %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
94 %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
95 %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
96 %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
97 %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
98 %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
99 %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
100 %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
101 %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
102 %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
103 %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
104 %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
105 %r0 = shl i64 %a0, %b0
106 %r1 = shl i64 %a1, %b1
107 %r2 = shl i64 %a2, %b2
108 %r3 = shl i64 %a3, %b3
109 %r4 = shl i64 %a4, %b4
110 %r5 = shl i64 %a5, %b5
111 %r6 = shl i64 %a6, %b6
112 %r7 = shl i64 %a7, %b7
113 store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
114 store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
115 store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
116 store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
117 store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
118 store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
119 store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
120 store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
121 ret void
122 }
123
124 define void @shl_v16i32() {
125 ; SSE-LABEL: @shl_v16i32(
126 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
127 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
128 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
129 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
130 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
131 ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
132 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
133 ; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
134 ; SSE-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP1]], [[TMP5]]
135 ; SSE-NEXT: [[TMP10:%.*]] = shl <4 x i32> [[TMP2]], [[TMP6]]
136 ; SSE-NEXT: [[TMP11:%.*]] = shl <4 x i32> [[TMP3]], [[TMP7]]
137 ; SSE-NEXT: [[TMP12:%.*]] = shl <4 x i32> [[TMP4]], [[TMP8]]
138 ; SSE-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
139 ; SSE-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
140 ; SSE-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
141 ; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
142 ; SSE-NEXT: ret void
143 ;
144 ; AVX-LABEL: @shl_v16i32(
145 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
146 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
147 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
148 ; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
149 ; AVX-NEXT: [[TMP5:%.*]] = shl <8 x i32> [[TMP1]], [[TMP3]]
150 ; AVX-NEXT: [[TMP6:%.*]] = shl <8 x i32> [[TMP2]], [[TMP4]]
151 ; AVX-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
152 ; AVX-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
153 ; AVX-NEXT: ret void
154 ;
155 ; AVX512-LABEL: @shl_v16i32(
156 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
157 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
158 ; AVX512-NEXT: [[TMP3:%.*]] = shl <16 x i32> [[TMP1]], [[TMP2]]
159 ; AVX512-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
160 ; AVX512-NEXT: ret void
161 ;
162 ; XOP-LABEL: @shl_v16i32(
163 ; XOP-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
164 ; XOP-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
165 ; XOP-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
166 ; XOP-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
167 ; XOP-NEXT: [[TMP5:%.*]] = shl <8 x i32> [[TMP1]], [[TMP3]]
168 ; XOP-NEXT: [[TMP6:%.*]] = shl <8 x i32> [[TMP2]], [[TMP4]]
169 ; XOP-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
170 ; XOP-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
171 ; XOP-NEXT: ret void
172 ;
173 %a0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
174 %a1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
175 %a2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
176 %a3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
177 %a4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
178 %a5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
179 %a6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
180 %a7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
181 %a8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
182 %a9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
183 %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
184 %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
185 %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
186 %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
187 %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
188 %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
189 %b0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
190 %b1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
191 %b2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
192 %b3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
193 %b4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
194 %b5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
195 %b6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
196 %b7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
197 %b8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
198 %b9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
199 %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
200 %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
201 %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
202 %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
203 %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
204 %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
205 %r0 = shl i32 %a0 , %b0
206 %r1 = shl i32 %a1 , %b1
207 %r2 = shl i32 %a2 , %b2
208 %r3 = shl i32 %a3 , %b3
209 %r4 = shl i32 %a4 , %b4
210 %r5 = shl i32 %a5 , %b5
211 %r6 = shl i32 %a6 , %b6
212 %r7 = shl i32 %a7 , %b7
213 %r8 = shl i32 %a8 , %b8
214 %r9 = shl i32 %a9 , %b9
215 %r10 = shl i32 %a10, %b10
216 %r11 = shl i32 %a11, %b11
217 %r12 = shl i32 %a12, %b12
218 %r13 = shl i32 %a13, %b13
219 %r14 = shl i32 %a14, %b14
220 %r15 = shl i32 %a15, %b15
221 store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
222 store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
223 store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
224 store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
225 store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
226 store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
227 store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
228 store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
229 store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
230 store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
231 store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
232 store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
233 store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
234 store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
235 store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
236 store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
237 ret void
238 }
239
240 define void @shl_v32i16() {
241 ; SSE-LABEL: @shl_v32i16(
242 ; SSE-NEXT: [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
243 ; SSE-NEXT: [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
244 ; SSE-NEXT: [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
245 ; SSE-NEXT: [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
246 ; SSE-NEXT: [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
247 ; SSE-NEXT: [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
248 ; SSE-NEXT: [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
249 ; SSE-NEXT: [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
250 ; SSE-NEXT: [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
251 ; SSE-NEXT: [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
252 ; SSE-NEXT: [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
253 ; SSE-NEXT: [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
254 ; SSE-NEXT: [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
255 ; SSE-NEXT: [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
256 ; SSE-NEXT: [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
257 ; SSE-NEXT: [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
258 ; SSE-NEXT: [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
259 ; SSE-NEXT: [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
260 ; SSE-NEXT: [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
261 ; SSE-NEXT: [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
262 ; SSE-NEXT: [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
263 ; SSE-NEXT: [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
264 ; SSE-NEXT: [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
265 ; SSE-NEXT: [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
266 ; SSE-NEXT: [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
267 ; SSE-NEXT: [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
268 ; SSE-NEXT: [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
269 ; SSE-NEXT: [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
270 ; SSE-NEXT: [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
271 ; SSE-NEXT: [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
272 ; SSE-NEXT: [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
273 ; SSE-NEXT: [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
274 ; SSE-NEXT: [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
275 ; SSE-NEXT: [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
276 ; SSE-NEXT: [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
277 ; SSE-NEXT: [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
278 ; SSE-NEXT: [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
279 ; SSE-NEXT: [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
280 ; SSE-NEXT: [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
281 ; SSE-NEXT: [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
282 ; SSE-NEXT: [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
283 ; SSE-NEXT: [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
284 ; SSE-NEXT: [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
285 ; SSE-NEXT: [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
286 ; SSE-NEXT: [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
287 ; SSE-NEXT: [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
288 ; SSE-NEXT: [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
289 ; SSE-NEXT: [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
290 ; SSE-NEXT: [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
291 ; SSE-NEXT: [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
292 ; SSE-NEXT: [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
293 ; SSE-NEXT: [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
294 ; SSE-NEXT: [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
295 ; SSE-NEXT: [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
296 ; SSE-NEXT: [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
297 ; SSE-NEXT: [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
298 ; SSE-NEXT: [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
299 ; SSE-NEXT: [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
300 ; SSE-NEXT: [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
301 ; SSE-NEXT: [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
302 ; SSE-NEXT: [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
303 ; SSE-NEXT: [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
304 ; SSE-NEXT: [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
305 ; SSE-NEXT: [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
306 ; SSE-NEXT: [[R0:%.*]] = shl i16 [[A0]], [[B0]]
307 ; SSE-NEXT: [[R1:%.*]] = shl i16 [[A1]], [[B1]]
308 ; SSE-NEXT: [[R2:%.*]] = shl i16 [[A2]], [[B2]]
309 ; SSE-NEXT: [[R3:%.*]] = shl i16 [[A3]], [[B3]]
310 ; SSE-NEXT: [[R4:%.*]] = shl i16 [[A4]], [[B4]]
311 ; SSE-NEXT: [[R5:%.*]] = shl i16 [[A5]], [[B5]]
312 ; SSE-NEXT: [[R6:%.*]] = shl i16 [[A6]], [[B6]]
313 ; SSE-NEXT: [[R7:%.*]] = shl i16 [[A7]], [[B7]]
314 ; SSE-NEXT: [[R8:%.*]] = shl i16 [[A8]], [[B8]]
315 ; SSE-NEXT: [[R9:%.*]] = shl i16 [[A9]], [[B9]]
316 ; SSE-NEXT: [[R10:%.*]] = shl i16 [[A10]], [[B10]]
317 ; SSE-NEXT: [[R11:%.*]] = shl i16 [[A11]], [[B11]]
318 ; SSE-NEXT: [[R12:%.*]] = shl i16 [[A12]], [[B12]]
319 ; SSE-NEXT: [[R13:%.*]] = shl i16 [[A13]], [[B13]]
320 ; SSE-NEXT: [[R14:%.*]] = shl i16 [[A14]], [[B14]]
321 ; SSE-NEXT: [[R15:%.*]] = shl i16 [[A15]], [[B15]]
322 ; SSE-NEXT: [[R16:%.*]] = shl i16 [[A16]], [[B16]]
323 ; SSE-NEXT: [[R17:%.*]] = shl i16 [[A17]], [[B17]]
324 ; SSE-NEXT: [[R18:%.*]] = shl i16 [[A18]], [[B18]]
325 ; SSE-NEXT: [[R19:%.*]] = shl i16 [[A19]], [[B19]]
326 ; SSE-NEXT: [[R20:%.*]] = shl i16 [[A20]], [[B20]]
327 ; SSE-NEXT: [[R21:%.*]] = shl i16 [[A21]], [[B21]]
328 ; SSE-NEXT: [[R22:%.*]] = shl i16 [[A22]], [[B22]]
329 ; SSE-NEXT: [[R23:%.*]] = shl i16 [[A23]], [[B23]]
330 ; SSE-NEXT: [[R24:%.*]] = shl i16 [[A24]], [[B24]]
331 ; SSE-NEXT: [[R25:%.*]] = shl i16 [[A25]], [[B25]]
332 ; SSE-NEXT: [[R26:%.*]] = shl i16 [[A26]], [[B26]]
333 ; SSE-NEXT: [[R27:%.*]] = shl i16 [[A27]], [[B27]]
334 ; SSE-NEXT: [[R28:%.*]] = shl i16 [[A28]], [[B28]]
335 ; SSE-NEXT: [[R29:%.*]] = shl i16 [[A29]], [[B29]]
336 ; SSE-NEXT: [[R30:%.*]] = shl i16 [[A30]], [[B30]]
337 ; SSE-NEXT: [[R31:%.*]] = shl i16 [[A31]], [[B31]]
338 ; SSE-NEXT: store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
339 ; SSE-NEXT: store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
340 ; SSE-NEXT: store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
341 ; SSE-NEXT: store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
342 ; SSE-NEXT: store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
343 ; SSE-NEXT: store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
344 ; SSE-NEXT: store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
345 ; SSE-NEXT: store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
346 ; SSE-NEXT: store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
347 ; SSE-NEXT: store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
348 ; SSE-NEXT: store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
349 ; SSE-NEXT: store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
350 ; SSE-NEXT: store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
351 ; SSE-NEXT: store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
352 ; SSE-NEXT: store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
353 ; SSE-NEXT: store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
354 ; SSE-NEXT: store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
355 ; SSE-NEXT: store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
356 ; SSE-NEXT: store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
357 ; SSE-NEXT: store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
358 ; SSE-NEXT: store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
359 ; SSE-NEXT: store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
360 ; SSE-NEXT: store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
361 ; SSE-NEXT: store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
362 ; SSE-NEXT: store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
363 ; SSE-NEXT: store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
364 ; SSE-NEXT: store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
365 ; SSE-NEXT: store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
366 ; SSE-NEXT: store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
367 ; SSE-NEXT: store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
368 ; SSE-NEXT: store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
369 ; SSE-NEXT: store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
370 ; SSE-NEXT: ret void
371 ;
372 ; AVX-LABEL: @shl_v32i16(
373 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
374 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
375 ; AVX-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
376 ; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
377 ; AVX-NEXT: [[TMP5:%.*]] = shl <16 x i16> [[TMP1]], [[TMP3]]
378 ; AVX-NEXT: [[TMP6:%.*]] = shl <16 x i16> [[TMP2]], [[TMP4]]
379 ; AVX-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
380 ; AVX-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
381 ; AVX-NEXT: ret void
382 ;
383 ; AVX512-LABEL: @shl_v32i16(
384 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
385 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
386 ; AVX512-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
387 ; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
388 ; AVX512-NEXT: [[TMP5:%.*]] = shl <16 x i16> [[TMP1]], [[TMP3]]
389 ; AVX512-NEXT: [[TMP6:%.*]] = shl <16 x i16> [[TMP2]], [[TMP4]]
390 ; AVX512-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
391 ; AVX512-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
392 ; AVX512-NEXT: ret void
393 ;
394 ; XOP-LABEL: @shl_v32i16(
395 ; XOP-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
396 ; XOP-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
397 ; XOP-NEXT: [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
398 ; XOP-NEXT: [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
399 ; XOP-NEXT: [[TMP5:%.*]] = shl <16 x i16> [[TMP1]], [[TMP3]]
400 ; XOP-NEXT: [[TMP6:%.*]] = shl <16 x i16> [[TMP2]], [[TMP4]]
401 ; XOP-NEXT: store <16 x i16> [[TMP5]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
402 ; XOP-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
403 ; XOP-NEXT: ret void
404 ;
405 %a0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
406 %a1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
407 %a2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
408 %a3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
409 %a4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
410 %a5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
411 %a6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
412 %a7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
413 %a8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
414 %a9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
415 %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
416 %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
417 %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
418 %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
419 %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
420 %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
421 %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
422 %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
423 %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
424 %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
425 %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
426 %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
427 %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
428 %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
429 %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
430 %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
431 %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
432 %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
433 %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
434 %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
435 %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
436 %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
437 %b0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
438 %b1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
439 %b2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
440 %b3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
441 %b4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
442 %b5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
443 %b6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
444 %b7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
445 %b8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
446 %b9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
447 %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
448 %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
449 %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
450 %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
451 %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
452 %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
453 %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
454 %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
455 %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
456 %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
457 %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
458 %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
459 %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
460 %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
461 %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
462 %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
463 %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
464 %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
465 %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
466 %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
467 %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
468 %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
469 %r0 = shl i16 %a0 , %b0
470 %r1 = shl i16 %a1 , %b1
471 %r2 = shl i16 %a2 , %b2
472 %r3 = shl i16 %a3 , %b3
473 %r4 = shl i16 %a4 , %b4
474 %r5 = shl i16 %a5 , %b5
475 %r6 = shl i16 %a6 , %b6
476 %r7 = shl i16 %a7 , %b7
477 %r8 = shl i16 %a8 , %b8
478 %r9 = shl i16 %a9 , %b9
479 %r10 = shl i16 %a10, %b10
480 %r11 = shl i16 %a11, %b11
481 %r12 = shl i16 %a12, %b12
482 %r13 = shl i16 %a13, %b13
483 %r14 = shl i16 %a14, %b14
484 %r15 = shl i16 %a15, %b15
485 %r16 = shl i16 %a16, %b16
486 %r17 = shl i16 %a17, %b17
487 %r18 = shl i16 %a18, %b18
488 %r19 = shl i16 %a19, %b19
489 %r20 = shl i16 %a20, %b20
490 %r21 = shl i16 %a21, %b21
491 %r22 = shl i16 %a22, %b22
492 %r23 = shl i16 %a23, %b23
493 %r24 = shl i16 %a24, %b24
494 %r25 = shl i16 %a25, %b25
495 %r26 = shl i16 %a26, %b26
496 %r27 = shl i16 %a27, %b27
497 %r28 = shl i16 %a28, %b28
498 %r29 = shl i16 %a29, %b29
499 %r30 = shl i16 %a30, %b30
500 %r31 = shl i16 %a31, %b31
501 store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
502 store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
503 store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
504 store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
505 store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
506 store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
507 store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
508 store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
509 store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
510 store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
511 store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
512 store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
513 store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
514 store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
515 store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
516 store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
517 store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
518 store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
519 store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
520 store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
521 store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
522 store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
523 store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
524 store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
525 store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
526 store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
527 store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
528 store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
529 store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
530 store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
531 store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
532 store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
533 ret void
534 }
535
536 define void @shl_v64i8() {
537 ; CHECK-LABEL: @shl_v64i8(
538 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
539 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
540 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
541 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
542 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
543 ; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
544 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
545 ; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
546 ; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i8> [[TMP1]], [[TMP5]]
547 ; CHECK-NEXT: [[TMP10:%.*]] = shl <16 x i8> [[TMP2]], [[TMP6]]
548 ; CHECK-NEXT: [[TMP11:%.*]] = shl <16 x i8> [[TMP3]], [[TMP7]]
549 ; CHECK-NEXT: [[TMP12:%.*]] = shl <16 x i8> [[TMP4]], [[TMP8]]
550 ; CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
551 ; CHECK-NEXT: store <16 x i8> [[TMP10]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
552 ; CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
553 ; CHECK-NEXT: store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
554 ; CHECK-NEXT: ret void
555 ;
556 %a0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
557 %a1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
558 %a2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
559 %a3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
560 %a4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
561 %a5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
562 %a6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
563 %a7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
564 %a8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
565 %a9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
566 %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
567 %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
568 %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
569 %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
570 %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
571 %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
572 %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
573 %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
574 %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
575 %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
576 %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
577 %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
578 %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
579 %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
580 %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
581 %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
582 %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
583 %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
584 %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
585 %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
586 %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
587 %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
588 %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
589 %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
590 %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
591 %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
592 %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
593 %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
594 %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
595 %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
596 %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
597 %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
598 %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
599 %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
600 %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
601 %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
602 %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
603 %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
604 %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
605 %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
606 %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
607 %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
608 %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
609 %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
610 %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
611 %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
612 %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
613 %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
614 %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
615 %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
616 %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
617 %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
618 %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
619 %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
620 %b0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
621 %b1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
622 %b2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
623 %b3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
624 %b4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
625 %b5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
626 %b6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
627 %b7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
628 %b8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
629 %b9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
630 %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
631 %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
632 %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
633 %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
634 %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
635 %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
636 %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
637 %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
638 %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
639 %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
640 %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
641 %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
642 %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
643 %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
644 %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
645 %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
646 %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
647 %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
648 %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
649 %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
650 %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
651 %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
652 %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
653 %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
654 %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
655 %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
656 %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
657 %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
658 %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
659 %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
660 %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
661 %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
662 %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
663 %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
664 %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
665 %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
666 %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
667 %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
668 %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
669 %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
670 %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
671 %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
672 %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
673 %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
674 %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
675 %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
676 %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
677 %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
678 %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
679 %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
680 %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
681 %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
682 %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
683 %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
684 %r0 = shl i8 %a0 , %b0
685 %r1 = shl i8 %a1 , %b1
686 %r2 = shl i8 %a2 , %b2
687 %r3 = shl i8 %a3 , %b3
688 %r4 = shl i8 %a4 , %b4
689 %r5 = shl i8 %a5 , %b5
690 %r6 = shl i8 %a6 , %b6
691 %r7 = shl i8 %a7 , %b7
692 %r8 = shl i8 %a8 , %b8
693 %r9 = shl i8 %a9 , %b9
694 %r10 = shl i8 %a10, %b10
695 %r11 = shl i8 %a11, %b11
696 %r12 = shl i8 %a12, %b12
697 %r13 = shl i8 %a13, %b13
698 %r14 = shl i8 %a14, %b14
699 %r15 = shl i8 %a15, %b15
700 %r16 = shl i8 %a16, %b16
701 %r17 = shl i8 %a17, %b17
702 %r18 = shl i8 %a18, %b18
703 %r19 = shl i8 %a19, %b19
704 %r20 = shl i8 %a20, %b20
705 %r21 = shl i8 %a21, %b21
706 %r22 = shl i8 %a22, %b22
707 %r23 = shl i8 %a23, %b23
708 %r24 = shl i8 %a24, %b24
709 %r25 = shl i8 %a25, %b25
710 %r26 = shl i8 %a26, %b26
711 %r27 = shl i8 %a27, %b27
712 %r28 = shl i8 %a28, %b28
713 %r29 = shl i8 %a29, %b29
714 %r30 = shl i8 %a30, %b30
715 %r31 = shl i8 %a31, %b31
716 %r32 = shl i8 %a32, %b32
717 %r33 = shl i8 %a33, %b33
718 %r34 = shl i8 %a34, %b34
719 %r35 = shl i8 %a35, %b35
720 %r36 = shl i8 %a36, %b36
721 %r37 = shl i8 %a37, %b37
722 %r38 = shl i8 %a38, %b38
723 %r39 = shl i8 %a39, %b39
724 %r40 = shl i8 %a40, %b40
725 %r41 = shl i8 %a41, %b41
726 %r42 = shl i8 %a42, %b42
727 %r43 = shl i8 %a43, %b43
728 %r44 = shl i8 %a44, %b44
729 %r45 = shl i8 %a45, %b45
730 %r46 = shl i8 %a46, %b46
731 %r47 = shl i8 %a47, %b47
732 %r48 = shl i8 %a48, %b48
733 %r49 = shl i8 %a49, %b49
734 %r50 = shl i8 %a50, %b50
735 %r51 = shl i8 %a51, %b51
736 %r52 = shl i8 %a52, %b52
737 %r53 = shl i8 %a53, %b53
738 %r54 = shl i8 %a54, %b54
739 %r55 = shl i8 %a55, %b55
740 %r56 = shl i8 %a56, %b56
741 %r57 = shl i8 %a57, %b57
742 %r58 = shl i8 %a58, %b58
743 %r59 = shl i8 %a59, %b59
744 %r60 = shl i8 %a60, %b60
745 %r61 = shl i8 %a61, %b61
746 %r62 = shl i8 %a62, %b62
747 %r63 = shl i8 %a63, %b63
748 store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
749 store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
750 store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
751 store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
752 store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
753 store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
754 store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
755 store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
756 store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
757 store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
758 store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
759 store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
760 store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
761 store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
762 store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
763 store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
764 store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
765 store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
766 store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
767 store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
768 store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
769 store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
770 store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
771 store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
772 store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
773 store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
774 store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
775 store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
776 store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
777 store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
778 store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
779 store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
780 store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
781 store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
782 store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
783 store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
784 store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
785 store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
786 store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
787 store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
788 store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
789 store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
790 store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
791 store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
792 store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
793 store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
794 store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
795 store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
796 store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
797 store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
798 store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
799 store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
800 store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
801 store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
802 store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
803 store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
804 store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
805 store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
806 store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
807 store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
808 store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
809 store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
810 store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
811 store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
812 ret void
813 }