llvm.org GIT mirror llvm / 180beb4
[AVX-512] Add shuffle comments for vbroadcast instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284305 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper, 3 years ago
6 changed file(s) with 141 addition(s) and 92 deletion(s).
978978
979979 case X86::VBROADCASTF128:
980980 case X86::VBROADCASTI128:
981 CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm)
982 CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm)
981983 DecodeSubVectorBroadcast(MVT::v4f64, MVT::v2f64, ShuffleMask);
984 DestName = getRegName(MI->getOperand(0).getReg());
985 break;
986 CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm)
987 CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm)
988 DecodeSubVectorBroadcast(MVT::v8f64, MVT::v2f64, ShuffleMask);
989 DestName = getRegName(MI->getOperand(0).getReg());
990 break;
991 CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm)
992 CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm)
993 DecodeSubVectorBroadcast(MVT::v8f64, MVT::v4f64, ShuffleMask);
994 DestName = getRegName(MI->getOperand(0).getReg());
995 break;
996 CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z256, rm)
997 CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z256, rm)
998 DecodeSubVectorBroadcast(MVT::v8f32, MVT::v4f32, ShuffleMask);
999 DestName = getRegName(MI->getOperand(0).getReg());
1000 break;
1001 CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm)
1002 CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm)
1003 DecodeSubVectorBroadcast(MVT::v16f32, MVT::v4f32, ShuffleMask);
1004 DestName = getRegName(MI->getOperand(0).getReg());
1005 break;
1006 CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm)
1007 CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm)
1008 DecodeSubVectorBroadcast(MVT::v16f32, MVT::v8f32, ShuffleMask);
1009 DestName = getRegName(MI->getOperand(0).getReg());
1010 break;
1011 CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r)
1012 CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r)
1013 Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
1014 CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m)
1015 CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m)
1016 DecodeSubVectorBroadcast(MVT::v8f32, MVT::v2f32, ShuffleMask);
1017 DestName = getRegName(MI->getOperand(0).getReg());
1018 break;
1019 CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r)
1020 CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r)
1021 Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
1022 CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m)
1023 CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m)
1024 DecodeSubVectorBroadcast(MVT::v16f32, MVT::v2f32, ShuffleMask);
9821025 DestName = getRegName(MI->getOperand(0).getReg());
9831026 break;
9841027
99 define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
1010 ; X64-AVX512VL-LABEL: test_broadcast_2f64_4f64:
1111 ; X64-AVX512VL: ## BB#0:
12 ; X64-AVX512VL-NEXT: vbroadcastf32x4 (%rdi), %ymm0
12 ; X64-AVX512VL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
1313 ; X64-AVX512VL-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
1414 ; X64-AVX512VL-NEXT: retq
1515 ;
1616 ; X64-AVX512BWVL-LABEL: test_broadcast_2f64_4f64:
1717 ; X64-AVX512BWVL: ## BB#0:
18 ; X64-AVX512BWVL-NEXT: vbroadcastf32x4 (%rdi), %ymm0
18 ; X64-AVX512BWVL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
1919 ; X64-AVX512BWVL-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
2020 ; X64-AVX512BWVL-NEXT: retq
2121 ;
2222 ; X64-AVX512DQVL-LABEL: test_broadcast_2f64_4f64:
2323 ; X64-AVX512DQVL: ## BB#0:
24 ; X64-AVX512DQVL-NEXT: vbroadcastf64x2 (%rdi), %ymm0
24 ; X64-AVX512DQVL-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
2525 ; X64-AVX512DQVL-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
2626 ; X64-AVX512DQVL-NEXT: retq
2727 %1 = load <2 x double>, <2 x double> *%p
3333 define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
3434 ; X64-AVX512VL-LABEL: test_broadcast_2i64_4i64:
3535 ; X64-AVX512VL: ## BB#0:
36 ; X64-AVX512VL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
36 ; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
3737 ; X64-AVX512VL-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
3838 ; X64-AVX512VL-NEXT: retq
3939 ;
4040 ; X64-AVX512BWVL-LABEL: test_broadcast_2i64_4i64:
4141 ; X64-AVX512BWVL: ## BB#0:
42 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
42 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
4343 ; X64-AVX512BWVL-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
4444 ; X64-AVX512BWVL-NEXT: retq
4545 ;
4646 ; X64-AVX512DQVL-LABEL: test_broadcast_2i64_4i64:
4747 ; X64-AVX512DQVL: ## BB#0:
48 ; X64-AVX512DQVL-NEXT: vbroadcasti64x2 (%rdi), %ymm0
48 ; X64-AVX512DQVL-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
4949 ; X64-AVX512DQVL-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
5050 ; X64-AVX512DQVL-NEXT: retq
5151 %1 = load <2 x i64>, <2 x i64> *%p
5757 define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
5858 ; X64-AVX512-LABEL: test_broadcast_4f32_8f32:
5959 ; X64-AVX512: ## BB#0:
60 ; X64-AVX512-NEXT: vbroadcastf32x4 (%rdi), %ymm0
60 ; X64-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
6161 ; X64-AVX512-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0
6262 ; X64-AVX512-NEXT: retq
6363 %1 = load <4 x float>, <4 x float> *%p
6969 define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
7070 ; X64-AVX512-LABEL: test_broadcast_4i32_8i32:
7171 ; X64-AVX512: ## BB#0:
72 ; X64-AVX512-NEXT: vbroadcasti32x4 (%rdi), %ymm0
72 ; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
7373 ; X64-AVX512-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
7474 ; X64-AVX512-NEXT: retq
7575 %1 = load <4 x i32>, <4 x i32> *%p
8181 define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
8282 ; X64-AVX512-LABEL: test_broadcast_8i16_16i16:
8383 ; X64-AVX512: ## BB#0:
84 ; X64-AVX512-NEXT: vbroadcasti32x4 (%rdi), %ymm0
84 ; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
8585 ; X64-AVX512-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
8686 ; X64-AVX512-NEXT: retq
8787 %1 = load <8 x i16>, <8 x i16> *%p
9393 define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
9494 ; X64-AVX512-LABEL: test_broadcast_16i8_32i8:
9595 ; X64-AVX512: ## BB#0:
96 ; X64-AVX512-NEXT: vbroadcasti32x4 (%rdi), %ymm0
96 ; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
9797 ; X64-AVX512-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
9898 ; X64-AVX512-NEXT: retq
9999 %1 = load <16 x i8>, <16 x i8> *%p
109109 define <8 x double> @test_broadcast_2f64_8f64(<2 x double> *%p) nounwind {
110110 ; X64-AVX512VL-LABEL: test_broadcast_2f64_8f64:
111111 ; X64-AVX512VL: ## BB#0:
112 ; X64-AVX512VL-NEXT: vbroadcastf32x4 (%rdi), %ymm0
112 ; X64-AVX512VL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
113113 ; X64-AVX512VL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
114114 ; X64-AVX512VL-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
115115 ; X64-AVX512VL-NEXT: retq
116116 ;
117117 ; X64-AVX512BWVL-LABEL: test_broadcast_2f64_8f64:
118118 ; X64-AVX512BWVL: ## BB#0:
119 ; X64-AVX512BWVL-NEXT: vbroadcastf32x4 (%rdi), %ymm0
119 ; X64-AVX512BWVL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
120120 ; X64-AVX512BWVL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
121121 ; X64-AVX512BWVL-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
122122 ; X64-AVX512BWVL-NEXT: retq
123123 ;
124124 ; X64-AVX512DQVL-LABEL: test_broadcast_2f64_8f64:
125125 ; X64-AVX512DQVL: ## BB#0:
126 ; X64-AVX512DQVL-NEXT: vbroadcastf64x2 (%rdi), %ymm0
126 ; X64-AVX512DQVL-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
127127 ; X64-AVX512DQVL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
128128 ; X64-AVX512DQVL-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
129129 ; X64-AVX512DQVL-NEXT: retq
136136 define <8 x i64> @test_broadcast_2i64_8i64(<2 x i64> *%p) nounwind {
137137 ; X64-AVX512VL-LABEL: test_broadcast_2i64_8i64:
138138 ; X64-AVX512VL: ## BB#0:
139 ; X64-AVX512VL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
139 ; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
140140 ; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
141141 ; X64-AVX512VL-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
142142 ; X64-AVX512VL-NEXT: retq
143143 ;
144144 ; X64-AVX512BWVL-LABEL: test_broadcast_2i64_8i64:
145145 ; X64-AVX512BWVL: ## BB#0:
146 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
146 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
147147 ; X64-AVX512BWVL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
148148 ; X64-AVX512BWVL-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
149149 ; X64-AVX512BWVL-NEXT: retq
150150 ;
151151 ; X64-AVX512DQVL-LABEL: test_broadcast_2i64_8i64:
152152 ; X64-AVX512DQVL: ## BB#0:
153 ; X64-AVX512DQVL-NEXT: vbroadcasti64x2 (%rdi), %ymm0
153 ; X64-AVX512DQVL-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
154154 ; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
155155 ; X64-AVX512DQVL-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
156156 ; X64-AVX512DQVL-NEXT: retq
163163 define <16 x float> @test_broadcast_4f32_16f32(<4 x float> *%p) nounwind {
164164 ; X64-AVX512VL-LABEL: test_broadcast_4f32_16f32:
165165 ; X64-AVX512VL: ## BB#0:
166 ; X64-AVX512VL-NEXT: vbroadcastf32x4 (%rdi), %ymm0
166 ; X64-AVX512VL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
167167 ; X64-AVX512VL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
168168 ; X64-AVX512VL-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
169169 ; X64-AVX512VL-NEXT: retq
170170 ;
171171 ; X64-AVX512BWVL-LABEL: test_broadcast_4f32_16f32:
172172 ; X64-AVX512BWVL: ## BB#0:
173 ; X64-AVX512BWVL-NEXT: vbroadcastf32x4 (%rdi), %ymm0
173 ; X64-AVX512BWVL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
174174 ; X64-AVX512BWVL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
175175 ; X64-AVX512BWVL-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
176176 ; X64-AVX512BWVL-NEXT: retq
177177 ;
178178 ; X64-AVX512DQVL-LABEL: test_broadcast_4f32_16f32:
179179 ; X64-AVX512DQVL: ## BB#0:
180 ; X64-AVX512DQVL-NEXT: vbroadcastf32x4 (%rdi), %ymm0
180 ; X64-AVX512DQVL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
181181 ; X64-AVX512DQVL-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
182182 ; X64-AVX512DQVL-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
183183 ; X64-AVX512DQVL-NEXT: retq
190190 define <16 x i32> @test_broadcast_4i32_16i32(<4 x i32> *%p) nounwind {
191191 ; X64-AVX512VL-LABEL: test_broadcast_4i32_16i32:
192192 ; X64-AVX512VL: ## BB#0:
193 ; X64-AVX512VL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
193 ; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
194194 ; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
195195 ; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
196196 ; X64-AVX512VL-NEXT: retq
197197 ;
198198 ; X64-AVX512BWVL-LABEL: test_broadcast_4i32_16i32:
199199 ; X64-AVX512BWVL: ## BB#0:
200 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
200 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
201201 ; X64-AVX512BWVL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
202202 ; X64-AVX512BWVL-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
203203 ; X64-AVX512BWVL-NEXT: retq
204204 ;
205205 ; X64-AVX512DQVL-LABEL: test_broadcast_4i32_16i32:
206206 ; X64-AVX512DQVL: ## BB#0:
207 ; X64-AVX512DQVL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
207 ; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
208208 ; X64-AVX512DQVL-NEXT: vinserti32x8 $1, %ymm0, %zmm0, %zmm0
209209 ; X64-AVX512DQVL-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
210210 ; X64-AVX512DQVL-NEXT: retq
217217 define <32 x i16> @test_broadcast_8i16_32i16(<8 x i16> *%p) nounwind {
218218 ; X64-AVX512VL-LABEL: test_broadcast_8i16_32i16:
219219 ; X64-AVX512VL: ## BB#0:
220 ; X64-AVX512VL-NEXT: vbroadcasti32x4 (%rdi), %ymm1
220 ; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm1 = mem[0,1,2,3,0,1,2,3]
221221 ; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0
222222 ; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
223223 ; X64-AVX512VL-NEXT: retq
224224 ;
225225 ; X64-AVX512BWVL-LABEL: test_broadcast_8i16_32i16:
226226 ; X64-AVX512BWVL: ## BB#0:
227 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
227 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
228228 ; X64-AVX512BWVL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
229229 ; X64-AVX512BWVL-NEXT: vpaddw {{.*}}(%rip), %zmm0, %zmm0
230230 ; X64-AVX512BWVL-NEXT: retq
231231 ;
232232 ; X64-AVX512DQVL-LABEL: test_broadcast_8i16_32i16:
233233 ; X64-AVX512DQVL: ## BB#0:
234 ; X64-AVX512DQVL-NEXT: vbroadcasti32x4 (%rdi), %ymm1
234 ; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm1 = mem[0,1,2,3,0,1,2,3]
235235 ; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0
236236 ; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
237237 ; X64-AVX512DQVL-NEXT: retq
244244 define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind {
245245 ; X64-AVX512VL-LABEL: test_broadcast_16i8_64i8:
246246 ; X64-AVX512VL: ## BB#0:
247 ; X64-AVX512VL-NEXT: vbroadcasti32x4 (%rdi), %ymm1
247 ; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm1 = mem[0,1,2,3,0,1,2,3]
248248 ; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0
249249 ; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
250250 ; X64-AVX512VL-NEXT: retq
251251 ;
252252 ; X64-AVX512BWVL-LABEL: test_broadcast_16i8_64i8:
253253 ; X64-AVX512BWVL: ## BB#0:
254 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
254 ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
255255 ; X64-AVX512BWVL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
256256 ; X64-AVX512BWVL-NEXT: vpaddb {{.*}}(%rip), %zmm0, %zmm0
257257 ; X64-AVX512BWVL-NEXT: retq
258258 ;
259259 ; X64-AVX512DQVL-LABEL: test_broadcast_16i8_64i8:
260260 ; X64-AVX512DQVL: ## BB#0:
261 ; X64-AVX512DQVL-NEXT: vbroadcasti32x4 (%rdi), %ymm1
261 ; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm1 = mem[0,1,2,3,0,1,2,3]
262262 ; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0
263263 ; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
264264 ; X64-AVX512DQVL-NEXT: retq
531531 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
532532 ; CHECK: ## BB#0:
533533 ; CHECK-NEXT: kmovw %edi, %k1
534 ; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm1 {%k1}
535 ; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
536 ; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm0
534 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
535 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm2 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
536 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
537537 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
538538 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
539539 ; CHECK-NEXT: retq
551551 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
552552 ; CHECK: ## BB#0:
553553 ; CHECK-NEXT: kmovw %edi, %k1
554 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm1 {%k1}
555 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
556 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm0
554 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
555 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm2 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
556 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
557557 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
558558 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
559559 ; CHECK-NEXT: retq
651651 ; CHECK: ## BB#0:
652652 ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
653653 ; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x19,0xc8]
654 ; CHECK-NEXT: ## ymm1 = xmm0[0,1,0,1,0,1,0,1]
654655 ; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x19,0xd0]
656 ; CHECK-NEXT: ## ymm2 = xmm0[0,1,0,1,0,1,0,1]
655657 ; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x19,0xc0]
658 ; CHECK-NEXT: ## ymm0 = xmm0[0,1,0,1,0,1,0,1]
656659 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
657660 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
658661 ; CHECK-NEXT: retq ## encoding: [0xc3]
671674 ; CHECK: ## BB#0:
672675 ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
673676 ; CHECK-NEXT: vbroadcasti32x2 (%rsi), %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e]
677 ; CHECK-NEXT: ## ymm1 = mem[0,1,0,1,0,1,0,1]
674678 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x59,0xd0]
679 ; CHECK-NEXT: ## ymm2 = xmm0[0,1,0,1,0,1,0,1]
675680 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x59,0xc0]
681 ; CHECK-NEXT: ## ymm0 = xmm0[0,1,0,1,0,1,0,1]
676682 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
677683 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
678684 ; CHECK-NEXT: retq ## encoding: [0xc3]
2323 ; X32-AVX512F-LABEL: test_broadcast_2f64_4f64:
2424 ; X32-AVX512F: ## BB#0:
2525 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
26 ; X32-AVX512F-NEXT: vbroadcastf32x4 (%eax), %ymm0
26 ; X32-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
2727 ; X32-AVX512F-NEXT: retl
2828 ;
2929 ; X32-AVX512BW-LABEL: test_broadcast_2f64_4f64:
3030 ; X32-AVX512BW: ## BB#0:
3131 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
32 ; X32-AVX512BW-NEXT: vbroadcastf32x4 (%eax), %ymm0
32 ; X32-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
3333 ; X32-AVX512BW-NEXT: retl
3434 ;
3535 ; X32-AVX512DQ-LABEL: test_broadcast_2f64_4f64:
3636 ; X32-AVX512DQ: ## BB#0:
3737 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
38 ; X32-AVX512DQ-NEXT: vbroadcastf64x2 (%eax), %ymm0
38 ; X32-AVX512DQ-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
3939 ; X32-AVX512DQ-NEXT: retl
4040 ;
4141 ; X64-AVX-LABEL: test_broadcast_2f64_4f64:
4545 ;
4646 ; X64-AVX512F-LABEL: test_broadcast_2f64_4f64:
4747 ; X64-AVX512F: ## BB#0:
48 ; X64-AVX512F-NEXT: vbroadcastf32x4 (%rdi), %ymm0
48 ; X64-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
4949 ; X64-AVX512F-NEXT: retq
5050 ;
5151 ; X64-AVX512BW-LABEL: test_broadcast_2f64_4f64:
5252 ; X64-AVX512BW: ## BB#0:
53 ; X64-AVX512BW-NEXT: vbroadcastf32x4 (%rdi), %ymm0
53 ; X64-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
5454 ; X64-AVX512BW-NEXT: retq
5555 ;
5656 ; X64-AVX512DQ-LABEL: test_broadcast_2f64_4f64:
5757 ; X64-AVX512DQ: ## BB#0:
58 ; X64-AVX512DQ-NEXT: vbroadcastf64x2 (%rdi), %ymm0
58 ; X64-AVX512DQ-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
5959 ; X64-AVX512DQ-NEXT: retq
6060 %1 = load <2 x double>, <2 x double> *%p
6161 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32>
7373 ; X32-AVX512F-LABEL: test_broadcast_2f64_8f64:
7474 ; X32-AVX512F: ## BB#0:
7575 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
76 ; X32-AVX512F-NEXT: vbroadcastf32x4 (%eax), %ymm0
76 ; X32-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
7777 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
7878 ; X32-AVX512F-NEXT: retl
7979 ;
8080 ; X32-AVX512BW-LABEL: test_broadcast_2f64_8f64:
8181 ; X32-AVX512BW: ## BB#0:
8282 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
83 ; X32-AVX512BW-NEXT: vbroadcastf32x4 (%eax), %ymm0
83 ; X32-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
8484 ; X32-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
8585 ; X32-AVX512BW-NEXT: retl
8686 ;
8787 ; X32-AVX512DQ-LABEL: test_broadcast_2f64_8f64:
8888 ; X32-AVX512DQ: ## BB#0:
8989 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
90 ; X32-AVX512DQ-NEXT: vbroadcastf64x2 (%eax), %ymm0
90 ; X32-AVX512DQ-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
9191 ; X32-AVX512DQ-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
9292 ; X32-AVX512DQ-NEXT: retl
9393 ;
9999 ;
100100 ; X64-AVX512F-LABEL: test_broadcast_2f64_8f64:
101101 ; X64-AVX512F: ## BB#0:
102 ; X64-AVX512F-NEXT: vbroadcastf32x4 (%rdi), %ymm0
102 ; X64-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
103103 ; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
104104 ; X64-AVX512F-NEXT: retq
105105 ;
106106 ; X64-AVX512BW-LABEL: test_broadcast_2f64_8f64:
107107 ; X64-AVX512BW: ## BB#0:
108 ; X64-AVX512BW-NEXT: vbroadcastf32x4 (%rdi), %ymm0
108 ; X64-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
109109 ; X64-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
110110 ; X64-AVX512BW-NEXT: retq
111111 ;
112112 ; X64-AVX512DQ-LABEL: test_broadcast_2f64_8f64:
113113 ; X64-AVX512DQ: ## BB#0:
114 ; X64-AVX512DQ-NEXT: vbroadcastf64x2 (%rdi), %ymm0
114 ; X64-AVX512DQ-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
115115 ; X64-AVX512DQ-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
116116 ; X64-AVX512DQ-NEXT: retq
117117 %1 = load <2 x double>, <2 x double> *%p
160160 ; X32-AVX512F-LABEL: test_broadcast_2i64_4i64:
161161 ; X32-AVX512F: ## BB#0:
162162 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
163 ; X32-AVX512F-NEXT: vbroadcasti32x4 (%eax), %ymm0
163 ; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
164164 ; X32-AVX512F-NEXT: retl
165165 ;
166166 ; X32-AVX512BW-LABEL: test_broadcast_2i64_4i64:
167167 ; X32-AVX512BW: ## BB#0:
168168 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
169 ; X32-AVX512BW-NEXT: vbroadcasti32x4 (%eax), %ymm0
169 ; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
170170 ; X32-AVX512BW-NEXT: retl
171171 ;
172172 ; X32-AVX512DQ-LABEL: test_broadcast_2i64_4i64:
173173 ; X32-AVX512DQ: ## BB#0:
174174 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
175 ; X32-AVX512DQ-NEXT: vbroadcasti64x2 (%eax), %ymm0
175 ; X32-AVX512DQ-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
176176 ; X32-AVX512DQ-NEXT: retl
177177 ;
178178 ; X64-AVX-LABEL: test_broadcast_2i64_4i64:
182182 ;
183183 ; X64-AVX512F-LABEL: test_broadcast_2i64_4i64:
184184 ; X64-AVX512F: ## BB#0:
185 ; X64-AVX512F-NEXT: vbroadcasti32x4 (%rdi), %ymm0
185 ; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
186186 ; X64-AVX512F-NEXT: retq
187187 ;
188188 ; X64-AVX512BW-LABEL: test_broadcast_2i64_4i64:
189189 ; X64-AVX512BW: ## BB#0:
190 ; X64-AVX512BW-NEXT: vbroadcasti32x4 (%rdi), %ymm0
190 ; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
191191 ; X64-AVX512BW-NEXT: retq
192192 ;
193193 ; X64-AVX512DQ-LABEL: test_broadcast_2i64_4i64:
194194 ; X64-AVX512DQ: ## BB#0:
195 ; X64-AVX512DQ-NEXT: vbroadcasti64x2 (%rdi), %ymm0
195 ; X64-AVX512DQ-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
196196 ; X64-AVX512DQ-NEXT: retq
197197 %1 = load <2 x i64>, <2 x i64> *%p
198198 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32>
217217 ; X32-AVX512F-LABEL: test_broadcast_2i64_8i64:
218218 ; X32-AVX512F: ## BB#0:
219219 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
220 ; X32-AVX512F-NEXT: vbroadcasti32x4 (%eax), %ymm0
220 ; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
221221 ; X32-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
222222 ; X32-AVX512F-NEXT: retl
223223 ;
224224 ; X32-AVX512BW-LABEL: test_broadcast_2i64_8i64:
225225 ; X32-AVX512BW: ## BB#0:
226226 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
227 ; X32-AVX512BW-NEXT: vbroadcasti32x4 (%eax), %ymm0
227 ; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
228228 ; X32-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
229229 ; X32-AVX512BW-NEXT: retl
230230 ;
231231 ; X32-AVX512DQ-LABEL: test_broadcast_2i64_8i64:
232232 ; X32-AVX512DQ: ## BB#0:
233233 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
234 ; X32-AVX512DQ-NEXT: vbroadcasti64x2 (%eax), %ymm0
234 ; X32-AVX512DQ-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
235235 ; X32-AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
236236 ; X32-AVX512DQ-NEXT: retl
237237 ;
249249 ;
250250 ; X64-AVX512F-LABEL: test_broadcast_2i64_8i64:
251251 ; X64-AVX512F: ## BB#0:
252 ; X64-AVX512F-NEXT: vbroadcasti32x4 (%rdi), %ymm0
252 ; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
253253 ; X64-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
254254 ; X64-AVX512F-NEXT: retq
255255 ;
256256 ; X64-AVX512BW-LABEL: test_broadcast_2i64_8i64:
257257 ; X64-AVX512BW: ## BB#0:
258 ; X64-AVX512BW-NEXT: vbroadcasti32x4 (%rdi), %ymm0
258 ; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
259259 ; X64-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
260260 ; X64-AVX512BW-NEXT: retq
261261 ;
262262 ; X64-AVX512DQ-LABEL: test_broadcast_2i64_8i64:
263263 ; X64-AVX512DQ: ## BB#0:
264 ; X64-AVX512DQ-NEXT: vbroadcasti64x2 (%rdi), %ymm0
264 ; X64-AVX512DQ-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
265265 ; X64-AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
266266 ; X64-AVX512DQ-NEXT: retq
267267 %1 = load <2 x i64>, <2 x i64> *%p
310310 ; X32-AVX512-LABEL: test_broadcast_4f32_8f32:
311311 ; X32-AVX512: ## BB#0:
312312 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
313 ; X32-AVX512-NEXT: vbroadcastf32x4 (%eax), %ymm0
313 ; X32-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
314314 ; X32-AVX512-NEXT: retl
315315 ;
316316 ; X64-AVX-LABEL: test_broadcast_4f32_8f32:
320320 ;
321321 ; X64-AVX512-LABEL: test_broadcast_4f32_8f32:
322322 ; X64-AVX512: ## BB#0:
323 ; X64-AVX512-NEXT: vbroadcastf32x4 (%rdi), %ymm0
323 ; X64-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
324324 ; X64-AVX512-NEXT: retq
325325 %1 = load <4 x float>, <4 x float> *%p
326326 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32>
338338 ; X32-AVX512F-LABEL: test_broadcast_4f32_16f32:
339339 ; X32-AVX512F: ## BB#0:
340340 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
341 ; X32-AVX512F-NEXT: vbroadcastf32x4 (%eax), %ymm0
341 ; X32-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
342342 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
343343 ; X32-AVX512F-NEXT: retl
344344 ;
345345 ; X32-AVX512BW-LABEL: test_broadcast_4f32_16f32:
346346 ; X32-AVX512BW: ## BB#0:
347347 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
348 ; X32-AVX512BW-NEXT: vbroadcastf32x4 (%eax), %ymm0
348 ; X32-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
349349 ; X32-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
350350 ; X32-AVX512BW-NEXT: retl
351351 ;
352352 ; X32-AVX512DQ-LABEL: test_broadcast_4f32_16f32:
353353 ; X32-AVX512DQ: ## BB#0:
354354 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
355 ; X32-AVX512DQ-NEXT: vbroadcastf32x4 (%eax), %ymm0
355 ; X32-AVX512DQ-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
356356 ; X32-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
357357 ; X32-AVX512DQ-NEXT: retl
358358 ;
364364 ;
365365 ; X64-AVX512F-LABEL: test_broadcast_4f32_16f32:
366366 ; X64-AVX512F: ## BB#0:
367 ; X64-AVX512F-NEXT: vbroadcastf32x4 (%rdi), %ymm0
367 ; X64-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
368368 ; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
369369 ; X64-AVX512F-NEXT: retq
370370 ;
371371 ; X64-AVX512BW-LABEL: test_broadcast_4f32_16f32:
372372 ; X64-AVX512BW: ## BB#0:
373 ; X64-AVX512BW-NEXT: vbroadcastf32x4 (%rdi), %ymm0
373 ; X64-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
374374 ; X64-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
375375 ; X64-AVX512BW-NEXT: retq
376376 ;
377377 ; X64-AVX512DQ-LABEL: test_broadcast_4f32_16f32:
378378 ; X64-AVX512DQ: ## BB#0:
379 ; X64-AVX512DQ-NEXT: vbroadcastf32x4 (%rdi), %ymm0
379 ; X64-AVX512DQ-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
380380 ; X64-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
381381 ; X64-AVX512DQ-NEXT: retq
382382 %1 = load <4 x float>, <4 x float> *%p
451451 ; X32-AVX512-LABEL: test_broadcast_4i32_8i32:
452452 ; X32-AVX512: ## BB#0:
453453 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
454 ; X32-AVX512-NEXT: vbroadcasti32x4 (%eax), %ymm0
454 ; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
455455 ; X32-AVX512-NEXT: retl
456456 ;
457457 ; X64-AVX-LABEL: test_broadcast_4i32_8i32:
461461 ;
462462 ; X64-AVX512-LABEL: test_broadcast_4i32_8i32:
463463 ; X64-AVX512: ## BB#0:
464 ; X64-AVX512-NEXT: vbroadcasti32x4 (%rdi), %ymm0
464 ; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
465465 ; X64-AVX512-NEXT: retq
466466 %1 = load <4 x i32>, <4 x i32> *%p
467467 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32>
486486 ; X32-AVX512F-LABEL: test_broadcast_4i32_16i32:
487487 ; X32-AVX512F: ## BB#0:
488488 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
489 ; X32-AVX512F-NEXT: vbroadcasti32x4 (%eax), %ymm0
489 ; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
490490 ; X32-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
491491 ; X32-AVX512F-NEXT: retl
492492 ;
493493 ; X32-AVX512BW-LABEL: test_broadcast_4i32_16i32:
494494 ; X32-AVX512BW: ## BB#0:
495495 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
496 ; X32-AVX512BW-NEXT: vbroadcasti32x4 (%eax), %ymm0
496 ; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
497497 ; X32-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
498498 ; X32-AVX512BW-NEXT: retl
499499 ;
500500 ; X32-AVX512DQ-LABEL: test_broadcast_4i32_16i32:
501501 ; X32-AVX512DQ: ## BB#0:
502502 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
503 ; X32-AVX512DQ-NEXT: vbroadcasti32x4 (%eax), %ymm0
503 ; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
504504 ; X32-AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm0, %zmm0
505505 ; X32-AVX512DQ-NEXT: retl
506506 ;
518518 ;
519519 ; X64-AVX512F-LABEL: test_broadcast_4i32_16i32:
520520 ; X64-AVX512F: ## BB#0:
521 ; X64-AVX512F-NEXT: vbroadcasti32x4 (%rdi), %ymm0
521 ; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
522522 ; X64-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
523523 ; X64-AVX512F-NEXT: retq
524524 ;
525525 ; X64-AVX512BW-LABEL: test_broadcast_4i32_16i32:
526526 ; X64-AVX512BW: ## BB#0:
527 ; X64-AVX512BW-NEXT: vbroadcasti32x4 (%rdi), %ymm0
527 ; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
528528 ; X64-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
529529 ; X64-AVX512BW-NEXT: retq
530530 ;
531531 ; X64-AVX512DQ-LABEL: test_broadcast_4i32_16i32:
532532 ; X64-AVX512DQ: ## BB#0:
533 ; X64-AVX512DQ-NEXT: vbroadcasti32x4 (%rdi), %ymm0
533 ; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
534534 ; X64-AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm0, %zmm0
535535 ; X64-AVX512DQ-NEXT: retq
536536 %1 = load <4 x i32>, <4 x i32> *%p
605605 ; X32-AVX512-LABEL: test_broadcast_8i16_16i16:
606606 ; X32-AVX512: ## BB#0:
607607 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
608 ; X32-AVX512-NEXT: vbroadcasti32x4 (%eax), %ymm0
608 ; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
609609 ; X32-AVX512-NEXT: retl
610610 ;
611611 ; X64-AVX-LABEL: test_broadcast_8i16_16i16:
615615 ;
616616 ; X64-AVX512-LABEL: test_broadcast_8i16_16i16:
617617 ; X64-AVX512: ## BB#0:
618 ; X64-AVX512-NEXT: vbroadcasti32x4 (%rdi), %ymm0
618 ; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
619619 ; X64-AVX512-NEXT: retq
620620 %1 = load <8 x i16>, <8 x i16> *%p
621621 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32>
640640 ; X32-AVX512F-LABEL: test_broadcast_8i16_32i16:
641641 ; X32-AVX512F: ## BB#0:
642642 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
643 ; X32-AVX512F-NEXT: vbroadcasti32x4 (%eax), %ymm0
643 ; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
644644 ; X32-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1
645645 ; X32-AVX512F-NEXT: retl
646646 ;
647647 ; X32-AVX512BW-LABEL: test_broadcast_8i16_32i16:
648648 ; X32-AVX512BW: ## BB#0:
649649 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
650 ; X32-AVX512BW-NEXT: vbroadcasti32x4 (%eax), %ymm0
650 ; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
651651 ; X32-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
652652 ; X32-AVX512BW-NEXT: retl
653653 ;
654654 ; X32-AVX512DQ-LABEL: test_broadcast_8i16_32i16:
655655 ; X32-AVX512DQ: ## BB#0:
656656 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
657 ; X32-AVX512DQ-NEXT: vbroadcasti32x4 (%eax), %ymm0
657 ; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
658658 ; X32-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1
659659 ; X32-AVX512DQ-NEXT: retl
660660 ;
672672 ;
673673 ; X64-AVX512F-LABEL: test_broadcast_8i16_32i16:
674674 ; X64-AVX512F: ## BB#0:
675 ; X64-AVX512F-NEXT: vbroadcasti32x4 (%rdi), %ymm0
675 ; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
676676 ; X64-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1
677677 ; X64-AVX512F-NEXT: retq
678678 ;
679679 ; X64-AVX512BW-LABEL: test_broadcast_8i16_32i16:
680680 ; X64-AVX512BW: ## BB#0:
681 ; X64-AVX512BW-NEXT: vbroadcasti32x4 (%rdi), %ymm0
681 ; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
682682 ; X64-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
683683 ; X64-AVX512BW-NEXT: retq
684684 ;
685685 ; X64-AVX512DQ-LABEL: test_broadcast_8i16_32i16:
686686 ; X64-AVX512DQ: ## BB#0:
687 ; X64-AVX512DQ-NEXT: vbroadcasti32x4 (%rdi), %ymm0
687 ; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
688688 ; X64-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1
689689 ; X64-AVX512DQ-NEXT: retq
690690 %1 = load <8 x i16>, <8 x i16> *%p
759759 ; X32-AVX512-LABEL: test_broadcast_16i8_32i8:
760760 ; X32-AVX512: ## BB#0:
761761 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
762 ; X32-AVX512-NEXT: vbroadcasti32x4 (%eax), %ymm0
762 ; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
763763 ; X32-AVX512-NEXT: retl
764764 ;
765765 ; X64-AVX-LABEL: test_broadcast_16i8_32i8:
769769 ;
770770 ; X64-AVX512-LABEL: test_broadcast_16i8_32i8:
771771 ; X64-AVX512: ## BB#0:
772 ; X64-AVX512-NEXT: vbroadcasti32x4 (%rdi), %ymm0
772 ; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
773773 ; X64-AVX512-NEXT: retq
774774 %1 = load <16 x i8>, <16 x i8> *%p
775775 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32>
794794 ; X32-AVX512F-LABEL: test_broadcast_16i8_64i8:
795795 ; X32-AVX512F: ## BB#0:
796796 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
797 ; X32-AVX512F-NEXT: vbroadcasti32x4 (%eax), %ymm0
797 ; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
798798 ; X32-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1
799799 ; X32-AVX512F-NEXT: retl
800800 ;
801801 ; X32-AVX512BW-LABEL: test_broadcast_16i8_64i8:
802802 ; X32-AVX512BW: ## BB#0:
803803 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
804 ; X32-AVX512BW-NEXT: vbroadcasti32x4 (%eax), %ymm0
804 ; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
805805 ; X32-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
806806 ; X32-AVX512BW-NEXT: retl
807807 ;
808808 ; X32-AVX512DQ-LABEL: test_broadcast_16i8_64i8:
809809 ; X32-AVX512DQ: ## BB#0:
810810 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
811 ; X32-AVX512DQ-NEXT: vbroadcasti32x4 (%eax), %ymm0
811 ; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
812812 ; X32-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1
813813 ; X32-AVX512DQ-NEXT: retl
814814 ;
826826 ;
827827 ; X64-AVX512F-LABEL: test_broadcast_16i8_64i8:
828828 ; X64-AVX512F: ## BB#0:
829 ; X64-AVX512F-NEXT: vbroadcasti32x4 (%rdi), %ymm0
829 ; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
830830 ; X64-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1
831831 ; X64-AVX512F-NEXT: retq
832832 ;
833833 ; X64-AVX512BW-LABEL: test_broadcast_16i8_64i8:
834834 ; X64-AVX512BW: ## BB#0:
835 ; X64-AVX512BW-NEXT: vbroadcasti32x4 (%rdi), %ymm0
835 ; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
836836 ; X64-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
837837 ; X64-AVX512BW-NEXT: retq
838838 ;
839839 ; X64-AVX512DQ-LABEL: test_broadcast_16i8_64i8:
840840 ; X64-AVX512DQ: ## BB#0:
841 ; X64-AVX512DQ-NEXT: vbroadcasti32x4 (%rdi), %ymm0
841 ; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
842842 ; X64-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1
843843 ; X64-AVX512DQ-NEXT: retq
844844 %1 = load <16 x i8>, <16 x i8> *%p
13171317 ;
13181318 ; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
13191319 ; AVX512VL: # BB#0:
1320 ; AVX512VL-NEXT: vbroadcasti32x4 (%rdi), %ymm0
1320 ; AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
13211321 ; AVX512VL-NEXT: retq
13221322 %v = load <2 x i64>, <2 x i64>* %ptr
13231323 %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32>
13371337 ;
13381338 ; AVX512VL-LABEL: splat128_mem_v4f64_from_v2f64:
13391339 ; AVX512VL: # BB#0:
1340 ; AVX512VL-NEXT: vbroadcastf32x4 (%rdi), %ymm0
1340 ; AVX512VL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
13411341 ; AVX512VL-NEXT: retq
13421342 %v = load <2 x double>, <2 x double>* %ptr
13431343 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32>