llvm.org GIT mirror llvm / 9d122e2
[X86] Add comments to the end of FMA3 instructions to make the operation clear Summary: There are 3 different operand orders for FMA instructions so figuring out the exact operation being performed requires a lot of thought. This patch adds a comment to the end of the assembly line to print the exact operation. I think I've got all the instructions in here except the ones with builtin rounding. I didn't update all tests, but I assume we can get them as we regenerate tests in the future. Reviewers: spatel, v_klochkov, RKSimon Reviewed By: spatel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D44345 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327225 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 2 years ago
9 changed file(s) with 3732 addition(s) and 1585 deletion(s). Raw diff Collapse all Expand all
158158 CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
159159 CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
160160 CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
161
// NOTE(review): CASE_AVX512_INS_COMMON / CASE_AVX_INS_COMMON are defined
// earlier in this file (outside this hunk); each expands to one or more
// "case X86::...:" labels for the switch in printFMA3Comments below.
// Cover all three EVEX vector lengths of one FMA instruction (Z, Z256,
// Z128 — presumably 512/256/128-bit; confirm against the .td definitions)
// with the given operand-form suffix.
162 #define CASE_AVX512_FMA(Inst, suf) \
163 CASE_AVX512_INS_COMMON(Inst, Z, suf) \
164 CASE_AVX512_INS_COMMON(Inst, Z256, suf) \
165 CASE_AVX512_INS_COMMON(Inst, Z128, suf)
166
// Cover every encoding of one FMA instruction: the three EVEX lengths plus
// the VEX forms (empty suffix and Y).
167 #define CASE_FMA(Inst, suf) \
168 CASE_AVX512_FMA(Inst, suf) \
169 CASE_AVX_INS_COMMON(Inst, , suf) \
170 CASE_AVX_INS_COMMON(Inst, Y, suf)
171
// Packed PD/PS register-register forms ("r" suffix).
172 #define CASE_FMA_PACKED_REG(Inst) \
173 CASE_FMA(Inst##PD, r) \
174 CASE_FMA(Inst##PS, r)
175
// Packed PD/PS memory forms ("m"), plus the EVEX-only broadcast-from-memory
// forms ("mb").
176 #define CASE_FMA_PACKED_MEM(Inst) \
177 CASE_FMA(Inst##PD, m) \
178 CASE_FMA(Inst##PS, m) \
179 CASE_AVX512_FMA(Inst##PD, mb) \
180 CASE_AVX512_FMA(Inst##PS, mb)
181
// Scalar SD/SS register forms: VEX and EVEX, both the plain and the
// intrinsic ("_Int") variants.
182 #define CASE_FMA_SCALAR_REG(Inst) \
183 CASE_AVX_INS_COMMON(Inst##SD, , r) \
184 CASE_AVX_INS_COMMON(Inst##SS, , r) \
185 CASE_AVX_INS_COMMON(Inst##SD, , r_Int) \
186 CASE_AVX_INS_COMMON(Inst##SS, , r_Int) \
187 CASE_AVX_INS_COMMON(Inst##SD, Z, r) \
188 CASE_AVX_INS_COMMON(Inst##SS, Z, r) \
189 CASE_AVX512_INS_COMMON(Inst##SD, Z, r_Int) \
190 CASE_AVX512_INS_COMMON(Inst##SS, Z, r_Int)
191
// Scalar SD/SS memory forms, mirroring CASE_FMA_SCALAR_REG.
192 #define CASE_FMA_SCALAR_MEM(Inst) \
193 CASE_AVX_INS_COMMON(Inst##SD, , m) \
194 CASE_AVX_INS_COMMON(Inst##SS, , m) \
195 CASE_AVX_INS_COMMON(Inst##SD, , m_Int) \
196 CASE_AVX_INS_COMMON(Inst##SS, , m_Int) \
197 CASE_AVX_INS_COMMON(Inst##SD, Z, m) \
198 CASE_AVX_INS_COMMON(Inst##SS, Z, m) \
199 CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
200 CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
161201
162202 static unsigned getVectorRegSize(unsigned RegNo) {
163203 if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
231271 OS << " {z}";
232272 }
233273
// Emit an end-of-line assembly comment describing the arithmetic performed
// by an FMA3 instruction, e.g. "xmm0 = (xmm1 * xmm2) + xmm3".  Returns true
// iff MI matched one of the FMA3 opcodes below and a comment was written to
// OS; returns false (printing nothing) for all other opcodes.  Opcodes with
// embedded rounding are not handled here.
274 static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS,
275 const char *(*getRegName)(unsigned)) {
276 const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
277 unsigned NumOperands = MI->getNumOperands();
278 bool RegForm = false;
279 bool Negate = false;
280 StringRef AccStr = "+";
281
282 // The operands for FMA instructions without rounding fall into two forms.
283 // dest, src1, src2, src3
284 // dest, src1, mask, src2, src3
285 // Where src3 is either a register or 5 memory address operands. So to find
286 // dest and src1 we can index from the front. To find src2 and src3 we can
287 // index from the end by taking into account memory vs register form when
288 // finding src2.
289
// Each opcode family appears twice below: the register forms first (these
// read the last operand as a register and set RegForm), then fall through
// into the shared memory-form code.  NumOperands-(RegForm?2:6) selects the
// operand just before src3, skipping src3's 1 register or 5 memory
// operands.  Any name left nullptr is the memory operand and is printed as
// "mem" at the end.  The 132/213/231 suffix determines which of operand 1 /
// that middle operand / the last operand is a multiplicand vs. the
// accumulator, which is why each family assigns the three names differently.
290 switch (MI->getOpcode()) {
291 default:
292 return false;
293 CASE_FMA_PACKED_REG(FMADD132)
294 CASE_FMA_SCALAR_REG(FMADD132)
295 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
296 RegForm = true;
297 LLVM_FALLTHROUGH;
298 CASE_FMA_PACKED_MEM(FMADD132)
299 CASE_FMA_SCALAR_MEM(FMADD132)
300 AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
301 Mul1Name = getRegName(MI->getOperand(1).getReg());
302 break;
303
304 CASE_FMA_PACKED_REG(FMADD213)
305 CASE_FMA_SCALAR_REG(FMADD213)
306 AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
307 RegForm = true;
308 LLVM_FALLTHROUGH;
309 CASE_FMA_PACKED_MEM(FMADD213)
310 CASE_FMA_SCALAR_MEM(FMADD213)
311 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
312 Mul2Name = getRegName(MI->getOperand(1).getReg());
313 break;
314
315 CASE_FMA_PACKED_REG(FMADD231)
316 CASE_FMA_SCALAR_REG(FMADD231)
317 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
318 RegForm = true;
319 LLVM_FALLTHROUGH;
320 CASE_FMA_PACKED_MEM(FMADD231)
321 CASE_FMA_SCALAR_MEM(FMADD231)
322 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
323 AccName = getRegName(MI->getOperand(1).getReg());
324 break;
325
// FMSUB families: same operand assignments as FMADD, but the accumulator is
// subtracted.
326 CASE_FMA_PACKED_REG(FMSUB132)
327 CASE_FMA_SCALAR_REG(FMSUB132)
328 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
329 RegForm = true;
330 LLVM_FALLTHROUGH;
331 CASE_FMA_PACKED_MEM(FMSUB132)
332 CASE_FMA_SCALAR_MEM(FMSUB132)
333 AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
334 Mul1Name = getRegName(MI->getOperand(1).getReg());
335 AccStr = "-";
336 break;
337
338 CASE_FMA_PACKED_REG(FMSUB213)
339 CASE_FMA_SCALAR_REG(FMSUB213)
340 AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
341 RegForm = true;
342 LLVM_FALLTHROUGH;
343 CASE_FMA_PACKED_MEM(FMSUB213)
344 CASE_FMA_SCALAR_MEM(FMSUB213)
345 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
346 Mul2Name = getRegName(MI->getOperand(1).getReg());
347 AccStr = "-";
348 break;
349
350 CASE_FMA_PACKED_REG(FMSUB231)
351 CASE_FMA_SCALAR_REG(FMSUB231)
352 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
353 RegForm = true;
354 LLVM_FALLTHROUGH;
355 CASE_FMA_PACKED_MEM(FMSUB231)
356 CASE_FMA_SCALAR_MEM(FMSUB231)
357 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
358 AccName = getRegName(MI->getOperand(1).getReg());
359 AccStr = "-";
360 break;
361
// FNMADD families: the product is negated ("-(a * b) + c").
362 CASE_FMA_PACKED_REG(FNMADD132)
363 CASE_FMA_SCALAR_REG(FNMADD132)
364 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
365 RegForm = true;
366 LLVM_FALLTHROUGH;
367 CASE_FMA_PACKED_MEM(FNMADD132)
368 CASE_FMA_SCALAR_MEM(FNMADD132)
369 AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
370 Mul1Name = getRegName(MI->getOperand(1).getReg());
371 Negate = true;
372 break;
373
374 CASE_FMA_PACKED_REG(FNMADD213)
375 CASE_FMA_SCALAR_REG(FNMADD213)
376 AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
377 RegForm = true;
378 LLVM_FALLTHROUGH;
379 CASE_FMA_PACKED_MEM(FNMADD213)
380 CASE_FMA_SCALAR_MEM(FNMADD213)
381 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
382 Mul2Name = getRegName(MI->getOperand(1).getReg());
383 Negate = true;
384 break;
385
386 CASE_FMA_PACKED_REG(FNMADD231)
387 CASE_FMA_SCALAR_REG(FNMADD231)
388 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
389 RegForm = true;
390 LLVM_FALLTHROUGH;
391 CASE_FMA_PACKED_MEM(FNMADD231)
392 CASE_FMA_SCALAR_MEM(FNMADD231)
393 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
394 AccName = getRegName(MI->getOperand(1).getReg());
395 Negate = true;
396 break;
397
// FNMSUB families: negated product and subtracted accumulator
// ("-(a * b) - c").
398 CASE_FMA_PACKED_REG(FNMSUB132)
399 CASE_FMA_SCALAR_REG(FNMSUB132)
400 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
401 RegForm = true;
402 LLVM_FALLTHROUGH;
403 CASE_FMA_PACKED_MEM(FNMSUB132)
404 CASE_FMA_SCALAR_MEM(FNMSUB132)
405 AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
406 Mul1Name = getRegName(MI->getOperand(1).getReg());
407 AccStr = "-";
408 Negate = true;
409 break;
410
411 CASE_FMA_PACKED_REG(FNMSUB213)
412 CASE_FMA_SCALAR_REG(FNMSUB213)
413 AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
414 RegForm = true;
415 LLVM_FALLTHROUGH;
416 CASE_FMA_PACKED_MEM(FNMSUB213)
417 CASE_FMA_SCALAR_MEM(FNMSUB213)
418 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
419 Mul2Name = getRegName(MI->getOperand(1).getReg());
420 AccStr = "-";
421 Negate = true;
422 break;
423
424 CASE_FMA_PACKED_REG(FNMSUB231)
425 CASE_FMA_SCALAR_REG(FNMSUB231)
426 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
427 RegForm = true;
428 LLVM_FALLTHROUGH;
429 CASE_FMA_PACKED_MEM(FNMSUB231)
430 CASE_FMA_SCALAR_MEM(FNMSUB231)
431 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
432 AccName = getRegName(MI->getOperand(1).getReg());
433 AccStr = "-";
434 Negate = true;
435 break;
436
// FMADDSUB/FMSUBADD alternate add and subtract across lanes, so the
// accumulator string is "+/-" or "-/+".  These exist only in packed form —
// note only the PACKED case macros are used here, no SCALAR ones.
437 CASE_FMA_PACKED_REG(FMADDSUB132)
438 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
439 RegForm = true;
440 LLVM_FALLTHROUGH;
441 CASE_FMA_PACKED_MEM(FMADDSUB132)
442 AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
443 Mul1Name = getRegName(MI->getOperand(1).getReg());
444 AccStr = "+/-";
445 break;
446
447 CASE_FMA_PACKED_REG(FMADDSUB213)
448 AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
449 RegForm = true;
450 LLVM_FALLTHROUGH;
451 CASE_FMA_PACKED_MEM(FMADDSUB213)
452 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
453 Mul2Name = getRegName(MI->getOperand(1).getReg());
454 AccStr = "+/-";
455 break;
456
457 CASE_FMA_PACKED_REG(FMADDSUB231)
458 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
459 RegForm = true;
460 LLVM_FALLTHROUGH;
461 CASE_FMA_PACKED_MEM(FMADDSUB231)
462 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
463 AccName = getRegName(MI->getOperand(1).getReg());
464 AccStr = "+/-";
465 break;
466
467 CASE_FMA_PACKED_REG(FMSUBADD132)
468 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
469 RegForm = true;
470 LLVM_FALLTHROUGH;
471 CASE_FMA_PACKED_MEM(FMSUBADD132)
472 AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
473 Mul1Name = getRegName(MI->getOperand(1).getReg());
474 AccStr = "-/+";
475 break;
476
477 CASE_FMA_PACKED_REG(FMSUBADD213)
478 AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
479 RegForm = true;
480 LLVM_FALLTHROUGH;
481 CASE_FMA_PACKED_MEM(FMSUBADD213)
482 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
483 Mul2Name = getRegName(MI->getOperand(1).getReg());
484 AccStr = "-/+";
485 break;
486
487 CASE_FMA_PACKED_REG(FMSUBADD231)
488 Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
489 RegForm = true;
490 LLVM_FALLTHROUGH;
491 CASE_FMA_PACKED_MEM(FMSUBADD231)
492 Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
493 AccName = getRegName(MI->getOperand(1).getReg());
494 AccStr = "-/+";
495 break;
496 }
497
498 const char *DestName = getRegName(MI->getOperand(0).getReg());
499
// Any source the switch left unnamed is the memory operand of a memory-form
// instruction; print it as "mem" rather than a register name.
500 if (!Mul1Name) Mul1Name = "mem";
501 if (!Mul2Name) Mul2Name = "mem";
502 if (!AccName) AccName = "mem";
503
504 OS << DestName << " = ";
505 // TODO: Print masking information?
506
507 if (Negate)
508 OS << '-';
509
// Final shape: "dest = [-](mul1 * mul2) <AccStr> acc".
510 OS << '(' << Mul1Name << " * " << Mul2Name << ") " << AccStr << ' '
511 << AccName;
512
513 return true;
514 }
515
516
234517 //===----------------------------------------------------------------------===//
235518 // Top Level Entrypoint
236519 //===----------------------------------------------------------------------===//
246529 const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr;
247530 unsigned NumOperands = MI->getNumOperands();
248531 bool RegForm = false;
532
533 if (printFMA3Comments(MI, OS, getRegName))
534 return true;
249535
250536 switch (MI->getOpcode()) {
251537 default:
39853985 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
39863986 ; CHECK: ## %bb.0:
39873987 ; CHECK-NEXT: vmovapd %xmm0, %xmm3
3988 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3
3988 ; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm3 = (xmm1 * xmm3) + xmm2
39893989 ; CHECK-NEXT: kmovw %edi, %k1
39903990 ; CHECK-NEXT: vmovapd %xmm0, %xmm4
3991 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1}
3991 ; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm4 = (xmm1 * xmm4) + xmm2
39923992 ; CHECK-NEXT: vmovapd %xmm0, %xmm5
39933993 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm5
39943994 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1}
40124012 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
40134013 ; CHECK: ## %bb.0:
40144014 ; CHECK-NEXT: vmovaps %xmm0, %xmm3
4015 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3
4015 ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm3 = (xmm1 * xmm3) + xmm2
40164016 ; CHECK-NEXT: kmovw %edi, %k1
40174017 ; CHECK-NEXT: vmovaps %xmm0, %xmm4
4018 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1}
4018 ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm4 = (xmm1 * xmm4) + xmm2
40194019 ; CHECK-NEXT: vmovaps %xmm0, %xmm5
40204020 ; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm5
40214021 ; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1}
40404040 ; CHECK: ## %bb.0:
40414041 ; CHECK-NEXT: kmovw %edi, %k1
40424042 ; CHECK-NEXT: vmovapd %xmm0, %xmm3
4043 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z}
4043 ; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm3 = (xmm1 * xmm3) + xmm2
40444044 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
40454045 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
40464046 ; CHECK-NEXT: retq
40564056 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
40574057 ; CHECK: ## %bb.0:
40584058 ; CHECK-NEXT: kmovw %edi, %k1
4059 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z}
4059 ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
40604060 ; CHECK-NEXT: retq
40614061 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
40624062 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
40694069 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
40704070 ; CHECK: ## %bb.0:
40714071 ; CHECK-NEXT: vmovapd %xmm2, %xmm3
4072 ; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3
4072 ; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm3 = (xmm0 * xmm1) + xmm3
40734073 ; CHECK-NEXT: kmovw %edi, %k1
40744074 ; CHECK-NEXT: vmovapd %xmm2, %xmm4
4075 ; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1}
4075 ; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm4 = (xmm0 * xmm1) + xmm4
40764076 ; CHECK-NEXT: vmovapd %xmm2, %xmm5
40774077 ; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm5
40784078 ; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
40964096 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
40974097 ; CHECK: ## %bb.0:
40984098 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4099 ; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3
4099 ; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm3 = (xmm0 * xmm1) + xmm3
41004100 ; CHECK-NEXT: kmovw %edi, %k1
41014101 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4102 ; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1}
4102 ; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm4 = (xmm0 * xmm1) + xmm4
41034103 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
41044104 ; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm5
41054105 ; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
41224122 ; CHECK: ## %bb.0:
41234123 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
41244124 ; CHECK-NEXT: kmovw %edx, %k1
4125 ; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1}
4125 ; CHECK-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
41264126 ; CHECK-NEXT: vmovss %xmm0, (%rdi)
41274127 ; CHECK-NEXT: retq
41284128 %a.val = load float, float* %a
41494149 ; CHECK: ## %bb.0:
41504150 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
41514151 ; CHECK-NEXT: kmovw %edx, %k1
4152 ; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1} {z}
4152 ; CHECK-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
41534153 ; CHECK-NEXT: vmovss %xmm0, (%rdi)
41544154 ; CHECK-NEXT: retq
41554155 %a.val = load float, float* %a
41764176 ; CHECK: ## %bb.0:
41774177 ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
41784178 ; CHECK-NEXT: kmovw %edx, %k1
4179 ; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1}
4179 ; CHECK-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
41804180 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi)
41814181 ; CHECK-NEXT: retq
41824182 %a.val = load double, double* %a
41994199 ; CHECK: ## %bb.0:
42004200 ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
42014201 ; CHECK-NEXT: kmovw %edx, %k1
4202 ; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1} {z}
4202 ; CHECK-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
42034203 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi)
42044204 ; CHECK-NEXT: retq
42054205 %a.val = load double, double* %a
42234223 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
42244224 ; CHECK: ## %bb.0:
42254225 ; CHECK-NEXT: vmovapd %xmm2, %xmm3
4226 ; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3
4226 ; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm3 = (xmm0 * xmm1) - xmm3
42274227 ; CHECK-NEXT: kmovw %edi, %k1
42284228 ; CHECK-NEXT: vmovapd %xmm2, %xmm4
4229 ; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1}
4229 ; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm4 = (xmm0 * xmm1) - xmm4
42304230 ; CHECK-NEXT: vmovapd %xmm2, %xmm5
42314231 ; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5
42324232 ; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
42504250 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
42514251 ; CHECK: ## %bb.0:
42524252 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4253 ; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3
4253 ; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm3 = (xmm0 * xmm1) - xmm3
42544254 ; CHECK-NEXT: kmovw %edi, %k1
42554255 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4256 ; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1}
4256 ; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm4 = (xmm0 * xmm1) - xmm4
42574257 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
42584258 ; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5
42594259 ; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
42774277 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
42784278 ; CHECK: ## %bb.0:
42794279 ; CHECK-NEXT: vmovapd %xmm2, %xmm3
4280 ; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3
4280 ; CHECK-NEXT: vfnmsub231sd {{.*#+}} xmm3 = -(xmm0 * xmm1) - xmm3
42814281 ; CHECK-NEXT: kmovw %edi, %k1
42824282 ; CHECK-NEXT: vmovapd %xmm2, %xmm4
4283 ; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1}
4283 ; CHECK-NEXT: vfnmsub231sd {{.*#+}} xmm4 = -(xmm0 * xmm1) - xmm4
42844284 ; CHECK-NEXT: vmovapd %xmm2, %xmm5
42854285 ; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5
42864286 ; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
43044304 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
43054305 ; CHECK: ## %bb.0:
43064306 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4307 ; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3
4307 ; CHECK-NEXT: vfnmsub231ss {{.*#+}} xmm3 = -(xmm0 * xmm1) - xmm3
43084308 ; CHECK-NEXT: kmovw %edi, %k1
43094309 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4310 ; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1}
4310 ; CHECK-NEXT: vfnmsub231ss {{.*#+}} xmm4 = -(xmm0 * xmm1) - xmm4
43114311 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
43124312 ; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5
43134313 ; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
43294329 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
43304330 ; CHECK: ## %bb.0:
43314331 ; CHECK-NEXT: kmovw %esi, %k1
4332 ; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1}
4332 ; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
43334333 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
43344334 ; CHECK-NEXT: retq
43354335 %q = load float, float* %ptr_b
43424342 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
43434343 ; CHECK: ## %bb.0:
43444344 ; CHECK-NEXT: kmovw %esi, %k1
4345 ; CHECK-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1}
4345 ; CHECK-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
43464346 ; CHECK-NEXT: retq
43474347 %q = load float, float* %ptr_b
43484348 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
43564356 ; CHECK: ## %bb.0:
43574357 ; CHECK-NEXT: xorl %eax, %eax
43584358 ; CHECK-NEXT: kmovw %eax, %k1
4359 ; CHECK-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 {%k1} {z}
4359 ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
43604360 ; CHECK-NEXT: retq
43614361 %q = load float, float* %ptr_b
43624362 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
40914091 ; CHECK: ## %bb.0:
40924092 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
40934093 ; CHECK-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
4094 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) + ymm2
40944095 ; CHECK-NEXT: retq ## encoding: [0xc3]
40954096 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
40964097 ret <8 x float> %res
41034104 ; CHECK: ## %bb.0:
41044105 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
41054106 ; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
4107 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) + xmm2
41064108 ; CHECK-NEXT: retq ## encoding: [0xc3]
41074109 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
41084110 ret <4 x float> %res
41154117 ; CHECK: ## %bb.0:
41164118 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
41174119 ; CHECK-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
4120 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) + ymm2
41184121 ; CHECK-NEXT: retq ## encoding: [0xc3]
41194122 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
41204123 ret <4 x double> %res
41274130 ; CHECK: ## %bb.0:
41284131 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
41294132 ; CHECK-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
4133 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) + xmm2
41304134 ; CHECK-NEXT: retq ## encoding: [0xc3]
41314135 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
41324136 ret <2 x double> %res
41384142 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
41394143 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
41404144 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xda]
4145 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) + xmm2
41414146 ; CHECK-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
4147 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) + xmm2
41424148 ; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
41434149 ; CHECK-NEXT: retq ## encoding: [0xc3]
41444150 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
41554161 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
41564162 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
41574163 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xda]
4164 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) + xmm2
41584165 ; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
4166 ; CHECK-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
41594167 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
41604168 ; CHECK-NEXT: retq ## encoding: [0xc3]
41614169 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
41724180 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
41734181 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
41744182 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xda]
4183 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) + xmm2
41754184 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0xa8,0xca]
4185 ; CHECK-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm2
41764186 ; CHECK-NEXT: vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
41774187 ; CHECK-NEXT: retq ## encoding: [0xc3]
41784188 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
41874197 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
41884198 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
41894199 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xda]
4200 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) + ymm2
41904201 ; CHECK-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
4202 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) + ymm2
41914203 ; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
41924204 ; CHECK-NEXT: retq ## encoding: [0xc3]
41934205 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
42044216 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
42054217 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
42064218 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xda]
4219 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) + ymm2
42074220 ; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
4221 ; CHECK-NEXT: ## ymm2 = (ymm0 * ymm1) + ymm2
42084222 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
42094223 ; CHECK-NEXT: retq ## encoding: [0xc3]
42104224 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
42214235 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
42224236 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
42234237 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xda]
4238 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) + ymm2
42244239 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0xa8,0xca]
4240 ; CHECK-NEXT: ## ymm1 = (ymm0 * ymm1) + ymm2
42254241 ; CHECK-NEXT: vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
42264242 ; CHECK-NEXT: retq ## encoding: [0xc3]
42274243 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
42364252 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
42374253 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
42384254 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xda]
4255 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) + xmm2
42394256 ; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
4257 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) + xmm2
42404258 ; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
42414259 ; CHECK-NEXT: retq ## encoding: [0xc3]
42424260 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
42534271 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
42544272 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
42554273 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xda]
4274 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) + xmm2
42564275 ; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
4276 ; CHECK-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
42574277 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
42584278 ; CHECK-NEXT: retq ## encoding: [0xc3]
42594279 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
42704290 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
42714291 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
42724292 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xda]
4293 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) + xmm2
42734294 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0xa8,0xca]
4295 ; CHECK-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm2
42744296 ; CHECK-NEXT: vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
42754297 ; CHECK-NEXT: retq ## encoding: [0xc3]
42764298 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
42854307 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
42864308 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
42874309 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xda]
4310 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) + ymm2
42884311 ; CHECK-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
4312 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) + ymm2
42894313 ; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
42904314 ; CHECK-NEXT: retq ## encoding: [0xc3]
42914315 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
43024326 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
43034327 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
43044328 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xda]
4329 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) + ymm2
43054330 ; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
4331 ; CHECK-NEXT: ## ymm2 = (ymm0 * ymm1) + ymm2
43064332 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
43074333 ; CHECK-NEXT: retq ## encoding: [0xc3]
43084334 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
43194345 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
43204346 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
43214347 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xda]
4348 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) + ymm2
43224349 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0xa8,0xca]
4350 ; CHECK-NEXT: ## ymm1 = (ymm0 * ymm1) + ymm2
43234351 ; CHECK-NEXT: vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
43244352 ; CHECK-NEXT: retq ## encoding: [0xc3]
43254353 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
43374365 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
43384366 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
43394367 ; CHECK-NEXT: vfmsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaa,0xda]
4368 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) - xmm2
43404369 ; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
4370 ; CHECK-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
43414371 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
43424372 ; CHECK-NEXT: retq ## encoding: [0xc3]
43434373 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
43554385 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
43564386 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
43574387 ; CHECK-NEXT: vfmsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xaa,0xda]
4388 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) - ymm2
43584389 ; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
4390 ; CHECK-NEXT: ## ymm2 = (ymm0 * ymm1) - ymm2
43594391 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
43604392 ; CHECK-NEXT: retq ## encoding: [0xc3]
43614393 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
43724404 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
43734405 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
43744406 ; CHECK-NEXT: vfmsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaa,0xda]
4407 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) - xmm2
43754408 ; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
4409 ; CHECK-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
43764410 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
43774411 ; CHECK-NEXT: retq ## encoding: [0xc3]
43784412 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
43894423 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
43904424 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
43914425 ; CHECK-NEXT: vfmsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xaa,0xda]
4426 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) - ymm2
43924427 ; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
4428 ; CHECK-NEXT: ## ymm2 = (ymm0 * ymm1) - ymm2
43934429 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
43944430 ; CHECK-NEXT: retq ## encoding: [0xc3]
43954431 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
44054441 ; CHECK: ## %bb.0:
44064442 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
44074443 ; CHECK-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
4444 ; CHECK-NEXT: ## ymm0 = -(ymm0 * ymm1) + ymm2
44084445 ; CHECK-NEXT: retq ## encoding: [0xc3]
44094446 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
44104447 ret <8 x float> %res
44174454 ; CHECK: ## %bb.0:
44184455 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
44194456 ; CHECK-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
4457 ; CHECK-NEXT: ## xmm0 = -(xmm0 * xmm1) + xmm2
44204458 ; CHECK-NEXT: retq ## encoding: [0xc3]
44214459 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
44224460 ret <4 x float> %res
44294467 ; CHECK: ## %bb.0:
44304468 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
44314469 ; CHECK-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
4470 ; CHECK-NEXT: ## ymm0 = -(ymm0 * ymm1) + ymm2
44324471 ; CHECK-NEXT: retq ## encoding: [0xc3]
44334472 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
44344473 ret <4 x double> %res
44414480 ; CHECK: ## %bb.0:
44424481 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
44434482 ; CHECK-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
4483 ; CHECK-NEXT: ## xmm0 = -(xmm0 * xmm1) + xmm2
44444484 ; CHECK-NEXT: retq ## encoding: [0xc3]
44454485 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
44464486 ret <2 x double> %res
44534493 ; CHECK: ## %bb.0:
44544494 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
44554495 ; CHECK-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
4496 ; CHECK-NEXT: ## ymm0 = -(ymm0 * ymm1) - ymm2
44564497 ; CHECK-NEXT: retq ## encoding: [0xc3]
44574498 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
44584499 ret <8 x float> %res
44654506 ; CHECK: ## %bb.0:
44664507 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
44674508 ; CHECK-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
4509 ; CHECK-NEXT: ## xmm0 = -(xmm0 * xmm1) - xmm2
44684510 ; CHECK-NEXT: retq ## encoding: [0xc3]
44694511 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
44704512 ret <4 x float> %res
44774519 ; CHECK: ## %bb.0:
44784520 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
44794521 ; CHECK-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
4522 ; CHECK-NEXT: ## ymm0 = -(ymm0 * ymm1) - ymm2
44804523 ; CHECK-NEXT: retq ## encoding: [0xc3]
44814524 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
44824525 ret <4 x double> %res
44894532 ; CHECK: ## %bb.0:
44904533 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
44914534 ; CHECK-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
4535 ; CHECK-NEXT: ## xmm0 = -(xmm0 * xmm1) - xmm2
44924536 ; CHECK-NEXT: retq ## encoding: [0xc3]
44934537 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
44944538 ret <2 x double> %res
45014545 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
45024546 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
45034547 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xae,0xda]
4548 ; CHECK-NEXT: ## xmm3 = -(xmm0 * xmm3) - xmm2
45044549 ; CHECK-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
4550 ; CHECK-NEXT: ## xmm0 = -(xmm0 * xmm1) - xmm2
45054551 ; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
45064552 ; CHECK-NEXT: retq ## encoding: [0xc3]
45074553 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
45184564 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
45194565 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
45204566 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xae,0xda]
4567 ; CHECK-NEXT: ## xmm3 = -(xmm0 * xmm3) - xmm2
45214568 ; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
4569 ; CHECK-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
45224570 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
45234571 ; CHECK-NEXT: retq ## encoding: [0xc3]
45244572 %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
45334581 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
45344582 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
45354583 ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xae,0xda]
4584 ; CHECK-NEXT: ## ymm3 = -(ymm0 * ymm3) - ymm2
45364585 ; CHECK-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
4586 ; CHECK-NEXT: ## ymm0 = -(ymm0 * ymm1) - ymm2
45374587 ; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
45384588 ; CHECK-NEXT: retq ## encoding: [0xc3]
45394589 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
45504600 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
45514601 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
45524602 ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xae,0xda]
4603 ; CHECK-NEXT: ## ymm3 = -(ymm0 * ymm3) - ymm2
45534604 ; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
4605 ; CHECK-NEXT: ## ymm2 = -(ymm0 * ymm1) - ymm2
45544606 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
45554607 ; CHECK-NEXT: retq ## encoding: [0xc3]
45564608 %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
45654617 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
45664618 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
45674619 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xae,0xda]
4620 ; CHECK-NEXT: ## xmm3 = -(xmm0 * xmm3) - xmm2
45684621 ; CHECK-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
4622 ; CHECK-NEXT: ## xmm0 = -(xmm0 * xmm1) - xmm2
45694623 ; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
45704624 ; CHECK-NEXT: retq ## encoding: [0xc3]
45714625 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
45824636 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
45834637 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
45844638 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xae,0xda]
4639 ; CHECK-NEXT: ## xmm3 = -(xmm0 * xmm3) - xmm2
45854640 ; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
4641 ; CHECK-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
45864642 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
45874643 ; CHECK-NEXT: retq ## encoding: [0xc3]
45884644 %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
45974653 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
45984654 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
45994655 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xae,0xda]
4656 ; CHECK-NEXT: ## ymm3 = -(ymm0 * ymm3) - ymm2
46004657 ; CHECK-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
4658 ; CHECK-NEXT: ## ymm0 = -(ymm0 * ymm1) - ymm2
46014659 ; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
46024660 ; CHECK-NEXT: retq ## encoding: [0xc3]
46034661 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
46144672 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
46154673 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
46164674 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xae,0xda]
4675 ; CHECK-NEXT: ## ymm3 = -(ymm0 * ymm3) - ymm2
46174676 ; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
4677 ; CHECK-NEXT: ## ymm2 = -(ymm0 * ymm1) - ymm2
46184678 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
46194679 ; CHECK-NEXT: retq ## encoding: [0xc3]
46204680 %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
46294689 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
46304690 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
46314691 ; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xac,0xda]
4692 ; CHECK-NEXT: ## xmm3 = -(xmm0 * xmm3) + xmm2
46324693 ; CHECK-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
4694 ; CHECK-NEXT: ## xmm0 = -(xmm0 * xmm1) + xmm2
46334695 ; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
46344696 ; CHECK-NEXT: retq ## encoding: [0xc3]
46354697 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
46444706 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
46454707 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
46464708 ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xac,0xda]
4709 ; CHECK-NEXT: ## ymm3 = -(ymm0 * ymm3) + ymm2
46474710 ; CHECK-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
4711 ; CHECK-NEXT: ## ymm0 = -(ymm0 * ymm1) + ymm2
46484712 ; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
46494713 ; CHECK-NEXT: retq ## encoding: [0xc3]
46504714 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
46594723 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
46604724 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
46614725 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xac,0xda]
4726 ; CHECK-NEXT: ## xmm3 = -(xmm0 * xmm3) + xmm2
46624727 ; CHECK-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
4728 ; CHECK-NEXT: ## xmm0 = -(xmm0 * xmm1) + xmm2
46634729 ; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
46644730 ; CHECK-NEXT: retq ## encoding: [0xc3]
46654731 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
46744740 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
46754741 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
46764742 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xac,0xda]
4743 ; CHECK-NEXT: ## ymm3 = -(ymm0 * ymm3) + ymm2
46774744 ; CHECK-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
4745 ; CHECK-NEXT: ## ymm0 = -(ymm0 * ymm1) + ymm2
46784746 ; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
46794747 ; CHECK-NEXT: retq ## encoding: [0xc3]
46804748 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
46904758 ; CHECK: ## %bb.0:
46914759 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
46924760 ; CHECK-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
4761 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) +/- ymm2
46934762 ; CHECK-NEXT: retq ## encoding: [0xc3]
46944763 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
46954764 ret <8 x float> %res
47024771 ; CHECK: ## %bb.0:
47034772 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
47044773 ; CHECK-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
4774 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) +/- xmm2
47054775 ; CHECK-NEXT: retq ## encoding: [0xc3]
47064776 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
47074777 ret <4 x float> %res
47144784 ; CHECK: ## %bb.0:
47154785 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
47164786 ; CHECK-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
4787 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) +/- ymm2
47174788 ; CHECK-NEXT: retq ## encoding: [0xc3]
47184789 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
47194790 ret <4 x double> %res
47264797 ; CHECK: ## %bb.0:
47274798 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
47284799 ; CHECK-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
4800 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) +/- xmm2
47294801 ; CHECK-NEXT: retq ## encoding: [0xc3]
47304802 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
47314803 ret <2 x double> %res
47374809 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
47384810 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
47394811 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xda]
4812 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) +/- xmm2
47404813 ; CHECK-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
4814 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) +/- xmm2
47414815 ; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
47424816 ; CHECK-NEXT: retq ## encoding: [0xc3]
47434817 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
47544828 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
47554829 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
47564830 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xda]
4831 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) +/- xmm2
47574832 ; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
4833 ; CHECK-NEXT: ## xmm2 = (xmm0 * xmm1) +/- xmm2
47584834 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
47594835 ; CHECK-NEXT: retq ## encoding: [0xc3]
47604836 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
47714847 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
47724848 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
47734849 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xda]
4850 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) +/- xmm2
47744851 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0xa6,0xca]
4852 ; CHECK-NEXT: ## xmm1 = (xmm0 * xmm1) +/- xmm2
47754853 ; CHECK-NEXT: vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
47764854 ; CHECK-NEXT: retq ## encoding: [0xc3]
47774855 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
47864864 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
47874865 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
47884866 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xda]
4867 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) +/- ymm2
47894868 ; CHECK-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
4869 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) +/- ymm2
47904870 ; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
47914871 ; CHECK-NEXT: retq ## encoding: [0xc3]
47924872 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
48034883 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
48044884 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
48054885 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xda]
4886 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) +/- ymm2
48064887 ; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
4888 ; CHECK-NEXT: ## ymm2 = (ymm0 * ymm1) +/- ymm2
48074889 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
48084890 ; CHECK-NEXT: retq ## encoding: [0xc3]
48094891 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
48204902 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
48214903 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
48224904 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xda]
4905 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) +/- ymm2
48234906 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0xa6,0xca]
4907 ; CHECK-NEXT: ## ymm1 = (ymm0 * ymm1) +/- ymm2
48244908 ; CHECK-NEXT: vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
48254909 ; CHECK-NEXT: retq ## encoding: [0xc3]
48264910 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
48354919 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
48364920 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
48374921 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xda]
4922 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) +/- xmm2
48384923 ; CHECK-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
4924 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) +/- xmm2
48394925 ; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
48404926 ; CHECK-NEXT: retq ## encoding: [0xc3]
48414927 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
48524938 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
48534939 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
48544940 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xda]
4941 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) +/- xmm2
48554942 ; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
4943 ; CHECK-NEXT: ## xmm2 = (xmm0 * xmm1) +/- xmm2
48564944 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
48574945 ; CHECK-NEXT: retq ## encoding: [0xc3]
48584946 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
48694957 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
48704958 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
48714959 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xda]
4960 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) +/- xmm2
48724961 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0xa6,0xca]
4962 ; CHECK-NEXT: ## xmm1 = (xmm0 * xmm1) +/- xmm2
48734963 ; CHECK-NEXT: vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
48744964 ; CHECK-NEXT: retq ## encoding: [0xc3]
48754965 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
48844974 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
48854975 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
48864976 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xda]
4977 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) +/- ymm2
48874978 ; CHECK-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
4979 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) +/- ymm2
48884980 ; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
48894981 ; CHECK-NEXT: retq ## encoding: [0xc3]
48904982 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
49014993 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
49024994 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
49034995 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xda]
4996 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) +/- ymm2
49044997 ; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
4998 ; CHECK-NEXT: ## ymm2 = (ymm0 * ymm1) +/- ymm2
49054999 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
49065000 ; CHECK-NEXT: retq ## encoding: [0xc3]
49075001 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
49185012 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
49195013 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
49205014 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xda]
5015 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) +/- ymm2
49215016 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0xa6,0xca]
5017 ; CHECK-NEXT: ## ymm1 = (ymm0 * ymm1) +/- ymm2
49225018 ; CHECK-NEXT: vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
49235019 ; CHECK-NEXT: retq ## encoding: [0xc3]
49245020 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
49355031 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
49365032 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
49375033 ; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa7,0xda]
5034 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) -/+ xmm2
49385035 ; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
5036 ; CHECK-NEXT: ## xmm2 = (xmm0 * xmm1) -/+ xmm2
49395037 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
49405038 ; CHECK-NEXT: retq ## encoding: [0xc3]
49415039 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
49525050 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
49535051 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
49545052 ; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa7,0xda]
5053 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) -/+ ymm2
49555054 ; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
5055 ; CHECK-NEXT: ## ymm2 = (ymm0 * ymm1) -/+ ymm2
49565056 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
49575057 ; CHECK-NEXT: retq ## encoding: [0xc3]
49585058 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
49695069 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
49705070 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
49715071 ; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa7,0xda]
5072 ; CHECK-NEXT: ## xmm3 = (xmm0 * xmm3) -/+ xmm2
49725073 ; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
5074 ; CHECK-NEXT: ## xmm2 = (xmm0 * xmm1) -/+ xmm2
49735075 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
49745076 ; CHECK-NEXT: retq ## encoding: [0xc3]
49755077 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
49865088 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
49875089 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
49885090 ; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa7,0xda]
5091 ; CHECK-NEXT: ## ymm3 = (ymm0 * ymm3) -/+ ymm2
49895092 ; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
5093 ; CHECK-NEXT: ## ymm2 = (ymm0 * ymm1) -/+ ymm2
49905094 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
49915095 ; CHECK-NEXT: retq ## encoding: [0xc3]
49925096 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
50015105 ; CHECK: ## %bb.0:
50025106 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
50035107 ; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
5108 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) + xmm2
50045109 ; CHECK-NEXT: retq ## encoding: [0xc3]
50055110 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
50065111 ret <4 x float> %res
50105115 ; CHECK-LABEL: test_mask_vfmadd128_ps_rz:
50115116 ; CHECK: ## %bb.0:
50125117 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
5118 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
50135119 ; CHECK-NEXT: retq ## encoding: [0xc3]
50145120 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
50155121 ret <4 x float> %res
50205126 ; CHECK: ## %bb.0:
50215127 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
50225128 ; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
5129 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
50235130 ; CHECK-NEXT: retq ## encoding: [0xc3]
50245131 %a2 = load <4 x float>, <4 x float>* %ptr_a2
50255132 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
50315138 ; CHECK: ## %bb.0:
50325139 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
50335140 ; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
5141 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
50345142 ; CHECK-NEXT: retq ## encoding: [0xc3]
50355143 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
50365144 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
50415149 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz:
50425150 ; CHECK: ## %bb.0:
50435151 ; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
5152 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
50445153 ; CHECK-NEXT: retq ## encoding: [0xc3]
50455154 %a2 = load <4 x float>, <4 x float>* %ptr_a2
50465155 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
50515160 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza:
50525161 ; CHECK: ## %bb.0:
50535162 ; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
5163 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
50545164 ; CHECK-NEXT: retq ## encoding: [0xc3]
50555165 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
50565166 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
50625172 ; CHECK: ## %bb.0:
50635173 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
50645174 ; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
5175 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
50655176 ; CHECK-NEXT: retq ## encoding: [0xc3]
50665177 %q = load float, float* %ptr_a2
50675178 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
50775188 ; CHECK: ## %bb.0:
50785189 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
50795190 ; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
5191 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
50805192 ; CHECK-NEXT: retq ## encoding: [0xc3]
50815193 %q = load float, float* %ptr_a2, align 4
50825194 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
50915203 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz:
50925204 ; CHECK: ## %bb.0:
50935205 ; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
5206 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
50945207 ; CHECK-NEXT: retq ## encoding: [0xc3]
50955208 %q = load float, float* %ptr_a2
50965209 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
51055218 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza:
51065219 ; CHECK: ## %bb.0:
51075220 ; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
5221 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
51085222 ; CHECK-NEXT: retq ## encoding: [0xc3]
51095223 %q = load float, float* %ptr_a2, align 4
51105224 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
51205234 ; CHECK: ## %bb.0:
51215235 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
51225236 ; CHECK-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
5237 ; CHECK-NEXT: ## xmm0 = (xmm0 * xmm1) + xmm2
51235238 ; CHECK-NEXT: retq ## encoding: [0xc3]
51245239 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
51255240 ret <2 x double> %res
51295244 ; CHECK-LABEL: test_mask_vfmadd128_pd_rz:
51305245 ; CHECK: ## %bb.0:
51315246 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
5247 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
51325248 ; CHECK-NEXT: retq ## encoding: [0xc3]
51335249 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
51345250 ret <2 x double> %res
51395255 ; CHECK: ## %bb.0:
51405256 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
51415257 ; CHECK-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
5258 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
51425259 ; CHECK-NEXT: retq ## encoding: [0xc3]
51435260 %a2 = load <2 x double>, <2 x double>* %ptr_a2
51445261 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
51495266 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz:
51505267 ; CHECK: ## %bb.0:
51515268 ; CHECK-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
5269 ; CHECK-NEXT: ## xmm0 = (xmm1 * xmm0) + mem
51525270 ; CHECK-NEXT: retq ## encoding: [0xc3]
51535271 %a2 = load <2 x double>, <2 x double>* %ptr_a2
51545272 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
51605278 ; CHECK: ## %bb.0:
51615279 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
51625280 ; CHECK-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
5281 ; CHECK-NEXT: ## ymm0 = (ymm0 * ymm1) + ymm2
51635282 ; CHECK-NEXT: retq ## encoding: [0xc3]
51645283 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
51655284 ret <4 x double> %res
51695288 ; CHECK-LABEL: test_mask_vfmadd256_pd_rz:
51705289 ; CHECK: ## %bb.0:
51715290 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
5291 ; CHECK-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2
51725292 ; CHECK-NEXT: retq ## encoding: [0xc3]
51735293 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
51745294 ret <4 x double> %res
51795299 ; CHECK: ## %bb.0:
51805300 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
51815301 ; CHECK-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
5302 ; CHECK-NEXT: ## ymm0 = (ymm1 * ymm0) + mem
51825303 ; CHECK-NEXT: retq ## encoding: [0xc3]
51835304 %a2 = load <4 x double>, <4 x double>* %ptr_a2
51845305 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
51895310 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz:
51905311 ; CHECK: ## %bb.0:
51915312 ; CHECK-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
5313 ; CHECK-NEXT: ## ymm0 = (ymm1 * ymm0) + mem
51925314 ; CHECK-NEXT: retq ## encoding: [0xc3]
51935315 %a2 = load <4 x double>, <4 x double>* %ptr_a2
51945316 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
4141 ; FMA-LABEL: test_x86_fmadd_baa_ps:
4242 ; FMA: # %bb.0:
4343 ; FMA-NEXT: vmovaps (%rcx), %xmm0
44 ; FMA-NEXT: vfmadd132ps (%rdx), %xmm0, %xmm0
44 ; FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
4545 ; FMA-NEXT: retq
4646 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
4747 ret <4 x float> %res
5151 ; FMA-LABEL: test_x86_fmadd_aba_ps:
5252 ; FMA: # %bb.0:
5353 ; FMA-NEXT: vmovaps (%rcx), %xmm0
54 ; FMA-NEXT: vfmadd231ps (%rdx), %xmm0, %xmm0
54 ; FMA-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
5555 ; FMA-NEXT: retq
5656 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
5757 ret <4 x float> %res
6161 ; FMA-LABEL: test_x86_fmadd_bba_ps:
6262 ; FMA: # %bb.0:
6363 ; FMA-NEXT: vmovaps (%rdx), %xmm0
64 ; FMA-NEXT: vfmadd213ps (%rcx), %xmm0, %xmm0
64 ; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
6565 ; FMA-NEXT: retq
6666 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
6767 ret <4 x float> %res
7272 ; FMA-LABEL: test_x86_fmadd_baa_ps_y:
7373 ; FMA: # %bb.0:
7474 ; FMA-NEXT: vmovaps (%rcx), %ymm0
75 ; FMA-NEXT: vfmadd132ps (%rdx), %ymm0, %ymm0
75 ; FMA-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
7676 ; FMA-NEXT: retq
7777 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
7878 ret <8 x float> %res
8282 ; FMA-LABEL: test_x86_fmadd_aba_ps_y:
8383 ; FMA: # %bb.0:
8484 ; FMA-NEXT: vmovaps (%rcx), %ymm0
85 ; FMA-NEXT: vfmadd231ps (%rdx), %ymm0, %ymm0
85 ; FMA-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
8686 ; FMA-NEXT: retq
8787 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
8888 ret <8 x float> %res
9292 ; FMA-LABEL: test_x86_fmadd_bba_ps_y:
9393 ; FMA: # %bb.0:
9494 ; FMA-NEXT: vmovaps (%rdx), %ymm0
95 ; FMA-NEXT: vfmadd213ps (%rcx), %ymm0, %ymm0
95 ; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
9696 ; FMA-NEXT: retq
9797 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
9898 ret <8 x float> %res
135135 ; FMA-LABEL: test_x86_fmadd_baa_pd:
136136 ; FMA: # %bb.0:
137137 ; FMA-NEXT: vmovapd (%rcx), %xmm0
138 ; FMA-NEXT: vfmadd132pd (%rdx), %xmm0, %xmm0
138 ; FMA-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
139139 ; FMA-NEXT: retq
140140 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
141141 ret <2 x double> %res
145145 ; FMA-LABEL: test_x86_fmadd_aba_pd:
146146 ; FMA: # %bb.0:
147147 ; FMA-NEXT: vmovapd (%rcx), %xmm0
148 ; FMA-NEXT: vfmadd231pd (%rdx), %xmm0, %xmm0
148 ; FMA-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
149149 ; FMA-NEXT: retq
150150 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
151151 ret <2 x double> %res
155155 ; FMA-LABEL: test_x86_fmadd_bba_pd:
156156 ; FMA: # %bb.0:
157157 ; FMA-NEXT: vmovapd (%rdx), %xmm0
158 ; FMA-NEXT: vfmadd213pd (%rcx), %xmm0, %xmm0
158 ; FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
159159 ; FMA-NEXT: retq
160160 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
161161 ret <2 x double> %res
166166 ; FMA-LABEL: test_x86_fmadd_baa_pd_y:
167167 ; FMA: # %bb.0:
168168 ; FMA-NEXT: vmovapd (%rcx), %ymm0
169 ; FMA-NEXT: vfmadd132pd (%rdx), %ymm0, %ymm0
169 ; FMA-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
170170 ; FMA-NEXT: retq
171171 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
172172 ret <4 x double> %res
176176 ; FMA-LABEL: test_x86_fmadd_aba_pd_y:
177177 ; FMA: # %bb.0:
178178 ; FMA-NEXT: vmovapd (%rcx), %ymm0
179 ; FMA-NEXT: vfmadd231pd (%rdx), %ymm0, %ymm0
179 ; FMA-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
180180 ; FMA-NEXT: retq
181181 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
182182 ret <4 x double> %res
186186 ; FMA-LABEL: test_x86_fmadd_bba_pd_y:
187187 ; FMA: # %bb.0:
188188 ; FMA-NEXT: vmovapd (%rdx), %ymm0
189 ; FMA-NEXT: vfmadd213pd (%rcx), %ymm0, %ymm0
189 ; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
190190 ; FMA-NEXT: retq
191191 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
192192 ret <4 x double> %res
230230 ; FMA-LABEL: test_x86_fnmadd_baa_ps:
231231 ; FMA: # %bb.0:
232232 ; FMA-NEXT: vmovaps (%rcx), %xmm0
233 ; FMA-NEXT: vfnmadd132ps (%rdx), %xmm0, %xmm0
233 ; FMA-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
234234 ; FMA-NEXT: retq
235235 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
236236 ret <4 x float> %res
240240 ; FMA-LABEL: test_x86_fnmadd_aba_ps:
241241 ; FMA: # %bb.0:
242242 ; FMA-NEXT: vmovaps (%rcx), %xmm0
243 ; FMA-NEXT: vfnmadd231ps (%rdx), %xmm0, %xmm0
243 ; FMA-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
244244 ; FMA-NEXT: retq
245245 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
246246 ret <4 x float> %res
250250 ; FMA-LABEL: test_x86_fnmadd_bba_ps:
251251 ; FMA: # %bb.0:
252252 ; FMA-NEXT: vmovaps (%rdx), %xmm0
253 ; FMA-NEXT: vfnmadd213ps (%rcx), %xmm0, %xmm0
253 ; FMA-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
254254 ; FMA-NEXT: retq
255255 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
256256 ret <4 x float> %res
261261 ; FMA-LABEL: test_x86_fnmadd_baa_ps_y:
262262 ; FMA: # %bb.0:
263263 ; FMA-NEXT: vmovaps (%rcx), %ymm0
264 ; FMA-NEXT: vfnmadd132ps (%rdx), %ymm0, %ymm0
264 ; FMA-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
265265 ; FMA-NEXT: retq
266266 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
267267 ret <8 x float> %res
271271 ; FMA-LABEL: test_x86_fnmadd_aba_ps_y:
272272 ; FMA: # %bb.0:
273273 ; FMA-NEXT: vmovaps (%rcx), %ymm0
274 ; FMA-NEXT: vfnmadd231ps (%rdx), %ymm0, %ymm0
274 ; FMA-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
275275 ; FMA-NEXT: retq
276276 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
277277 ret <8 x float> %res
281281 ; FMA-LABEL: test_x86_fnmadd_bba_ps_y:
282282 ; FMA: # %bb.0:
283283 ; FMA-NEXT: vmovaps (%rdx), %ymm0
284 ; FMA-NEXT: vfnmadd213ps (%rcx), %ymm0, %ymm0
284 ; FMA-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
285285 ; FMA-NEXT: retq
286286 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
287287 ret <8 x float> %res
324324 ; FMA-LABEL: test_x86_fnmadd_baa_pd:
325325 ; FMA: # %bb.0:
326326 ; FMA-NEXT: vmovapd (%rcx), %xmm0
327 ; FMA-NEXT: vfnmadd132pd (%rdx), %xmm0, %xmm0
327 ; FMA-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
328328 ; FMA-NEXT: retq
329329 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
330330 ret <2 x double> %res
334334 ; FMA-LABEL: test_x86_fnmadd_aba_pd:
335335 ; FMA: # %bb.0:
336336 ; FMA-NEXT: vmovapd (%rcx), %xmm0
337 ; FMA-NEXT: vfnmadd231pd (%rdx), %xmm0, %xmm0
337 ; FMA-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
338338 ; FMA-NEXT: retq
339339 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
340340 ret <2 x double> %res
344344 ; FMA-LABEL: test_x86_fnmadd_bba_pd:
345345 ; FMA: # %bb.0:
346346 ; FMA-NEXT: vmovapd (%rdx), %xmm0
347 ; FMA-NEXT: vfnmadd213pd (%rcx), %xmm0, %xmm0
347 ; FMA-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
348348 ; FMA-NEXT: retq
349349 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
350350 ret <2 x double> %res
355355 ; FMA-LABEL: test_x86_fnmadd_baa_pd_y:
356356 ; FMA: # %bb.0:
357357 ; FMA-NEXT: vmovapd (%rcx), %ymm0
358 ; FMA-NEXT: vfnmadd132pd (%rdx), %ymm0, %ymm0
358 ; FMA-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
359359 ; FMA-NEXT: retq
360360 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
361361 ret <4 x double> %res
365365 ; FMA-LABEL: test_x86_fnmadd_aba_pd_y:
366366 ; FMA: # %bb.0:
367367 ; FMA-NEXT: vmovapd (%rcx), %ymm0
368 ; FMA-NEXT: vfnmadd231pd (%rdx), %ymm0, %ymm0
368 ; FMA-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
369369 ; FMA-NEXT: retq
370370 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
371371 ret <4 x double> %res
375375 ; FMA-LABEL: test_x86_fnmadd_bba_pd_y:
376376 ; FMA: # %bb.0:
377377 ; FMA-NEXT: vmovapd (%rdx), %ymm0
378 ; FMA-NEXT: vfnmadd213pd (%rcx), %ymm0, %ymm0
378 ; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
379379 ; FMA-NEXT: retq
380380 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
381381 ret <4 x double> %res
418418 ; FMA-LABEL: test_x86_fmsub_baa_ps:
419419 ; FMA: # %bb.0:
420420 ; FMA-NEXT: vmovaps (%rcx), %xmm0
421 ; FMA-NEXT: vfmsub132ps (%rdx), %xmm0, %xmm0
421 ; FMA-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
422422 ; FMA-NEXT: retq
423423 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
424424 ret <4 x float> %res
428428 ; FMA-LABEL: test_x86_fmsub_aba_ps:
429429 ; FMA: # %bb.0:
430430 ; FMA-NEXT: vmovaps (%rcx), %xmm0
431 ; FMA-NEXT: vfmsub231ps (%rdx), %xmm0, %xmm0
431 ; FMA-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
432432 ; FMA-NEXT: retq
433433 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
434434 ret <4 x float> %res
438438 ; FMA-LABEL: test_x86_fmsub_bba_ps:
439439 ; FMA: # %bb.0:
440440 ; FMA-NEXT: vmovaps (%rdx), %xmm0
441 ; FMA-NEXT: vfmsub213ps (%rcx), %xmm0, %xmm0
441 ; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
442442 ; FMA-NEXT: retq
443443 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
444444 ret <4 x float> %res
449449 ; FMA-LABEL: test_x86_fmsub_baa_ps_y:
450450 ; FMA: # %bb.0:
451451 ; FMA-NEXT: vmovaps (%rcx), %ymm0
452 ; FMA-NEXT: vfmsub132ps (%rdx), %ymm0, %ymm0
452 ; FMA-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
453453 ; FMA-NEXT: retq
454454 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
455455 ret <8 x float> %res
459459 ; FMA-LABEL: test_x86_fmsub_aba_ps_y:
460460 ; FMA: # %bb.0:
461461 ; FMA-NEXT: vmovaps (%rcx), %ymm0
462 ; FMA-NEXT: vfmsub231ps (%rdx), %ymm0, %ymm0
462 ; FMA-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
463463 ; FMA-NEXT: retq
464464 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
465465 ret <8 x float> %res
469469 ; FMA-LABEL: test_x86_fmsub_bba_ps_y:
470470 ; FMA: # %bb.0:
471471 ; FMA-NEXT: vmovaps (%rdx), %ymm0
472 ; FMA-NEXT: vfmsub213ps (%rcx), %ymm0, %ymm0
472 ; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
473473 ; FMA-NEXT: retq
474474 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
475475 ret <8 x float> %res
512512 ; FMA-LABEL: test_x86_fmsub_baa_pd:
513513 ; FMA: # %bb.0:
514514 ; FMA-NEXT: vmovapd (%rcx), %xmm0
515 ; FMA-NEXT: vfmsub132pd (%rdx), %xmm0, %xmm0
515 ; FMA-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
516516 ; FMA-NEXT: retq
517517 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
518518 ret <2 x double> %res
522522 ; FMA-LABEL: test_x86_fmsub_aba_pd:
523523 ; FMA: # %bb.0:
524524 ; FMA-NEXT: vmovapd (%rcx), %xmm0
525 ; FMA-NEXT: vfmsub231pd (%rdx), %xmm0, %xmm0
525 ; FMA-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
526526 ; FMA-NEXT: retq
527527 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
528528 ret <2 x double> %res
532532 ; FMA-LABEL: test_x86_fmsub_bba_pd:
533533 ; FMA: # %bb.0:
534534 ; FMA-NEXT: vmovapd (%rdx), %xmm0
535 ; FMA-NEXT: vfmsub213pd (%rcx), %xmm0, %xmm0
535 ; FMA-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
536536 ; FMA-NEXT: retq
537537 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
538538 ret <2 x double> %res
543543 ; FMA-LABEL: test_x86_fmsub_baa_pd_y:
544544 ; FMA: # %bb.0:
545545 ; FMA-NEXT: vmovapd (%rcx), %ymm0
546 ; FMA-NEXT: vfmsub132pd (%rdx), %ymm0, %ymm0
546 ; FMA-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
547547 ; FMA-NEXT: retq
548548 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
549549 ret <4 x double> %res
553553 ; FMA-LABEL: test_x86_fmsub_aba_pd_y:
554554 ; FMA: # %bb.0:
555555 ; FMA-NEXT: vmovapd (%rcx), %ymm0
556 ; FMA-NEXT: vfmsub231pd (%rdx), %ymm0, %ymm0
556 ; FMA-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
557557 ; FMA-NEXT: retq
558558 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
559559 ret <4 x double> %res
563563 ; FMA-LABEL: test_x86_fmsub_bba_pd_y:
564564 ; FMA: # %bb.0:
565565 ; FMA-NEXT: vmovapd (%rdx), %ymm0
566 ; FMA-NEXT: vfmsub213pd (%rcx), %ymm0, %ymm0
566 ; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
567567 ; FMA-NEXT: retq
568568 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
569569 ret <4 x double> %res
607607 ; FMA-LABEL: test_x86_fnmsub_baa_ps:
608608 ; FMA: # %bb.0:
609609 ; FMA-NEXT: vmovaps (%rcx), %xmm0
610 ; FMA-NEXT: vfnmsub132ps (%rdx), %xmm0, %xmm0
610 ; FMA-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
611611 ; FMA-NEXT: retq
612612 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
613613 ret <4 x float> %res
617617 ; FMA-LABEL: test_x86_fnmsub_aba_ps:
618618 ; FMA: # %bb.0:
619619 ; FMA-NEXT: vmovaps (%rcx), %xmm0
620 ; FMA-NEXT: vfnmsub231ps (%rdx), %xmm0, %xmm0
620 ; FMA-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
621621 ; FMA-NEXT: retq
622622 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
623623 ret <4 x float> %res
627627 ; FMA-LABEL: test_x86_fnmsub_bba_ps:
628628 ; FMA: # %bb.0:
629629 ; FMA-NEXT: vmovaps (%rdx), %xmm0
630 ; FMA-NEXT: vfnmsub213ps (%rcx), %xmm0, %xmm0
630 ; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
631631 ; FMA-NEXT: retq
632632 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
633633 ret <4 x float> %res
638638 ; FMA-LABEL: test_x86_fnmsub_baa_ps_y:
639639 ; FMA: # %bb.0:
640640 ; FMA-NEXT: vmovaps (%rcx), %ymm0
641 ; FMA-NEXT: vfnmsub132ps (%rdx), %ymm0, %ymm0
641 ; FMA-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
642642 ; FMA-NEXT: retq
643643 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
644644 ret <8 x float> %res
648648 ; FMA-LABEL: test_x86_fnmsub_aba_ps_y:
649649 ; FMA: # %bb.0:
650650 ; FMA-NEXT: vmovaps (%rcx), %ymm0
651 ; FMA-NEXT: vfnmsub231ps (%rdx), %ymm0, %ymm0
651 ; FMA-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
652652 ; FMA-NEXT: retq
653653 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
654654 ret <8 x float> %res
658658 ; FMA-LABEL: test_x86_fnmsub_bba_ps_y:
659659 ; FMA: # %bb.0:
660660 ; FMA-NEXT: vmovaps (%rdx), %ymm0
661 ; FMA-NEXT: vfnmsub213ps (%rcx), %ymm0, %ymm0
661 ; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
662662 ; FMA-NEXT: retq
663663 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
664664 ret <8 x float> %res
701701 ; FMA-LABEL: test_x86_fnmsub_baa_pd:
702702 ; FMA: # %bb.0:
703703 ; FMA-NEXT: vmovapd (%rcx), %xmm0
704 ; FMA-NEXT: vfnmsub132pd (%rdx), %xmm0, %xmm0
704 ; FMA-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
705705 ; FMA-NEXT: retq
706706 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
707707 ret <2 x double> %res
711711 ; FMA-LABEL: test_x86_fnmsub_aba_pd:
712712 ; FMA: # %bb.0:
713713 ; FMA-NEXT: vmovapd (%rcx), %xmm0
714 ; FMA-NEXT: vfnmsub231pd (%rdx), %xmm0, %xmm0
714 ; FMA-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
715715 ; FMA-NEXT: retq
716716 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
717717 ret <2 x double> %res
721721 ; FMA-LABEL: test_x86_fnmsub_bba_pd:
722722 ; FMA: # %bb.0:
723723 ; FMA-NEXT: vmovapd (%rdx), %xmm0
724 ; FMA-NEXT: vfnmsub213pd (%rcx), %xmm0, %xmm0
724 ; FMA-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
725725 ; FMA-NEXT: retq
726726 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
727727 ret <2 x double> %res
732732 ; FMA-LABEL: test_x86_fnmsub_baa_pd_y:
733733 ; FMA: # %bb.0:
734734 ; FMA-NEXT: vmovapd (%rcx), %ymm0
735 ; FMA-NEXT: vfnmsub132pd (%rdx), %ymm0, %ymm0
735 ; FMA-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
736736 ; FMA-NEXT: retq
737737 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
738738 ret <4 x double> %res
742742 ; FMA-LABEL: test_x86_fnmsub_aba_pd_y:
743743 ; FMA: # %bb.0:
744744 ; FMA-NEXT: vmovapd (%rcx), %ymm0
745 ; FMA-NEXT: vfnmsub231pd (%rdx), %ymm0, %ymm0
745 ; FMA-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
746746 ; FMA-NEXT: retq
747747 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
748748 ret <4 x double> %res
752752 ; FMA-LABEL: test_x86_fnmsub_bba_pd_y:
753753 ; FMA: # %bb.0:
754754 ; FMA-NEXT: vmovapd (%rdx), %ymm0
755 ; FMA-NEXT: vfnmsub213pd (%rcx), %ymm0, %ymm0
755 ; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
756756 ; FMA-NEXT: retq
757757 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
758758 ret <4 x double> %res
77 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
88 ; CHECK-FMA: # %bb.0:
99 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
10 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
1011 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
1112 ;
1213 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
1314 ; CHECK-AVX512VL: # %bb.0:
1415 ; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
16 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
1517 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
1618 ;
1719 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
1921 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
2022 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
2123 ; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00]
24 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
2225 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
2326 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
2427 ret <4 x float> %res
2831 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_ss:
2932 ; CHECK-FMA: # %bb.0:
3033 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca]
34 ; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
3135 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
3236 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
3337 ;
3438 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
3539 ; CHECK-AVX512VL: # %bb.0:
3640 ; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca]
41 ; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
3742 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3843 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
3944 ;
4247 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
4348 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
4449 ; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00]
50 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
4551 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
4652 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
4753 ret <4 x float> %res
5258 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
5359 ; CHECK-FMA: # %bb.0:
5460 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
61 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
5562 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
5663 ;
5764 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
5865 ; CHECK-AVX512VL: # %bb.0:
5966 ; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
67 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
6068 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
6169 ;
6270 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
6472 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a]
6573 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
6674 ; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00]
75 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6776 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
6877 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6978 ret <2 x double> %res
7382 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_sd:
7483 ; CHECK-FMA: # %bb.0:
7584 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
85 ; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
7686 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
7787 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
7888 ;
7989 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
8090 ; CHECK-AVX512VL: # %bb.0:
8191 ; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
92 ; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
8293 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
8394 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
8495 ;
8798 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
8899 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
89100 ; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00]
101 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
90102 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
91103 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
92104 ret <2 x double> %res
97109 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
98110 ; CHECK-FMA: # %bb.0:
99111 ; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
112 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
100113 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
101114 ;
102115 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps:
103116 ; CHECK-AVX512VL: # %bb.0:
104117 ; CHECK-AVX512VL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
118 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
105119 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
106120 ;
107121 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps:
109123 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
110124 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
111125 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
126 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
112127 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
113128 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
114129 ret <4 x float> %res
119134 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
120135 ; CHECK-FMA: # %bb.0:
121136 ; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
137 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
122138 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
123139 ;
124140 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd:
125141 ; CHECK-AVX512VL: # %bb.0:
126142 ; CHECK-AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
143 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
127144 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
128145 ;
129146 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd:
131148 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
132149 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
133150 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
151 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
134152 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
135153 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
136154 ret <2 x double> %res
141159 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
142160 ; CHECK-FMA: # %bb.0:
143161 ; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
162 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
144163 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
145164 ;
146165 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps_256:
147166 ; CHECK-AVX512VL: # %bb.0:
148167 ; CHECK-AVX512VL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
168 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
149169 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
150170 ;
151171 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps_256:
153173 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
154174 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
155175 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
176 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
156177 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
157178 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
158179 ret <8 x float> %res
163184 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
164185 ; CHECK-FMA: # %bb.0:
165186 ; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
187 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
166188 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
167189 ;
168190 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd_256:
169191 ; CHECK-AVX512VL: # %bb.0:
170192 ; CHECK-AVX512VL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
193 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
171194 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
172195 ;
173196 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd_256:
175198 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
176199 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
177200 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
201 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
178202 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
179203 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
180204 ret <4 x double> %res
186210 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
187211 ; CHECK-FMA: # %bb.0:
188212 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
213 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
189214 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
190215 ;
191216 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
192217 ; CHECK-AVX512VL: # %bb.0:
193218 ; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
219 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
194220 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
195221 ;
196222 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
198224 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
199225 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
200226 ; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00]
227 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
201228 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
202229 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
203230 ret <4 x float> %res
207234 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_ss:
208235 ; CHECK-FMA: # %bb.0:
209236 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca]
237 ; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
210238 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
211239 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
212240 ;
213241 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
214242 ; CHECK-AVX512VL: # %bb.0:
215243 ; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca]
244 ; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
216245 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
217246 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
218247 ;
221250 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
222251 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
223252 ; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00]
253 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
224254 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
225255 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
226256 ret <4 x float> %res
231261 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
232262 ; CHECK-FMA: # %bb.0:
233263 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
264 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
234265 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
235266 ;
236267 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
237268 ; CHECK-AVX512VL: # %bb.0:
238269 ; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
270 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
239271 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
240272 ;
241273 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
243275 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a]
244276 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
245277 ; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00]
278 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
246279 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
247280 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
248281 ret <2 x double> %res
252285 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_sd:
253286 ; CHECK-FMA: # %bb.0:
254287 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca]
288 ; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
255289 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
256290 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
257291 ;
258292 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
259293 ; CHECK-AVX512VL: # %bb.0:
260294 ; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca]
295 ; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
261296 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
262297 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
263298 ;
266301 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
267302 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
268303 ; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00]
304 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
269305 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
270306 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
271307 ret <2 x double> %res
276312 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
277313 ; CHECK-FMA: # %bb.0:
278314 ; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
315 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
279316 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
280317 ;
281318 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps:
282319 ; CHECK-AVX512VL: # %bb.0:
283320 ; CHECK-AVX512VL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
321 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
284322 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
285323 ;
286324 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps:
288326 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
289327 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
290328 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
329 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
291330 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
292331 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
293332 ret <4 x float> %res
298337 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
299338 ; CHECK-FMA: # %bb.0:
300339 ; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
340 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
301341 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
302342 ;
303343 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd:
304344 ; CHECK-AVX512VL: # %bb.0:
305345 ; CHECK-AVX512VL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
346 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
306347 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
307348 ;
308349 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd:
310351 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
311352 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
312353 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
354 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
313355 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
314356 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
315357 ret <2 x double> %res
320362 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
321363 ; CHECK-FMA: # %bb.0:
322364 ; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
365 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
323366 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
324367 ;
325368 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps_256:
326369 ; CHECK-AVX512VL: # %bb.0:
327370 ; CHECK-AVX512VL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
371 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
328372 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
329373 ;
330374 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps_256:
332376 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
333377 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
334378 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
379 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
335380 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
336381 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
337382 ret <8 x float> %res
342387 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
343388 ; CHECK-FMA: # %bb.0:
344389 ; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
390 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
345391 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
346392 ;
347393 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd_256:
348394 ; CHECK-AVX512VL: # %bb.0:
349395 ; CHECK-AVX512VL-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
396 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
350397 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
351398 ;
352399 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd_256:
354401 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
355402 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
356403 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
404 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
357405 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
358406 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
359407 ret <4 x double> %res
365413 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
366414 ; CHECK-FMA: # %bb.0:
367415 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
416 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
368417 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
369418 ;
370419 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
371420 ; CHECK-AVX512VL: # %bb.0:
372421 ; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
422 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
373423 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
374424 ;
375425 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
377427 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
378428 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
379429 ; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00]
430 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
380431 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
381432 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
382433 ret <4 x float> %res
386437 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_ss:
387438 ; CHECK-FMA: # %bb.0:
388439 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca]
440 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
389441 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
390442 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
391443 ;
392444 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
393445 ; CHECK-AVX512VL: # %bb.0:
394446 ; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca]
447 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
395448 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
396449 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
397450 ;
400453 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
401454 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
402455 ; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00]
456 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
403457 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
404458 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
405459 ret <4 x float> %res
410464 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
411465 ; CHECK-FMA: # %bb.0:
412466 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
467 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
413468 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
414469 ;
415470 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
416471 ; CHECK-AVX512VL: # %bb.0:
417472 ; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
473 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
418474 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
419475 ;
420476 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
422478 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a]
423479 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
424480 ; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00]
481 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
425482 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
426483 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
427484 ret <2 x double> %res
431488 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_sd:
432489 ; CHECK-FMA: # %bb.0:
433490 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca]
491 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
434492 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
435493 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
436494 ;
437495 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
438496 ; CHECK-AVX512VL: # %bb.0:
439497 ; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca]
498 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
440499 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
441500 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
442501 ;
445504 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
446505 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
447506 ; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00]
507 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
448508 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
449509 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
450510 ret <2 x double> %res
455515 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
456516 ; CHECK-FMA: # %bb.0:
457517 ; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xac,0xc2]
518 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
458519 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
459520 ;
460521 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps:
461522 ; CHECK-AVX512VL: # %bb.0:
462523 ; CHECK-AVX512VL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
524 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
463525 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
464526 ;
465527 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps:
467529 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
468530 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
469531 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
532 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
470533 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
471534 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
472535 ret <4 x float> %res
477540 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
478541 ; CHECK-FMA: # %bb.0:
479542 ; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
543 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
480544 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
481545 ;
482546 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd:
483547 ; CHECK-AVX512VL: # %bb.0:
484548 ; CHECK-AVX512VL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
549 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
485550 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
486551 ;
487552 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd:
489554 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
490555 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
491556 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
557 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
492558 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
493559 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
494560 ret <2 x double> %res
499565 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
500566 ; CHECK-FMA: # %bb.0:
501567 ; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xac,0xc2]
568 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
502569 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
503570 ;
504571 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps_256:
505572 ; CHECK-AVX512VL: # %bb.0:
506573 ; CHECK-AVX512VL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
574 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
507575 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
508576 ;
509577 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps_256:
511579 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
512580 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
513581 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
582 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
514583 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
515584 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
516585 ret <8 x float> %res
521590 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
522591 ; CHECK-FMA: # %bb.0:
523592 ; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
593 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
524594 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
525595 ;
526596 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd_256:
527597 ; CHECK-AVX512VL: # %bb.0:
528598 ; CHECK-AVX512VL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
599 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
529600 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
530601 ;
531602 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd_256:
533604 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
534605 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
535606 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
607 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
536608 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
537609 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
538610 ret <4 x double> %res
544616 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss:
545617 ; CHECK-FMA: # %bb.0:
546618 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
619 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
547620 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
548621 ;
549622 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss:
550623 ; CHECK-AVX512VL: # %bb.0:
551624 ; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
625 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
552626 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
553627 ;
554628 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss:
556630 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
557631 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
558632 ; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00]
633 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
559634 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
560635 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
561636 ret <4 x float> %res
565640 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_ss:
566641 ; CHECK-FMA: # %bb.0:
567642 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca]
643 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2
568644 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
569645 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
570646 ;
571647 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss:
572648 ; CHECK-AVX512VL: # %bb.0:
573649 ; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca]
650 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2
574651 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
575652 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
576653 ;
579656 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
580657 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
581658 ; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00]
659 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
582660 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
583661 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
584662 ret <4 x float> %res
589667 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
590668 ; CHECK-FMA: # %bb.0:
591669 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
670 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
592671 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
593672 ;
594673 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd:
595674 ; CHECK-AVX512VL: # %bb.0:
596675 ; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
676 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
597677 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
598678 ;
599679 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd:
601681 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a]
602682 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
603683 ; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00]
684 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
604685 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
605686 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
606687 ret <2 x double> %res
610691 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_sd:
611692 ; CHECK-FMA: # %bb.0:
612693 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
694 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2
613695 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
614696 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
615697 ;
616698 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd:
617699 ; CHECK-AVX512VL: # %bb.0:
618700 ; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
701 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2
619702 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
620703 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
621704 ;
624707 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
625708 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
626709 ; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00]
710 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
627711 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
628712 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
629713 ret <2 x double> %res
634718 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps:
635719 ; CHECK-FMA: # %bb.0:
636720 ; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xae,0xc2]
721 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
637722 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
638723 ;
639724 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps:
640725 ; CHECK-AVX512VL: # %bb.0:
641726 ; CHECK-AVX512VL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
727 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
642728 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
643729 ;
644730 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps:
646732 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
647733 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
648734 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00]
735 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
649736 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
650737 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
651738 ret <4 x float> %res
656743 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd:
657744 ; CHECK-FMA: # %bb.0:
658745 ; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
746 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
659747 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
660748 ;
661749 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd:
662750 ; CHECK-AVX512VL: # %bb.0:
663751 ; CHECK-AVX512VL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
752 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
664753 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
665754 ;
666755 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd:
668757 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
669758 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
670759 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00]
760 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
671761 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
672762 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
673763 ret <2 x double> %res
678768 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256:
679769 ; CHECK-FMA: # %bb.0:
680770 ; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xae,0xc2]
771 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
681772 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
682773 ;
683774 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps_256:
684775 ; CHECK-AVX512VL: # %bb.0:
685776 ; CHECK-AVX512VL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
777 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
686778 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
687779 ;
688780 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps_256:
690782 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
691783 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
692784 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00]
785 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
693786 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
694787 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
695788 ret <8 x float> %res
700793 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256:
701794 ; CHECK-FMA: # %bb.0:
702795 ; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
796 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
703797 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
704798 ;
705799 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd_256:
706800 ; CHECK-AVX512VL: # %bb.0:
707801 ; CHECK-AVX512VL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
802 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
708803 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
709804 ;
710805 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd_256:
712807 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
713808 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
714809 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00]
810 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
715811 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
716812 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
717813 ret <4 x double> %res
723819 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps:
724820 ; CHECK-FMA: # %bb.0:
725821 ; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
822 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
726823 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
727824 ;
728825 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps:
729826 ; CHECK-AVX512VL: # %bb.0:
730827 ; CHECK-AVX512VL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
828 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
731829 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
732830 ;
733831 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps:
735833 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
736834 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
737835 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00]
836 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
738837 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
739838 %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
740839 ret <4 x float> %res
745844 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd:
746845 ; CHECK-FMA: # %bb.0:
747846 ; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
847 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
748848 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
749849 ;
750850 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd:
751851 ; CHECK-AVX512VL: # %bb.0:
752852 ; CHECK-AVX512VL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
853 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
753854 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
754855 ;
755856 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd:
757858 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
758859 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
759860 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00]
861 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
760862 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
761863 %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
762864 ret <2 x double> %res
767869 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256:
768870 ; CHECK-FMA: # %bb.0:
769871 ; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
872 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
770873 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
771874 ;
772875 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps_256:
773876 ; CHECK-AVX512VL: # %bb.0:
774877 ; CHECK-AVX512VL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
878 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
775879 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
776880 ;
777881 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps_256:
779883 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
780884 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
781885 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00]
886 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
782887 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
783888 %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
784889 ret <8 x float> %res
789894 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256:
790895 ; CHECK-FMA: # %bb.0:
791896 ; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
897 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
792898 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
793899 ;
794900 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd_256:
795901 ; CHECK-AVX512VL: # %bb.0:
796902 ; CHECK-AVX512VL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
903 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
797904 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
798905 ;
799906 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd_256:
801908 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
802909 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
803910 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00]
911 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
804912 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
805913 %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
806914 ret <4 x double> %res
812920 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps:
813921 ; CHECK-FMA: # %bb.0:
814922 ; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
923 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
815924 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
816925 ;
817926 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps:
818927 ; CHECK-AVX512VL: # %bb.0:
819928 ; CHECK-AVX512VL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
929 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
820930 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
821931 ;
822932 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps:
824934 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
825935 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
826936 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00]
937 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
827938 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
828939 %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
829940 ret <4 x float> %res
834945 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd:
835946 ; CHECK-FMA: # %bb.0:
836947 ; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
948 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
837949 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
838950 ;
839951 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd:
840952 ; CHECK-AVX512VL: # %bb.0:
841953 ; CHECK-AVX512VL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
954 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2
842955 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
843956 ;
844957 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd:
846959 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
847960 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
848961 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00]
962 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
849963 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
850964 %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
851965 ret <2 x double> %res
856970 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256:
857971 ; CHECK-FMA: # %bb.0:
858972 ; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa7,0xc2]
973 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
859974 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
860975 ;
861976 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps_256:
862977 ; CHECK-AVX512VL: # %bb.0:
863978 ; CHECK-AVX512VL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa7,0xc2]
979 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
864980 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
865981 ;
866982 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps_256:
868984 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
869985 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
870986 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00]
987 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
871988 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
872989 %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
873990 ret <8 x float> %res
878995 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256:
879996 ; CHECK-FMA: # %bb.0:
880997 ; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa7,0xc2]
998 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
881999 ; CHECK-FMA-NEXT: retq # encoding: [0xc3]
8821000 ;
8831001 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd_256:
8841002 ; CHECK-AVX512VL: # %bb.0:
8851003 ; CHECK-AVX512VL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa7,0xc2]
1004 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2
8861005 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
8871006 ;
8881007 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd_256:
8901009 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
8911010 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
8921011 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00]
1012 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
8931013 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
8941014 %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
8951015 ret <4 x double> %res
1414 ; GENERIC-LABEL: test_vfmaddpd_128:
1515 ; GENERIC: # %bb.0:
1616 ; GENERIC-NEXT: #APP
17 ; GENERIC-NEXT: vfmadd132pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
18 ; GENERIC-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
19 ; GENERIC-NEXT: vfmadd231pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
20 ; GENERIC-NEXT: vfmadd132pd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
21 ; GENERIC-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
22 ; GENERIC-NEXT: vfmadd231pd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
17 ; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
18 ; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
19 ; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
20 ; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
21 ; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
22 ; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
2323 ; GENERIC-NEXT: #NO_APP
2424 ; GENERIC-NEXT: retq # sched: [1:1.00]
2525 ;
2626 ; HASWELL-LABEL: test_vfmaddpd_128:
2727 ; HASWELL: # %bb.0:
2828 ; HASWELL-NEXT: #APP
29 ; HASWELL-NEXT: vfmadd132pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
30 ; HASWELL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
31 ; HASWELL-NEXT: vfmadd231pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
32 ; HASWELL-NEXT: vfmadd132pd (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
33 ; HASWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
34 ; HASWELL-NEXT: vfmadd231pd (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
29 ; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
30 ; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
31 ; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
32 ; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
33 ; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
34 ; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
3535 ; HASWELL-NEXT: #NO_APP
3636 ; HASWELL-NEXT: retq # sched: [7:1.00]
3737 ;
3838 ; BROADWELL-LABEL: test_vfmaddpd_128:
3939 ; BROADWELL: # %bb.0:
4040 ; BROADWELL-NEXT: #APP
41 ; BROADWELL-NEXT: vfmadd132pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
42 ; BROADWELL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
43 ; BROADWELL-NEXT: vfmadd231pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
44 ; BROADWELL-NEXT: vfmadd132pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
45 ; BROADWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
46 ; BROADWELL-NEXT: vfmadd231pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
41 ; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
42 ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
43 ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
44 ; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
45 ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
46 ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
4747 ; BROADWELL-NEXT: #NO_APP
4848 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4949 ;
5050 ; SKYLAKE-LABEL: test_vfmaddpd_128:
5151 ; SKYLAKE: # %bb.0:
5252 ; SKYLAKE-NEXT: #APP
53 ; SKYLAKE-NEXT: vfmadd132pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
54 ; SKYLAKE-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
55 ; SKYLAKE-NEXT: vfmadd231pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
56 ; SKYLAKE-NEXT: vfmadd132pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
57 ; SKYLAKE-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
58 ; SKYLAKE-NEXT: vfmadd231pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
53 ; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
54 ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
55 ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
56 ; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
57 ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
58 ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
5959 ; SKYLAKE-NEXT: #NO_APP
6060 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6161 ;
6262 ; KNL-LABEL: test_vfmaddpd_128:
6363 ; KNL: # %bb.0:
6464 ; KNL-NEXT: #APP
65 ; KNL-NEXT: vfmadd132pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
66 ; KNL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
67 ; KNL-NEXT: vfmadd231pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
68 ; KNL-NEXT: vfmadd132pd (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
69 ; KNL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
70 ; KNL-NEXT: vfmadd231pd (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
65 ; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
66 ; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
67 ; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
68 ; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
69 ; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
70 ; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
7171 ; KNL-NEXT: #NO_APP
7272 ; KNL-NEXT: retq # sched: [7:1.00]
7373 ;
7474 ; SKX-LABEL: test_vfmaddpd_128:
7575 ; SKX: # %bb.0:
7676 ; SKX-NEXT: #APP
77 ; SKX-NEXT: vfmadd132pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
78 ; SKX-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
79 ; SKX-NEXT: vfmadd231pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
80 ; SKX-NEXT: vfmadd132pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
81 ; SKX-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
82 ; SKX-NEXT: vfmadd231pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
77 ; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
78 ; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
79 ; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
80 ; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
81 ; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
82 ; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
8383 ; SKX-NEXT: #NO_APP
8484 ; SKX-NEXT: retq # sched: [7:1.00]
8585 ;
8686 ; ZNVER1-LABEL: test_vfmaddpd_128:
8787 ; ZNVER1: # %bb.0:
8888 ; ZNVER1-NEXT: #APP
89 ; ZNVER1-NEXT: vfmadd132pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
90 ; ZNVER1-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
91 ; ZNVER1-NEXT: vfmadd231pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
92 ; ZNVER1-NEXT: vfmadd132pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
93 ; ZNVER1-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
94 ; ZNVER1-NEXT: vfmadd231pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
89 ; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
90 ; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
91 ; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
92 ; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
93 ; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
94 ; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
9595 ; ZNVER1-NEXT: #NO_APP
9696 ; ZNVER1-NEXT: retq # sched: [1:0.50]
9797 tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
102102 ; GENERIC-LABEL: test_vfmaddpd_256:
103103 ; GENERIC: # %bb.0:
104104 ; GENERIC-NEXT: #APP
105 ; GENERIC-NEXT: vfmadd132pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
106 ; GENERIC-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
107 ; GENERIC-NEXT: vfmadd231pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
108 ; GENERIC-NEXT: vfmadd132pd (%rdi), %ymm1, %ymm0 # sched: [9:0.50]
109 ; GENERIC-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [9:0.50]
110 ; GENERIC-NEXT: vfmadd231pd (%rdi), %ymm1, %ymm0 # sched: [9:0.50]
105 ; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
106 ; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
107 ; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
108 ; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [9:0.50]
109 ; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [9:0.50]
110 ; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [9:0.50]
111111 ; GENERIC-NEXT: #NO_APP
112112 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
113113 ; GENERIC-NEXT: retq # sched: [1:1.00]
115115 ; HASWELL-LABEL: test_vfmaddpd_256:
116116 ; HASWELL: # %bb.0:
117117 ; HASWELL-NEXT: #APP
118 ; HASWELL-NEXT: vfmadd132pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
119 ; HASWELL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
120 ; HASWELL-NEXT: vfmadd231pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
121 ; HASWELL-NEXT: vfmadd132pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
122 ; HASWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
123 ; HASWELL-NEXT: vfmadd231pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
118 ; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
119 ; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
120 ; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
121 ; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
122 ; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
123 ; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
124124 ; HASWELL-NEXT: #NO_APP
125125 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
126126 ; HASWELL-NEXT: retq # sched: [7:1.00]
128128 ; BROADWELL-LABEL: test_vfmaddpd_256:
129129 ; BROADWELL: # %bb.0:
130130 ; BROADWELL-NEXT: #APP
131 ; BROADWELL-NEXT: vfmadd132pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
132 ; BROADWELL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
133 ; BROADWELL-NEXT: vfmadd231pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
134 ; BROADWELL-NEXT: vfmadd132pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
135 ; BROADWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
136 ; BROADWELL-NEXT: vfmadd231pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
131 ; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
132 ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
133 ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
134 ; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
135 ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
136 ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
137137 ; BROADWELL-NEXT: #NO_APP
138138 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
139139 ; BROADWELL-NEXT: retq # sched: [7:1.00]
141141 ; SKYLAKE-LABEL: test_vfmaddpd_256:
142142 ; SKYLAKE: # %bb.0:
143143 ; SKYLAKE-NEXT: #APP
144 ; SKYLAKE-NEXT: vfmadd132pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
145 ; SKYLAKE-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
146 ; SKYLAKE-NEXT: vfmadd231pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
147 ; SKYLAKE-NEXT: vfmadd132pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
148 ; SKYLAKE-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
149 ; SKYLAKE-NEXT: vfmadd231pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
144 ; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
145 ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
146 ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
147 ; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
148 ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
149 ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
150150 ; SKYLAKE-NEXT: #NO_APP
151151 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
152152 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
154154 ; KNL-LABEL: test_vfmaddpd_256:
155155 ; KNL: # %bb.0:
156156 ; KNL-NEXT: #APP
157 ; KNL-NEXT: vfmadd132pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
158 ; KNL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
159 ; KNL-NEXT: vfmadd231pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
160 ; KNL-NEXT: vfmadd132pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
161 ; KNL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
162 ; KNL-NEXT: vfmadd231pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
157 ; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
158 ; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
159 ; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
160 ; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
161 ; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
162 ; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
163163 ; KNL-NEXT: #NO_APP
164164 ; KNL-NEXT: retq # sched: [7:1.00]
165165 ;
166166 ; SKX-LABEL: test_vfmaddpd_256:
167167 ; SKX: # %bb.0:
168168 ; SKX-NEXT: #APP
169 ; SKX-NEXT: vfmadd132pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33]
170 ; SKX-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33]
171 ; SKX-NEXT: vfmadd231pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33]
172 ; SKX-NEXT: vfmadd132pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
173 ; SKX-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
174 ; SKX-NEXT: vfmadd231pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
169 ; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33]
170 ; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33]
171 ; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33]
172 ; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
173 ; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
174 ; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
175175 ; SKX-NEXT: #NO_APP
176176 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
177177 ; SKX-NEXT: retq # sched: [7:1.00]
179179 ; ZNVER1-LABEL: test_vfmaddpd_256:
180180 ; ZNVER1: # %bb.0:
181181 ; ZNVER1-NEXT: #APP
182 ; ZNVER1-NEXT: vfmadd132pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
183 ; ZNVER1-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
184 ; ZNVER1-NEXT: vfmadd231pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
185 ; ZNVER1-NEXT: vfmadd132pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
186 ; ZNVER1-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
187 ; ZNVER1-NEXT: vfmadd231pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
182 ; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
183 ; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
184 ; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
185 ; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
186 ; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
187 ; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
188188 ; ZNVER1-NEXT: #NO_APP
189189 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
190190 ; ZNVER1-NEXT: retq # sched: [1:0.50]
196196 ; GENERIC-LABEL: test_vfmaddps_128:
197197 ; GENERIC: # %bb.0:
198198 ; GENERIC-NEXT: #APP
199 ; GENERIC-NEXT: vfmadd132ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
200 ; GENERIC-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
201 ; GENERIC-NEXT: vfmadd231ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
202 ; GENERIC-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
203 ; GENERIC-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
204 ; GENERIC-NEXT: vfmadd231ps (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
199 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
200 ; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
201 ; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
202 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
203 ; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
204 ; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
205205 ; GENERIC-NEXT: #NO_APP
206206 ; GENERIC-NEXT: retq # sched: [1:1.00]
207207 ;
208208 ; HASWELL-LABEL: test_vfmaddps_128:
209209 ; HASWELL: # %bb.0:
210210 ; HASWELL-NEXT: #APP
211 ; HASWELL-NEXT: vfmadd132ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
212 ; HASWELL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
213 ; HASWELL-NEXT: vfmadd231ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
214 ; HASWELL-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
215 ; HASWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
216 ; HASWELL-NEXT: vfmadd231ps (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
211 ; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
212 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
213 ; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
214 ; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
215 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
216 ; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
217217 ; HASWELL-NEXT: #NO_APP
218218 ; HASWELL-NEXT: retq # sched: [7:1.00]
219219 ;
220220 ; BROADWELL-LABEL: test_vfmaddps_128:
221221 ; BROADWELL: # %bb.0:
222222 ; BROADWELL-NEXT: #APP
223 ; BROADWELL-NEXT: vfmadd132ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
224 ; BROADWELL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
225 ; BROADWELL-NEXT: vfmadd231ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
226 ; BROADWELL-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
227 ; BROADWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
228 ; BROADWELL-NEXT: vfmadd231ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
223 ; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
224 ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
225 ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
226 ; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
227 ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
228 ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
229229 ; BROADWELL-NEXT: #NO_APP
230230 ; BROADWELL-NEXT: retq # sched: [7:1.00]
231231 ;
232232 ; SKYLAKE-LABEL: test_vfmaddps_128:
233233 ; SKYLAKE: # %bb.0:
234234 ; SKYLAKE-NEXT: #APP
235 ; SKYLAKE-NEXT: vfmadd132ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
236 ; SKYLAKE-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
237 ; SKYLAKE-NEXT: vfmadd231ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
238 ; SKYLAKE-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
239 ; SKYLAKE-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
240 ; SKYLAKE-NEXT: vfmadd231ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
235 ; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
236 ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
237 ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
238 ; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
239 ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
240 ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
241241 ; SKYLAKE-NEXT: #NO_APP
242242 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
243243 ;
244244 ; KNL-LABEL: test_vfmaddps_128:
245245 ; KNL: # %bb.0:
246246 ; KNL-NEXT: #APP
247 ; KNL-NEXT: vfmadd132ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
248 ; KNL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
249 ; KNL-NEXT: vfmadd231ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
250 ; KNL-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
251 ; KNL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
252 ; KNL-NEXT: vfmadd231ps (%rdi), %xmm1, %xmm0 # sched: [11:0.50]
247 ; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
248 ; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
249 ; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
250 ; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
251 ; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
252 ; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
253253 ; KNL-NEXT: #NO_APP
254254 ; KNL-NEXT: retq # sched: [7:1.00]
255255 ;
256256 ; SKX-LABEL: test_vfmaddps_128:
257257 ; SKX: # %bb.0:
258258 ; SKX-NEXT: #APP
259 ; SKX-NEXT: vfmadd132ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
260 ; SKX-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
261 ; SKX-NEXT: vfmadd231ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
262 ; SKX-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
263 ; SKX-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
264 ; SKX-NEXT: vfmadd231ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
259 ; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
260 ; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
261 ; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
262 ; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
263 ; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
264 ; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
265265 ; SKX-NEXT: #NO_APP
266266 ; SKX-NEXT: retq # sched: [7:1.00]
267267 ;
268268 ; ZNVER1-LABEL: test_vfmaddps_128:
269269 ; ZNVER1: # %bb.0:
270270 ; ZNVER1-NEXT: #APP
271 ; ZNVER1-NEXT: vfmadd132ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
272 ; ZNVER1-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
273 ; ZNVER1-NEXT: vfmadd231ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
274 ; ZNVER1-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
275 ; ZNVER1-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
276 ; ZNVER1-NEXT: vfmadd231ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
271 ; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
272 ; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
273 ; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
274 ; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
275 ; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
276 ; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
277277 ; ZNVER1-NEXT: #NO_APP
278278 ; ZNVER1-NEXT: retq # sched: [1:0.50]
279279 tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
284284 ; GENERIC-LABEL: test_vfmaddps_256:
285285 ; GENERIC: # %bb.0:
286286 ; GENERIC-NEXT: #APP
287 ; GENERIC-NEXT: vfmadd132ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
288 ; GENERIC-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
289 ; GENERIC-NEXT: vfmadd231ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
290 ; GENERIC-NEXT: vfmadd132ps (%rdi), %ymm1, %ymm0 # sched: [9:0.50]
291 ; GENERIC-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [9:0.50]
292 ; GENERIC-NEXT: vfmadd231ps (%rdi), %ymm1, %ymm0 # sched: [9:0.50]
287 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
288 ; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
289 ; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
290 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [9:0.50]
291 ; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [9:0.50]
292 ; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [9:0.50]
293293 ; GENERIC-NEXT: #NO_APP
294294 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
295295 ; GENERIC-NEXT: retq # sched: [1:1.00]
297297 ; HASWELL-LABEL: test_vfmaddps_256:
298298 ; HASWELL: # %bb.0:
299299 ; HASWELL-NEXT: #APP
300 ; HASWELL-NEXT: vfmadd132ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
301 ; HASWELL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
302 ; HASWELL-NEXT: vfmadd231ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
303 ; HASWELL-NEXT: vfmadd132ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
304 ; HASWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
305 ; HASWELL-NEXT: vfmadd231ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
300 ; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
301 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
302 ; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
303 ; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
304 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
305 ; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
306306 ; HASWELL-NEXT: #NO_APP
307307 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
308308 ; HASWELL-NEXT: retq # sched: [7:1.00]
310310 ; BROADWELL-LABEL: test_vfmaddps_256:
311311 ; BROADWELL: # %bb.0:
312312 ; BROADWELL-NEXT: #APP
313 ; BROADWELL-NEXT: vfmadd132ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
314 ; BROADWELL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
315 ; BROADWELL-NEXT: vfmadd231ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
316 ; BROADWELL-NEXT: vfmadd132ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
317 ; BROADWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
318 ; BROADWELL-NEXT: vfmadd231ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
313 ; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
314 ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
315 ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
316 ; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
317 ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
318 ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
319319 ; BROADWELL-NEXT: #NO_APP
320320 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
321321 ; BROADWELL-NEXT: retq # sched: [7:1.00]
323323 ; SKYLAKE-LABEL: test_vfmaddps_256:
324324 ; SKYLAKE: # %bb.0:
325325 ; SKYLAKE-NEXT: #APP
326 ; SKYLAKE-NEXT: vfmadd132ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
327 ; SKYLAKE-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
328 ; SKYLAKE-NEXT: vfmadd231ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
329 ; SKYLAKE-NEXT: vfmadd132ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
330 ; SKYLAKE-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
331 ; SKYLAKE-NEXT: vfmadd231ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
326 ; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
327 ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
328 ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
329 ; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
330 ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
331 ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
332332 ; SKYLAKE-NEXT: #NO_APP
333333 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
334334 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
336336 ; KNL-LABEL: test_vfmaddps_256:
337337 ; KNL: # %bb.0:
338338 ; KNL-NEXT: #APP
339 ; KNL-NEXT: vfmadd132ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
340 ; KNL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
341 ; KNL-NEXT: vfmadd231ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
342 ; KNL-NEXT: vfmadd132ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
343 ; KNL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
344 ; KNL-NEXT: vfmadd231ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
339 ; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
340 ; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
341 ; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
342 ; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
343 ; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
344 ; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
345345 ; KNL-NEXT: #NO_APP
346346 ; KNL-NEXT: retq # sched: [7:1.00]
347347 ;
348348 ; SKX-LABEL: test_vfmaddps_256:
349349 ; SKX: # %bb.0:
350350 ; SKX-NEXT: #APP
351 ; SKX-NEXT: vfmadd132ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33]
352 ; SKX-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33]
353 ; SKX-NEXT: vfmadd231ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33]
354 ; SKX-NEXT: vfmadd132ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
355 ; SKX-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
356 ; SKX-NEXT: vfmadd231ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50]
351 ; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33]
352 ; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33]
353 ; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33]
354 ; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
355 ; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
356 ; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
357357 ; SKX-NEXT: #NO_APP
358358 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
359359 ; SKX-NEXT: retq # sched: [7:1.00]
361361 ; ZNVER1-LABEL: test_vfmaddps_256:
362362 ; ZNVER1: # %bb.0:
363363 ; ZNVER1-NEXT: #APP
364 ; ZNVER1-NEXT: vfmadd132ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
365 ; ZNVER1-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
366 ; ZNVER1-NEXT: vfmadd231ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
367 ; ZNVER1-NEXT: vfmadd132ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
368 ; ZNVER1-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
369 ; ZNVER1-NEXT: vfmadd231ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
364 ; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
365 ; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
366 ; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
367 ; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
368 ; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
369 ; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
370370 ; ZNVER1-NEXT: #NO_APP
371371 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
372372 ; ZNVER1-NEXT: retq # sched: [1:0.50]
378378 ; GENERIC-LABEL: test_vfmaddsd_128:
379379 ; GENERIC: # %bb.0:
380380 ; GENERIC-NEXT: #APP
381 ; GENERIC-NEXT: vfmadd132sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
382 ; GENERIC-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
383 ; GENERIC-NEXT: vfmadd231sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
384 ; GENERIC-NEXT: vfmadd132sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
385 ; GENERIC-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
386 ; GENERIC-NEXT: vfmadd231sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
381 ; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
382 ; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
383 ; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
384 ; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
385 ; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
386 ; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
387387 ; GENERIC-NEXT: #NO_APP
388388 ; GENERIC-NEXT: retq # sched: [1:1.00]
389389 ;
390390 ; HASWELL-LABEL: test_vfmaddsd_128:
391391 ; HASWELL: # %bb.0:
392392 ; HASWELL-NEXT: #APP
393 ; HASWELL-NEXT: vfmadd132sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
394 ; HASWELL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
395 ; HASWELL-NEXT: vfmadd231sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
396 ; HASWELL-NEXT: vfmadd132sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
397 ; HASWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
398 ; HASWELL-NEXT: vfmadd231sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
393 ; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
394 ; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
395 ; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
396 ; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
397 ; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
398 ; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
399399 ; HASWELL-NEXT: #NO_APP
400400 ; HASWELL-NEXT: retq # sched: [7:1.00]
401401 ;
402402 ; BROADWELL-LABEL: test_vfmaddsd_128:
403403 ; BROADWELL: # %bb.0:
404404 ; BROADWELL-NEXT: #APP
405 ; BROADWELL-NEXT: vfmadd132sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
406 ; BROADWELL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
407 ; BROADWELL-NEXT: vfmadd231sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
408 ; BROADWELL-NEXT: vfmadd132sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
409 ; BROADWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
410 ; BROADWELL-NEXT: vfmadd231sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
405 ; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
406 ; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
407 ; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
408 ; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
409 ; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
410 ; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
411411 ; BROADWELL-NEXT: #NO_APP
412412 ; BROADWELL-NEXT: retq # sched: [7:1.00]
413413 ;
414414 ; SKYLAKE-LABEL: test_vfmaddsd_128:
415415 ; SKYLAKE: # %bb.0:
416416 ; SKYLAKE-NEXT: #APP
417 ; SKYLAKE-NEXT: vfmadd132sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
418 ; SKYLAKE-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
419 ; SKYLAKE-NEXT: vfmadd231sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
420 ; SKYLAKE-NEXT: vfmadd132sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
421 ; SKYLAKE-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
422 ; SKYLAKE-NEXT: vfmadd231sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
417 ; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
418 ; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
419 ; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
420 ; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
421 ; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
422 ; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
423423 ; SKYLAKE-NEXT: #NO_APP
424424 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
425425 ;
426426 ; KNL-LABEL: test_vfmaddsd_128:
427427 ; KNL: # %bb.0:
428428 ; KNL-NEXT: #APP
429 ; KNL-NEXT: vfmadd132sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
430 ; KNL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
431 ; KNL-NEXT: vfmadd231sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
432 ; KNL-NEXT: vfmadd132sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
433 ; KNL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
434 ; KNL-NEXT: vfmadd231sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
429 ; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
430 ; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
431 ; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
432 ; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
433 ; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
434 ; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
435435 ; KNL-NEXT: #NO_APP
436436 ; KNL-NEXT: retq # sched: [7:1.00]
437437 ;
438438 ; SKX-LABEL: test_vfmaddsd_128:
439439 ; SKX: # %bb.0:
440440 ; SKX-NEXT: #APP
441 ; SKX-NEXT: vfmadd132sd %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
442 ; SKX-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
443 ; SKX-NEXT: vfmadd231sd %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
444 ; SKX-NEXT: vfmadd132sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
445 ; SKX-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
446 ; SKX-NEXT: vfmadd231sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
441 ; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
442 ; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
443 ; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
444 ; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
445 ; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
446 ; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
447447 ; SKX-NEXT: #NO_APP
448448 ; SKX-NEXT: retq # sched: [7:1.00]
449449 ;
450450 ; ZNVER1-LABEL: test_vfmaddsd_128:
451451 ; ZNVER1: # %bb.0:
452452 ; ZNVER1-NEXT: #APP
453 ; ZNVER1-NEXT: vfmadd132sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
454 ; ZNVER1-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
455 ; ZNVER1-NEXT: vfmadd231sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
456 ; ZNVER1-NEXT: vfmadd132sd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
457 ; ZNVER1-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
458 ; ZNVER1-NEXT: vfmadd231sd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
453 ; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
454 ; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
455 ; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
456 ; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
457 ; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
458 ; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
459459 ; ZNVER1-NEXT: #NO_APP
460460 ; ZNVER1-NEXT: retq # sched: [1:0.50]
461461 tail call void asm "vfmadd132sd $2, $1, $0 \0A\09 vfmadd213sd $2, $1, $0 \0A\09 vfmadd231sd $2, $1, $0 \0A\09 vfmadd132sd $3, $1, $0 \0A\09 vfmadd213sd $3, $1, $0 \0A\09 vfmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
466466 ; GENERIC-LABEL: test_vfmaddss_128:
467467 ; GENERIC: # %bb.0:
468468 ; GENERIC-NEXT: #APP
469 ; GENERIC-NEXT: vfmadd132ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
470 ; GENERIC-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
471 ; GENERIC-NEXT: vfmadd231ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
472 ; GENERIC-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
473 ; GENERIC-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
474 ; GENERIC-NEXT: vfmadd231ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
469 ; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
470 ; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
471 ; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
472 ; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
473 ; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
474 ; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
475475 ; GENERIC-NEXT: #NO_APP
476476 ; GENERIC-NEXT: retq # sched: [1:1.00]
477477 ;
478478 ; HASWELL-LABEL: test_vfmaddss_128:
479479 ; HASWELL: # %bb.0:
480480 ; HASWELL-NEXT: #APP
481 ; HASWELL-NEXT: vfmadd132ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
482 ; HASWELL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
483 ; HASWELL-NEXT: vfmadd231ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
484 ; HASWELL-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
485 ; HASWELL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
486 ; HASWELL-NEXT: vfmadd231ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
481 ; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
482 ; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
483 ; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
484 ; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
485 ; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
486 ; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
487487 ; HASWELL-NEXT: #NO_APP
488488 ; HASWELL-NEXT: retq # sched: [7:1.00]
489489 ;
490490 ; BROADWELL-LABEL: test_vfmaddss_128:
491491 ; BROADWELL: # %bb.0:
492492 ; BROADWELL-NEXT: #APP
493 ; BROADWELL-NEXT: vfmadd132ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
494 ; BROADWELL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
495 ; BROADWELL-NEXT: vfmadd231ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
496 ; BROADWELL-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
497 ; BROADWELL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
498 ; BROADWELL-NEXT: vfmadd231ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
493 ; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
494 ; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
495 ; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
496 ; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
497 ; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
498 ; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
499499 ; BROADWELL-NEXT: #NO_APP
500500 ; BROADWELL-NEXT: retq # sched: [7:1.00]
501501 ;
502502 ; SKYLAKE-LABEL: test_vfmaddss_128:
503503 ; SKYLAKE: # %bb.0:
504504 ; SKYLAKE-NEXT: #APP
505 ; SKYLAKE-NEXT: vfmadd132ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
506 ; SKYLAKE-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
507 ; SKYLAKE-NEXT: vfmadd231ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
508 ; SKYLAKE-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
509 ; SKYLAKE-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
510 ; SKYLAKE-NEXT: vfmadd231ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
505 ; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
506 ; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
507 ; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
508 ; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
509 ; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
510 ; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
511511 ; SKYLAKE-NEXT: #NO_APP
512512 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
513513 ;
514514 ; KNL-LABEL: test_vfmaddss_128:
515515 ; KNL: # %bb.0:
516516 ; KNL-NEXT: #APP
517 ; KNL-NEXT: vfmadd132ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
518 ; KNL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
519 ; KNL-NEXT: vfmadd231ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
520 ; KNL-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
521 ; KNL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
522 ; KNL-NEXT: vfmadd231ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50]
517 ; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
518 ; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
519 ; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
520 ; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
521 ; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
522 ; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
523523 ; KNL-NEXT: #NO_APP
524524 ; KNL-NEXT: retq # sched: [7:1.00]
525525 ;
526526 ; SKX-LABEL: test_vfmaddss_128:
527527 ; SKX: # %bb.0:
528528 ; SKX-NEXT: #APP
529 ; SKX-NEXT: vfmadd132ss %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
530 ; SKX-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
531 ; SKX-NEXT: vfmadd231ss %xmm2, %xmm1, %xmm0 # sched: [4:0.33]
532 ; SKX-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
533 ; SKX-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
534 ; SKX-NEXT: vfmadd231ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50]
529 ; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
530 ; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
531 ; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
532 ; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
533 ; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
534 ; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
535535 ; SKX-NEXT: #NO_APP
536536 ; SKX-NEXT: retq # sched: [7:1.00]
537537 ;
538538 ; ZNVER1-LABEL: test_vfmaddss_128:
539539 ; ZNVER1: # %bb.0:
540540 ; ZNVER1-NEXT: #APP
541