llvm.org GIT mirror llvm / 62dd6f6
[X86] Add DAG combine to merge vzext_movl with the various fp<->int conversion operations that only write the lower 64-bits of an xmm register and zero the rest.

Summary: We have isel patterns for this, but we're missing some load patterns and all broadcast patterns. A DAG combine seems like a better fit for this.

Reviewers: RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D56971

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352260 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper, 1 year, 4 months ago
5 changed files with 26 additions and 112 deletions.
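For a concrete picture of the redundancy being removed, here is a minimal source-level sketch (not taken from this patch; the function name is made up). cvtpd2dq and the other conversions handled below only write the low 64 bits of their xmm destination and zero the rest, so a following zero-extending move of the low quadword adds nothing. The existing isel patterns covered the register form and some loads, but other load forms and all broadcast forms were missing, so a stray vmovq could survive; the DAG combine added here handles them uniformly.

// Illustrative C++ intrinsics sketch; the function name is hypothetical.
#include <immintrin.h>

__m128i cvt_low_and_keep_upper_zero(__m128d v) {
  // cvtpd2dq writes two i32 results to the low 64 bits of the destination
  // and zeroes the upper 64 bits.
  __m128i c = _mm_cvtpd_epi32(v);
  // movq zero-extends the low 64 bits; it is redundant here because the
  // upper half is already zero, which is what the new combine recognizes.
  return _mm_move_epi64(c);
}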
@@ -32498,6 +32498,32 @@
     return SDValue(N, 0);
   }

+  // Look for a v2i64/v2f64 VZEXT_MOVL of a node that already produces zeros
+  // in the upper 64 bits.
+  // TODO: Can we generalize this using computeKnownBits.
+  if (N->getOpcode() == X86ISD::VZEXT_MOVL &&
+      (VT == MVT::v2f64 || VT == MVT::v2i64) &&
+      N->getOperand(0).getOpcode() == ISD::BITCAST &&
+      (N->getOperand(0).getOperand(0).getValueType() == MVT::v4f32 ||
+       N->getOperand(0).getOperand(0).getValueType() == MVT::v4i32)) {
+    SDValue In = N->getOperand(0).getOperand(0);
+    switch (In.getOpcode()) {
+    default:
+      break;
+    case X86ISD::CVTP2SI:   case X86ISD::CVTP2UI:
+    case X86ISD::MCVTP2SI:  case X86ISD::MCVTP2UI:
+    case X86ISD::CVTTP2SI:  case X86ISD::CVTTP2UI:
+    case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI:
+    case X86ISD::CVTSI2P:   case X86ISD::CVTUI2P:
+    case X86ISD::MCVTSI2P:  case X86ISD::MCVTUI2P:
+    case X86ISD::VFPROUND:  case X86ISD::VMFPROUND:
+      if (In.getOperand(0).getValueType() == MVT::v2f64 ||
+          In.getOperand(0).getValueType() == MVT::v2i64)
+        return N->getOperand(0); // return the bitcast
+      break;
+    }
+  }
+
   // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
   // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
   // FIXME: This can probably go away once we default to widening legalization.
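A note on the TODO above: one way to generalize past the fixed opcode list would be to ask the DAG whether the source elements that land in the upper 64 bits are already known zero. The sketch below is not part of this commit and the exact query is an assumption; it restricts itself to the v4i32 source case so that known-bits reasoning applies to integer elements, and it would only fire once the relevant conversion nodes report their zeroed elements to computeKnownBits.

// Hedged sketch of the computeKnownBits generalization; assumes it runs in
// the same combine, with N, VT and DAG in scope.
if (N->getOpcode() == X86ISD::VZEXT_MOVL &&
    (VT == MVT::v2f64 || VT == MVT::v2i64) &&
    N->getOperand(0).getOpcode() == ISD::BITCAST &&
    N->getOperand(0).getOperand(0).getValueType() == MVT::v4i32) {
  SDValue In = N->getOperand(0).getOperand(0);
  // Elements 2 and 3 of the v4i32 source occupy the upper 64 bits of the
  // bitcast v2i64/v2f64 value.
  APInt UpperElts = APInt::getHighBitsSet(4, 2);
  KnownBits Known = DAG.computeKnownBits(In, UpperElts);
  if (Known.isZero())
    return N->getOperand(0); // the VZEXT_MOVL is a no-op; keep the bitcast
}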
@@ -8183,12 +8183,6 @@
                              v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;

-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
-            (VCVTPD2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-            (VCVTPD2PSZ128rm addr:$src)>;
   def : Pat<(v2f64 (extloadv2f32 addr:$src)),
             (VCVTPS2PDZ128rm addr:$src)>;
   def : Pat<(v4f64 (extloadv4f32 addr:$src)),
@@ -8830,25 +8824,6 @@
 }

 let Predicates = [HasAVX512, HasVLX] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
-            (VCVTPD2DQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-            (VCVTPD2DQZ128rm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
-            (VCVTPD2UDQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
-            (VCVTTPD2DQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-            (VCVTTPD2DQZ128rm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
-            (VCVTTPD2UDQZ128rr VR128X:$src)>;
-
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (VCVTDQ2PDZ128rm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
@@ -8861,13 +8836,6 @@
 }

 let Predicates = [HasDQI, HasVLX] in {
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
-            (VCVTQQ2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
-            (VCVTUQQ2PSZ128rr VR128X:$src)>;
-
   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
   // patterns have been disabled with null_frag.
   def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
@@ -1616,21 +1616,6 @@
             (VCVTTPD2DQYrm addr:$src)>;
 }

-let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-            (VCVTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-            (VCVTPD2DQrm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-            (VCVTTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-            (VCVTTPD2DQrm addr:$src)>;
-} // Predicates = [HasAVX, NoVLX]
-
 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -1641,21 +1626,6 @@
                       [(set VR128:$dst,
                        (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>,
                       Sched<[WriteCvtPD2ILd]>;
-
-let Predicates = [UseSSE2] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-            (CVTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
-            (CVTPD2DQrm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-            (CVTTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
-            (CVTTPD2DQrm addr:$src)>;
-} // Predicates = [UseSSE2]

 // Convert packed single to packed double
 let Predicates = [HasAVX, NoVLX] in {
@@ -1789,33 +1759,11 @@
                 [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
                 Sched<[WriteCvtPD2PS.Folded]>;

-// AVX 256-bit register conversion intrinsics
-// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
-// whenever possible to avoid declaring two versions of each one.
-
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
             (VCVTPD2PSYrr VR256:$src)>;
   def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
-
-  // Match fpround and fpextend for 128/256-bit conversions
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-            (VCVTPD2PSrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-            (VCVTPD2PSrm addr:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
-  // Match fpround and fpextend for 128 conversions
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-            (CVTPD2PSrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
-            (CVTPD2PSrm addr:$src)>;
 }

 //===----------------------------------------------------------------------===//
@@ -222,8 +222,6 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
-; X86-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT:    # xmm1 = xmm1[0],zero
 ; X86-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -232,8 +230,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
-; X64-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT:    # xmm1 = xmm1[0],zero
 ; X64-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
@@ -444,8 +440,6 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
-; X86-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT:    # xmm1 = xmm1[0],zero
 ; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -454,8 +448,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
-; X64-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT:    # xmm1 = xmm1[0],zero
 ; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
@@ -3326,8 +3326,6 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
-; X86-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT:    # xmm1 = xmm1[0],zero
 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -3336,8 +3334,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
-; X64-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT:    # xmm1 = xmm1[0],zero
 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
@@ -3380,8 +3376,6 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
-; X86-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT:    # xmm1 = xmm1[0],zero
 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -3390,8 +3384,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
-; X64-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT:    # xmm1 = xmm1[0],zero
 ; X64-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
@@ -3434,8 +3426,6 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
-; X86-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT:    # xmm1 = xmm1[0],zero
 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -3444,8 +3434,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
-; X64-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT:    # xmm1 = xmm1[0],zero
 ; X64-NEXT:    vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
@@ -3615,8 +3603,6 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
-; X86-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT:    # xmm1 = xmm1[0],zero
 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -3625,8 +3611,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
-; X64-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT:    # xmm1 = xmm1[0],zero
 ; X64-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
@@ -3669,8 +3653,6 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
-; X86-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT:    # xmm1 = xmm1[0],zero
 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -3679,8 +3661,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
-; X64-NEXT:    vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT:    # xmm1 = xmm1[0],zero
 ; X64-NEXT:    vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]