llvm.org GIT mirror llvm / 21e3dfb
Implement x86 h-register extract support.

- Add patterns for h-register extract, which avoids a shift and mask, and in some cases a temporary register.
- Add address-mode matching for turning (X>>(8-n))&(255<<n), where n is a valid address-mode scale value, into an h-register extract and a scaled-offset address.
- Replace X86's MOV32to32_ and related instructions with the new target-independent COPY_TO_SUBREG instruction.

On x86-64 there are complicated constraints on h registers, and CodeGen doesn't currently provide a high-level way to express all of them, so they are handled with a bunch of special code. This code currently only supports extracts where the result is used by a zero-extend or a store, though these are fairly common.

These transformations are not always beneficial; since there are only 4 h registers, they sometimes require extra move instructions, and this sometimes increases register pressure because it can force out values that would otherwise be in one of those registers. However, this appears to be relatively uncommon.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68962 91177308-0d34-0410-b5e6-96231b3b80d8

Dan Gohman, 10 years ago
12 changed file(s) with 540 addition(s) and 100 deletion(s).
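For illustration, here is a rough C++ sketch (not part of this commit; function names are invented) of the source-level shapes the new address-mode matching targets, mirroring the foo8 and bar8 cases in the tests added below:

// Hypothetical C++ analogues of the new test cases. In both, the byte in
// bits 8-15 of x becomes the index of a scale-8 address, so the shift and
// mask can be replaced by an h-register extract such as "movzbl %ah, %ecx".
double foo8(const double *p, unsigned x) {
  // (x >> 8) & 255: the extracted byte is the index, scaled by sizeof(double).
  return p[(x >> 8) & 255];
}
char bar8(const char *p, unsigned x) {
  // (x >> 5) & 2040 == ((x >> 8) & 255) * 8: the (X>>(8-n))&(255<<n) form
  // from the commit message with n=3, so the shift and mask fold into the
  // address-mode scale.
  return p[(x >> 5) & 2040];
}

In both cases the extracted byte can live in AH/BH/CH/DH and feed the scaled address directly, avoiding a separate shift, mask, and often a temporary register.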
996996 return false;
997997
998998 // First issue a copy to GR16_ or GR32_.
999 unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16to16_ : X86::MOV32to32_;
999 unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
10001000 const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
10011001 ? X86::GR16_RegisterClass : X86::GR32_RegisterClass;
10021002 unsigned CopyReg = createResultReg(CopyRC);
10181018 break;
10191019
10201020 case ISD::AND: {
1021 // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1022 // allows us to fold the shift into this addressing mode.
1021 // Perform some heroic transforms on an and of a constant-count shift
1022 // with a constant to enable use of the scaled offset field.
1023
10231024 SDValue Shift = N.getOperand(0);
1024 if (Shift.getOpcode() != ISD::SHL) break;
1025 if (Shift.getNumOperands() != 2) break;
10251026
10261027 // Scale must not be used already.
10271028 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
10281029
10291030 // Not when RIP is used as the base.
10301031 if (AM.isRIPRel) break;
1031
1032
1033 SDValue X = Shift.getOperand(0);
10321034 ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
10331035 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
10341036 if (!C1 || !C2) break;
1037
1038 // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
1039 // allows us to convert the shift and and into an h-register extract and
1040 // a scaled index.
1041 if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
1042 unsigned ScaleLog = 8 - C1->getZExtValue();
1043 if (ScaleLog > 0 && ScaleLog < 64 &&
1044 C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
1045 SDValue Eight = CurDAG->getConstant(8, MVT::i8);
1046 SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
1047 SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1048 X, Eight);
1049 SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
1050 Srl, Mask);
1051
1052 // Insert the new nodes into the topological ordering.
1053 if (Eight.getNode()->getNodeId() == -1 ||
1054 Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1055 CurDAG->RepositionNode(X.getNode(), Eight.getNode());
1056 Eight.getNode()->setNodeId(X.getNode()->getNodeId());
1057 }
1058 if (Mask.getNode()->getNodeId() == -1 ||
1059 Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1060 CurDAG->RepositionNode(X.getNode(), Mask.getNode());
1061 Mask.getNode()->setNodeId(X.getNode()->getNodeId());
1062 }
1063 if (Srl.getNode()->getNodeId() == -1 ||
1064 Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1065 CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
1066 Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
1067 }
1068 if (And.getNode()->getNodeId() == -1 ||
1069 And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1070 CurDAG->RepositionNode(N.getNode(), And.getNode());
1071 And.getNode()->setNodeId(N.getNode()->getNodeId());
1072 }
1073 CurDAG->ReplaceAllUsesWith(N, And);
1074 AM.IndexReg = And;
1075 AM.Scale = (1 << ScaleLog);
1076 return false;
1077 }
1078 }
1079
1080 // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1081 // allows us to fold the shift into this addressing mode.
1082 if (Shift.getOpcode() != ISD::SHL) break;
10351083
10361084 // Not likely to be profitable if either the AND or SHIFT node has more
10371085 // than one use (unless all uses are for address computation). Besides,
10451093 break;
10461094
10471095 // Get the new AND mask, this folds to a constant.
1048 SDValue X = Shift.getOperand(0);
10491096 SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
10501097 SDValue(C2, 0), SDValue(C1, 0));
10511098 SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
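The four nearly identical blocks above keep newly created nodes ahead of their users in the DAG's topological ordering. As a rough sketch (not part of this commit), that idiom could be factored into a hypothetical helper:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Hypothetical helper: ensure node N precedes its user in the topological
// order. A node fresh from getNode() has NodeId == -1; an id greater than the
// user's means N currently sits after its user, so reposition it and give it
// the user's id, exactly as the inline blocks above do.
static void InsertBeforeUser(SelectionDAG *CurDAG, SDValue User, SDValue N) {
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > User.getNode()->getNodeId()) {
    CurDAG->RepositionNode(User.getNode(), N.getNode());
    N.getNode()->setNodeId(User.getNode()->getNodeId());
  }
}

With such a helper, the sequence above would reduce to four calls: InsertBeforeUser(CurDAG, X, Eight), InsertBeforeUser(CurDAG, X, Mask), InsertBeforeUser(CurDAG, Shift, Srl), and InsertBeforeUser(CurDAG, N, And).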
15211521
15221522 // r & (2^32-1) ==> movz
15231523 def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
1524 (MOVZX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>;
1524 (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
15251525 // r & (2^16-1) ==> movz
15261526 def : Pat<(and GR64:$src, 0xffff),
15271527 (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
15301530 (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
15311531 // r & (2^8-1) ==> movz
15321532 def : Pat<(and GR32:$src1, 0xff),
1533 (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit)))>,
1533 (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
15341534 Requires<[In64BitMode]>;
15351535 // r & (2^8-1) ==> movz
15361536 def : Pat<(and GR16:$src1, 0xff),
15391539
15401540 // sext_inreg patterns
15411541 def : Pat<(sext_inreg GR64:$src, i32),
1542 (MOVSX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>;
1542 (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
15431543 def : Pat<(sext_inreg GR64:$src, i16),
1544 (MOVSX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
1544 (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
15451545 def : Pat<(sext_inreg GR64:$src, i8),
1546 (MOVSX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
1546 (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
15471547 def : Pat<(sext_inreg GR32:$src, i8),
1548 (MOVSX32rr8 (i8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)))>,
1548 (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
15491549 Requires<[In64BitMode]>;
15501550 def : Pat<(sext_inreg GR16:$src, i8),
15511551 (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
15531553
15541554 // trunc patterns
15551555 def : Pat<(i32 (trunc GR64:$src)),
1556 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
1556 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
15571557 def : Pat<(i16 (trunc GR64:$src)),
1558 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
1558 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
15591559 def : Pat<(i8 (trunc GR64:$src)),
1560 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
1560 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
15611561 def : Pat<(i8 (trunc GR32:$src)),
1562 (i8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
1562 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
15631563 Requires<[In64BitMode]>;
15641564 def : Pat<(i8 (trunc GR16:$src)),
1565 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit))>,
1565 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
1566 Requires<[In64BitMode]>;
1567
1568 // h-register tricks.
1569 // For now, be conservative and only do the extract if the value is immediately
1570 // zero-extended or stored, which are somewhat common cases. This uses a bunch
1571 // of code to prevent a register requiring a REX prefix from being allocated in
1572 // the same instruction as the h register, as there's currently no way to
1573 // describe this requirement to the register allocator.
1574
1575 // h-register extract and zero-extend.
1576 def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
1577 (SUBREG_TO_REG
1578 (i64 0),
1579 (MOVZX32_NOREXrr8
1580 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR64:$src, GR64_),
1581 x86_subreg_8bit_hi)),
1582 x86_subreg_32bit)>;
1583 def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
1584 (MOVZX32_NOREXrr8
1585 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
1586 x86_subreg_8bit_hi))>,
1587 Requires<[In64BitMode]>;
1588 def : Pat<(srl_su GR16:$src, (i8 8)),
1589 (EXTRACT_SUBREG
1590 (MOVZX32_NOREXrr8
1591 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
1592 x86_subreg_8bit_hi)),
1593 x86_subreg_16bit)>,
1594 Requires<[In64BitMode]>;
1595
1596 // h-register extract and store.
1597 def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
1598 (MOV8mr_NOREX
1599 addr:$dst,
1600 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR64:$src, GR64_),
1601 x86_subreg_8bit_hi))>;
1602 def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
1603 (MOV8mr_NOREX
1604 addr:$dst,
1605 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
1606 x86_subreg_8bit_hi))>,
1607 Requires<[In64BitMode]>;
1608 def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
1609 (MOV8mr_NOREX
1610 addr:$dst,
1611 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
1612 x86_subreg_8bit_hi))>,
15661613 Requires<[In64BitMode]>;
15671614
15681615 // (shl x, 1) ==> (add x, x)
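A rough C++ sketch (not part of this commit; names invented) of the source shapes the 64-bit h-register patterns above are aimed at, mirroring the qux64 and foo64 test cases added below:

// Zero-extend case: matches (and (srl_su GR64:$src, (i8 8)), (i64 255)) and
// is expected to become a MOVZX32_NOREXrr8 of the h sub-register wrapped in
// SUBREG_TO_REG, rather than a 64-bit shift followed by a mask.
unsigned long long qux(unsigned long long x) {
  return (x >> 8) & 255;
}

// Store case: matches (store (i8 (trunc_su (srl_su GR64:$src, (i8 8))))) and
// is expected to become a MOV8mr_NOREX of the h sub-register, so the byte
// store never needs a REX prefix.
void stash(unsigned long long x, unsigned char *p) {
  *p = (unsigned char)(x >> 8);
}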
257257 { X86::JMP64r, X86::JMP64m, 1 },
258258 { X86::MOV16ri, X86::MOV16mi, 0 },
259259 { X86::MOV16rr, X86::MOV16mr, 0 },
260 { X86::MOV16to16_, X86::MOV16_mr, 0 },
261260 { X86::MOV32ri, X86::MOV32mi, 0 },
262261 { X86::MOV32rr, X86::MOV32mr, 0 },
263 { X86::MOV32to32_, X86::MOV32_mr, 0 },
264262 { X86::MOV64ri32, X86::MOV64mi32, 0 },
265263 { X86::MOV64rr, X86::MOV64mr, 0 },
266264 { X86::MOV8ri, X86::MOV8mi, 0 },
371369 { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
372370 { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
373371 { X86::MOV16rr, X86::MOV16rm },
374 { X86::MOV16to16_, X86::MOV16_rm },
375372 { X86::MOV32rr, X86::MOV32rm },
376 { X86::MOV32to32_, X86::MOV32_rm },
377373 { X86::MOV64rr, X86::MOV64rm },
378374 { X86::MOV64toPQIrr, X86::MOVQI2PQIrm },
379375 { X86::MOV64toSDrr, X86::MOV64toSDrm },
403399 { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
404400 { X86::MOVZX16rr8, X86::MOVZX16rm8 },
405401 { X86::MOVZX32rr16, X86::MOVZX32rm16 },
402 { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
406403 { X86::MOVZX32rr8, X86::MOVZX32rm8 },
407404 { X86::MOVZX64rr16, X86::MOVZX64rm16 },
408405 { X86::MOVZX64rr32, X86::MOVZX64rm32 },
671668 case X86::MOV16rr:
672669 case X86::MOV32rr:
673670 case X86::MOV64rr:
674 case X86::MOV16to16_:
675 case X86::MOV32to32_:
676671 case X86::MOVSSrr:
677672 case X86::MOVSDrr:
678673
709704 default: break;
710705 case X86::MOV8rm:
711706 case X86::MOV16rm:
712 case X86::MOV16_rm:
713707 case X86::MOV32rm:
714 case X86::MOV32_rm:
715708 case X86::MOV64rm:
716709 case X86::LD_Fp64m:
717710 case X86::MOVSSrm:
740733 default: break;
741734 case X86::MOV8mr:
742735 case X86::MOV16mr:
743 case X86::MOV16_mr:
744736 case X86::MOV32mr:
745 case X86::MOV32_mr:
746737 case X86::MOV64mr:
747738 case X86::ST_FpP64m:
748739 case X86::MOVSSmr:
794785 default: break;
795786 case X86::MOV8rm:
796787 case X86::MOV16rm:
797 case X86::MOV16_rm:
798788 case X86::MOV32rm:
799 case X86::MOV32_rm:
800789 case X86::MOV64rm:
801790 case X86::LD_Fp64m:
802791 case X86::MOVSSrm:
16691658 Opc = X86::MOV16rr;
16701659 } else if (DestRC == &X86::GR8RegClass) {
16711660 Opc = X86::MOV8rr;
1661 } else if (DestRC == &X86::GR64_RegClass) {
1662 Opc = X86::MOV64rr;
16721663 } else if (DestRC == &X86::GR32_RegClass) {
1673 Opc = X86::MOV32_rr;
1664 Opc = X86::MOV32rr;
16741665 } else if (DestRC == &X86::GR16_RegClass) {
1675 Opc = X86::MOV16_rr;
1666 Opc = X86::MOV16rr;
1667 } else if (DestRC == &X86::GR8_RegClass) {
1668 Opc = X86::MOV8rr;
1669 } else if (DestRC == &X86::GR64_NOREXRegClass) {
1670 Opc = X86::MOV64rr;
1671 } else if (DestRC == &X86::GR32_NOREXRegClass) {
1672 Opc = X86::MOV32rr;
1673 } else if (DestRC == &X86::GR16_NOREXRegClass) {
1674 Opc = X86::MOV16rr;
1675 } else if (DestRC == &X86::GR8_NOREXRegClass) {
1676 Opc = X86::MOV8rr;
16761677 } else if (DestRC == &X86::RFP32RegClass) {
16771678 Opc = X86::MOV_Fp3232;
16781679 } else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) {
17201721 return true;
17211722 }
17221723 }
1723
1724
17241725 // Moving from ST(0) turns into FpGET_ST0_32 etc.
17251726 if (SrcRC == &X86::RSTRegClass) {
17261727 // Copying from ST(0)/ST(1).
17781779 Opc = X86::MOV16mr;
17791780 } else if (RC == &X86::GR8RegClass) {
17801781 Opc = X86::MOV8mr;
1782 } else if (RC == &X86::GR64_RegClass) {
1783 Opc = X86::MOV64mr;
17811784 } else if (RC == &X86::GR32_RegClass) {
1782 Opc = X86::MOV32_mr;
1785 Opc = X86::MOV32mr;
17831786 } else if (RC == &X86::GR16_RegClass) {
1784 Opc = X86::MOV16_mr;
1787 Opc = X86::MOV16mr;
1788 } else if (RC == &X86::GR8_RegClass) {
1789 Opc = X86::MOV8mr;
1790 } else if (RC == &X86::GR64_NOREXRegClass) {
1791 Opc = X86::MOV64mr;
1792 } else if (RC == &X86::GR32_NOREXRegClass) {
1793 Opc = X86::MOV32mr;
1794 } else if (RC == &X86::GR16_NOREXRegClass) {
1795 Opc = X86::MOV16mr;
1796 } else if (RC == &X86::GR8_NOREXRegClass) {
1797 Opc = X86::MOV8mr;
17851798 } else if (RC == &X86::RFP80RegClass) {
17861799 Opc = X86::ST_FpP80m; // pops
17871800 } else if (RC == &X86::RFP64RegClass) {
18461859 Opc = X86::MOV16rm;
18471860 } else if (RC == &X86::GR8RegClass) {
18481861 Opc = X86::MOV8rm;
1862 } else if (RC == &X86::GR64_RegClass) {
1863 Opc = X86::MOV64rm;
18491864 } else if (RC == &X86::GR32_RegClass) {
1850 Opc = X86::MOV32_rm;
1865 Opc = X86::MOV32rm;
18511866 } else if (RC == &X86::GR16_RegClass) {
1852 Opc = X86::MOV16_rm;
1867 Opc = X86::MOV16rm;
1868 } else if (RC == &X86::GR8_RegClass) {
1869 Opc = X86::MOV8rm;
1870 } else if (RC == &X86::GR64_NOREXRegClass) {
1871 Opc = X86::MOV64rm;
1872 } else if (RC == &X86::GR32_NOREXRegClass) {
1873 Opc = X86::MOV32rm;
1874 } else if (RC == &X86::GR16_NOREXRegClass) {
1875 Opc = X86::MOV16rm;
1876 } else if (RC == &X86::GR8_NOREXRegClass) {
1877 Opc = X86::MOV8rm;
18531878 } else if (RC == &X86::RFP80RegClass) {
18541879 Opc = X86::LD_Fp80m;
18551880 } else if (RC == &X86::RFP64RegClass) {
180180 def f80mem : X86MemOperand<"printf80mem">;
181181 def f128mem : X86MemOperand<"printf128mem">;
182182
183 // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
184 // plain GR64, so that it doesn't potentially require a REX prefix.
185 def i8mem_NOREX : Operand<i64> {
186 let PrintMethod = "printi8mem";
187 let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm);
188 }
189
183190 def lea32mem : Operand<i32> {
184191 let PrintMethod = "printlea32mem";
185192 let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
397404 def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
398405 return N->hasOneUse();
399406 }]>;
407 // An 'srl' node with a single use.
408 def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
409 return N->hasOneUse();
410 }]>;
411 // A 'trunc' node with a single use.
412 def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
413 return N->hasOneUse();
414 }]>;
400415
401416 // 'shld' and 'shrd' instruction patterns. Note that even though these have
402417 // the srl and shl in their patterns, the C++ code must still check for them,
766781 def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
767782 "mov{l}\t{$src, $dst|$dst, $src}",
768783 [(store GR32:$src, addr:$dst)]>;
769
784
785 // A version of MOV8mr that uses i8mem_NOREX so that it can be used for
786 // storing h registers, which can't be encoded when a REX prefix is present.
787 def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8:$src),
788 "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
789
770790 //===----------------------------------------------------------------------===//
771791 // Fixed-Register Multiplication and Division Instructions...
772792 //
28982918 "movz{wl|x}\t{$src, $dst|$dst, $src}",
28992919 [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
29002920
2921 // These are the same as the regular MOVZX32rr8 and MOVZX32rm8
2922 // except that they use GR32_NOREX for the output operand register class
2923 // instead of GR32. This allows them to operate on h registers on x86-64.
2924 def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
2925 (outs GR32_NOREX:$dst), (ins GR8:$src),
2926 "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
2927 []>, TB;
2928 def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
2929 (outs GR32_NOREX:$dst), (ins i8mem:$src),
2930 "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
2931 []>, TB;
2932
29012933 let neverHasSideEffects = 1 in {
29022934 let Defs = [AX], Uses = [AL] in
29032935 def CBW : I<0x98, RawFrm, (outs), (ins),
29322964 def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
29332965 "xor{l}\t$dst, $dst",
29342966 [(set GR32:$dst, 0)]>;
2935 }
2936
2937 // Basic operations on GR16 / GR32 subclasses GR16_ and GR32_ which contains only
2938 // those registers that have GR8 sub-registers (i.e. AX - DX, EAX - EDX).
2939 let neverHasSideEffects = 1, isAsCheapAsAMove = 1 in {
2940 def MOV16to16_ : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16:$src),
2941 "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
2942 def MOV32to32_ : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32:$src),
2943 "mov{l}\t{$src, $dst|$dst, $src}", []>;
2944
2945 def MOV16_rr : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16_:$src),
2946 "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
2947 def MOV32_rr : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32_:$src),
2948 "mov{l}\t{$src, $dst|$dst, $src}", []>;
2949 } // neverHasSideEffects
2950
2951 let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
2952 def MOV16_rm : I<0x8B, MRMSrcMem, (outs GR16_:$dst), (ins i16mem:$src),
2953 "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
2954 def MOV32_rm : I<0x8B, MRMSrcMem, (outs GR32_:$dst), (ins i32mem:$src),
2955 "mov{l}\t{$src, $dst|$dst, $src}", []>;
2956 }
2957 let mayStore = 1, neverHasSideEffects = 1 in {
2958 def MOV16_mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16_:$src),
2959 "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
2960 def MOV32_mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32_:$src),
2961 "mov{l}\t{$src, $dst|$dst, $src}", []>;
29622967 }
29632968
29642969 //===----------------------------------------------------------------------===//
33403345
33413346 // r & (2^16-1) ==> movz
33423347 def : Pat<(and GR32:$src1, 0xffff),
3343 (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
3348 (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
33443349 // r & (2^8-1) ==> movz
33453350 def : Pat<(and GR32:$src1, 0xff),
3346 (MOVZX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src1),
3347 x86_subreg_8bit)))>,
3351 (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src1, GR32_),
3352 x86_subreg_8bit))>,
33483353 Requires<[In32BitMode]>;
33493354 // r & (2^8-1) ==> movz
33503355 def : Pat<(and GR16:$src1, 0xff),
3351 (MOVZX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src1),
3352 x86_subreg_8bit)))>,
3356 (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src1, GR16_),
3357 x86_subreg_8bit))>,
33533358 Requires<[In32BitMode]>;
33543359
33553360 // sext_inreg patterns
33563361 def : Pat<(sext_inreg GR32:$src, i16),
3357 (MOVSX32rr16 (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)))>;
3362 (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
33583363 def : Pat<(sext_inreg GR32:$src, i8),
3359 (MOVSX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src),
3360 x86_subreg_8bit)))>,
3364 (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
3365 x86_subreg_8bit))>,
33613366 Requires<[In32BitMode]>;
33623367 def : Pat<(sext_inreg GR16:$src, i8),
3363 (MOVSX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src),
3364 x86_subreg_8bit)))>,
3368 (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
3369 x86_subreg_8bit))>,
33653370 Requires<[In32BitMode]>;
33663371
33673372 // trunc patterns
33683373 def : Pat<(i16 (trunc GR32:$src)),
3369 (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
3374 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
33703375 def : Pat<(i8 (trunc GR32:$src)),
3371 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src), x86_subreg_8bit))>,
3376 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
3377 x86_subreg_8bit)>,
33723378 Requires<[In32BitMode]>;
33733379 def : Pat<(i8 (trunc GR16:$src)),
3374 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src), x86_subreg_8bit))>,
3380 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
3381 x86_subreg_8bit)>,
3382 Requires<[In32BitMode]>;
3383
3384 // h-register tricks
3385 def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
3386 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
3387 x86_subreg_8bit_hi)>,
3388 Requires<[In32BitMode]>;
3389 def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
3390 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
3391 x86_subreg_8bit_hi)>,
3392 Requires<[In32BitMode]>;
3393 def : Pat<(srl_su GR16:$src, (i8 8)),
3394 (EXTRACT_SUBREG
3395 (MOVZX32rr8
3396 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
3397 x86_subreg_8bit_hi)),
3398 x86_subreg_16bit)>,
3399 Requires<[In32BitMode]>;
3400 def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
3401 (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
3402 x86_subreg_8bit_hi))>,
33753403 Requires<[In32BitMode]>;
33763404
33773405 // (shl x, 1) ==> (add x, x)
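A rough C++ sketch (not part of this commit; names invented) of what the 32-bit-mode h-register tricks above cover:

// Matches (i8 (trunc (srl_su GR16:$src, (i8 8)))): in 32-bit mode the result
// can simply be read from the h sub-register of the copied value, no shift.
unsigned char high_byte_of_16(unsigned short x) {
  return (unsigned char)(x >> 8);
}

// Matches (and (srl_su GR32:$src, (i8 8)), (i32 255)): becomes a MOVZX32rr8
// of the h sub-register instead of a shift followed by a mask.
unsigned high_byte_of_32(unsigned x) {
  return (x >> 8) & 255;
}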
3434 /// these indices must be kept in sync with the class indices in the
3535 /// X86RegisterInfo.td file.
3636 enum SubregIndex {
37 SUBREG_8BIT = 1, SUBREG_16BIT = 2, SUBREG_32BIT = 3
37 SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
3838 };
3939 }
4040
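The renumbered enum above must stay in sync with the SubRegSet indices defined in X86RegisterInfo.td below. A hedged usage sketch, assuming the TargetRegisterInfo::getSubReg interface and the X86 backend's usual include layout:

#include "X86RegisterInfo.h"  // assumed include path within the X86 backend

// Hypothetical sanity check: with SUBREG_8BIT_HI == 2, the hi-8 sub-register
// of EAX is AH, while index 1 still names the low 8-bit sub-register AL.
static bool hiSubRegLooksRight(const X86RegisterInfo &TRI) {
  return TRI.getSubReg(X86::EAX, X86::SUBREG_8BIT_HI) == X86::AH &&
         TRI.getSubReg(X86::EAX, X86::SUBREG_8BIT) == X86::AL;
}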
4848 def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
4949 def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
5050
51 // High registers X86-32 only
51 // High registers. On x86-64, these cannot be used in any instruction
52 // with a REX prefix.
5253 def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>;
5354 def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>;
5455 def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>;
184185 //
185186
186187 def x86_subreg_8bit : PatLeaf<(i32 1)>;
187 def x86_subreg_16bit : PatLeaf<(i32 2)>;
188 def x86_subreg_32bit : PatLeaf<(i32 3)>;
188 def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
189 def x86_subreg_16bit : PatLeaf<(i32 3)>;
190 def x86_subreg_32bit : PatLeaf<(i32 4)>;
189191
190192 def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
191193 R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
192194 [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
193195 R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
194196
195 // It's unclear if this subreg set is safe, given that not all registers
196 // in the class have an 'H' subreg.
197 // def : SubRegSet<2, [AX, CX, DX, BX],
198 // [AH, CH, DH, BH]>;
197 def : SubRegSet<2, [AX, CX, DX, BX],
198 [AH, CH, DH, BH]>;
199199
200200 def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
201201 R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
202202 [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
203203 R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
204204
205 def : SubRegSet<2, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
205 def : SubRegSet<2, [EAX, ECX, EDX, EBX],
206 [AH, CH, DH, BH]>;
207
208 def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
206209 R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
207210 [AX, CX, DX, BX, SP, BP, SI, DI,
208211 R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
209
210212
211213 def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
212214 R8, R9, R10, R11, R12, R13, R14, R15],
213215 [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
214216 R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
215217
216 def : SubRegSet<2, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
218 def : SubRegSet<2, [RAX, RCX, RDX, RBX],
219 [AH, CH, DH, BH]>;
220
221 def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
217222 R8, R9, R10, R11, R12, R13, R14, R15],
218223 [AX, CX, DX, BX, SP, BP, SI, DI,
219224 R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
220
221 def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
225
226 def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
222227 R8, R9, R10, R11, R12, R13, R14, R15],
223228 [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
224229 R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
235240 // R8B, ... R15B.
236241 // Allocate R12 and R13 last, as these require an extra byte when
237242 // encoded in x86_64 instructions.
238 // FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions,
243 // FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
244 // 64-bit mode. The main complication is that they cannot be encoded in an
245 // instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
246 // require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
247 // cannot be encoded.
239248 def GR8 : RegisterClass<"X86", [i8], 8,
240249 [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
241250 R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
294303 def GR16 : RegisterClass<"X86", [i16], 16,
295304 [AX, CX, DX, SI, DI, BX, BP, SP,
296305 R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
297 let SubRegClassList = [GR8];
306 let SubRegClassList = [GR8, GR8];
298307 let MethodProtos = [{
299308 iterator allocation_order_begin(const MachineFunction &MF) const;
300309 iterator allocation_order_end(const MachineFunction &MF) const;
362371 def GR32 : RegisterClass<"X86", [i32], 32,
363372 [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
364373 R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
365 let SubRegClassList = [GR8, GR16];
374 let SubRegClassList = [GR8, GR8, GR16];
366375 let MethodProtos = [{
367376 iterator allocation_order_begin(const MachineFunction &MF) const;
368377 iterator allocation_order_end(const MachineFunction &MF) const;
430439 def GR64 : RegisterClass<"X86", [i64], 64,
431440 [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
432441 RBX, R14, R15, R12, R13, RBP, RSP]> {
433 let SubRegClassList = [GR8, GR16, GR32];
442 let SubRegClassList = [GR8, GR8, GR16, GR32];
434443 let MethodProtos = [{
435444 iterator allocation_order_end(const MachineFunction &MF) const;
436445 }];
451460 }
452461
453462
454 // GR16, GR32 subclasses which contain registers that have GR8 sub-registers.
455 // These should only be used for 32-bit mode.
463 // GR8_, GR16_, GR32_, GR64_ - Subclasses of GR8, GR16, GR32, and GR64
464 // which contain just the "a", "b", "c", and "d" registers. On x86-32,
465 // GR16_ and GR32_ are classes for registers that support 8-bit subreg
466 // operations. On x86-64, GR16_, GR32_, and GR64_ are classes for registers
467 // that support 8-bit h-register operations.
468 def GR8_ : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
469 }
456470 def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
457 let SubRegClassList = [GR8];
471 let SubRegClassList = [GR8_, GR8_];
458472 }
459473 def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
460 let SubRegClassList = [GR8, GR16];
474 let SubRegClassList = [GR8_, GR8_, GR16_];
475 }
476 def GR64_ : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
477 let SubRegClassList = [GR8_, GR8_, GR16_, GR32_];
478 }
479
480 // GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of
481 // GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs.
482 // On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes
483 // of registers which do not by themselves require a REX prefix.
484 def GR8_NOREX : RegisterClass<"X86", [i8], 8,
485 [AL, CL, DL, SIL, DIL, BL, BPL, SPL]> {
486 }
487 def GR16_NOREX : RegisterClass<"X86", [i16], 16,
488 [AX, CX, DX, SI, DI, BX, BP, SP]> {
489 let SubRegClassList = [GR8_NOREX, GR8_NOREX];
490 }
491 // GR32_NOREX - GR32 registers which do not require a REX prefix.
492 def GR32_NOREX : RegisterClass<"X86", [i32], 32,
493 [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
494 let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
495 let MethodProtos = [{
496 iterator allocation_order_begin(const MachineFunction &MF) const;
497 iterator allocation_order_end(const MachineFunction &MF) const;
498 }];
499 let MethodBodies = [{
500 // Does the function dedicate RBP / EBP to being a frame ptr?
501 // If so, don't allocate ESP or EBP.
502 static const unsigned X86_GR32_NOREX_AO_fp[] = {
503 X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
504 };
505 // If not, just don't allocate ESP.
506 static const unsigned X86_GR32_NOREX_AO[] = {
507 X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
508 };
509
510 GR32_NOREXClass::iterator
511 GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
512 const TargetMachine &TM = MF.getTarget();
513 const TargetRegisterInfo *RI = TM.getRegisterInfo();
514 if (RI->hasFP(MF))
515 return X86_GR32_NOREX_AO_fp;
516 else
517 return X86_GR32_NOREX_AO;
518 }
519
520 GR32_NOREXClass::iterator
521 GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
522 const TargetMachine &TM = MF.getTarget();
523 const TargetRegisterInfo *RI = TM.getRegisterInfo();
524 if (RI->hasFP(MF))
525 return X86_GR32_NOREX_AO_fp +
526 (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned));
527 else
528 return X86_GR32_NOREX_AO +
529 (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned));
530 }
531 }];
532 }
533
534 // GR64_NOREX - GR64 registers which do not require a REX prefix.
535 def GR64_NOREX : RegisterClass<"X86", [i64], 64,
536 [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> {
537 let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
538 let MethodProtos = [{
539 iterator allocation_order_begin(const MachineFunction &MF) const;
540 iterator allocation_order_end(const MachineFunction &MF) const;
541 }];
542 let MethodBodies = [{
543 // Does the function dedicate RBP / EBP to being a frame ptr?
544 // If so, don't allocate RSP or RBP.
545 static const unsigned X86_GR64_NOREX_AO_fp[] = {
546 X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX
547 };
548 // If not, just don't allocate RSP.
549 static const unsigned X86_GR64_NOREX_AO[] = {
550 X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP
551 };
552
553 GR64_NOREXClass::iterator
554 GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
555 const TargetMachine &TM = MF.getTarget();
556 const TargetRegisterInfo *RI = TM.getRegisterInfo();
557 if (RI->hasFP(MF))
558 return X86_GR64_NOREX_AO_fp;
559 else
560 return X86_GR64_NOREX_AO;
561 }
562
563 GR64_NOREXClass::iterator
564 GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
565 const TargetMachine &TM = MF.getTarget();
566 const TargetRegisterInfo *RI = TM.getRegisterInfo();
567 if (RI->hasFP(MF))
568 return X86_GR64_NOREX_AO_fp +
569 (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned));
570 else
571 return X86_GR64_NOREX_AO +
572 (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned));
573 }
574 }];
461575 }
462576
463577 // A class to support the 'A' assembler constraint: EAX then EDX.
0 ; RUN: llvm-as < %s | llc -march=x86 | grep {movzbl %\[abcd\]h,} | count 7
1
2 ; Use h-register extract and zero-extend.
3
4 define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly {
5 %t0 = lshr i32 %x, 8
6 %t1 = and i32 %t0, 255
7 %t2 = getelementptr double* %p, i32 %t1
8 %t3 = load double* %t2, align 8
9 ret double %t3
10 }
11 define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
12 %t0 = lshr i32 %x, 8
13 %t1 = and i32 %t0, 255
14 %t2 = getelementptr float* %p, i32 %t1
15 %t3 = load float* %t2, align 8
16 ret float %t3
17 }
18 define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
19 %t0 = lshr i32 %x, 8
20 %t1 = and i32 %t0, 255
21 %t2 = getelementptr i16* %p, i32 %t1
22 %t3 = load i16* %t2, align 8
23 ret i16 %t3
24 }
25 define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
26 %t0 = lshr i32 %x, 8
27 %t1 = and i32 %t0, 255
28 %t2 = getelementptr i8* %p, i32 %t1
29 %t3 = load i8* %t2, align 8
30 ret i8 %t3
31 }
32 define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
33 %t0 = lshr i32 %x, 5
34 %t1 = and i32 %t0, 2040
35 %t2 = getelementptr i8* %p, i32 %t1
36 %t3 = load i8* %t2, align 8
37 ret i8 %t3
38 }
39 define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
40 %t0 = lshr i32 %x, 6
41 %t1 = and i32 %t0, 1020
42 %t2 = getelementptr i8* %p, i32 %t1
43 %t3 = load i8* %t2, align 8
44 ret i8 %t3
45 }
46 define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
47 %t0 = lshr i32 %x, 7
48 %t1 = and i32 %t0, 510
49 %t2 = getelementptr i8* %p, i32 %t1
50 %t3 = load i8* %t2, align 8
51 ret i8 %t3
52 }
0 ; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
1
2 ; Use h-register extract and zero-extend.
3
4 define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly {
5 %t0 = lshr i64 %x, 8
6 %t1 = and i64 %t0, 255
7 %t2 = getelementptr double* %p, i64 %t1
8 %t3 = load double* %t2, align 8
9 ret double %t3
10 }
11 define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
12 %t0 = lshr i64 %x, 8
13 %t1 = and i64 %t0, 255
14 %t2 = getelementptr float* %p, i64 %t1
15 %t3 = load float* %t2, align 8
16 ret float %t3
17 }
18 define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
19 %t0 = lshr i64 %x, 8
20 %t1 = and i64 %t0, 255
21 %t2 = getelementptr i16* %p, i64 %t1
22 %t3 = load i16* %t2, align 8
23 ret i16 %t3
24 }
25 define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
26 %t0 = lshr i64 %x, 8
27 %t1 = and i64 %t0, 255
28 %t2 = getelementptr i8* %p, i64 %t1
29 %t3 = load i8* %t2, align 8
30 ret i8 %t3
31 }
32 define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
33 %t0 = lshr i64 %x, 5
34 %t1 = and i64 %t0, 2040
35 %t2 = getelementptr i8* %p, i64 %t1
36 %t3 = load i8* %t2, align 8
37 ret i8 %t3
38 }
39 define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
40 %t0 = lshr i64 %x, 6
41 %t1 = and i64 %t0, 1020
42 %t2 = getelementptr i8* %p, i64 %t1
43 %t3 = load i8* %t2, align 8
44 ret i8 %t3
45 }
46 define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
47 %t0 = lshr i64 %x, 7
48 %t1 = and i64 %t0, 510
49 %t2 = getelementptr i8* %p, i64 %t1
50 %t3 = load i8* %t2, align 8
51 ret i8 %t3
52 }
0 ; RUN: llvm-as < %s | llc -march=x86-64 > %t
1 ; RUN: grep mov %t | count 6
2 ; RUN: grep {movb %ah, (%rsi)} %t | count 3
3 ; RUN: llvm-as < %s | llc -march=x86 > %t
4 ; RUN: grep mov %t | count 3
5 ; RUN: grep {movb %ah, (%e} %t | count 3
6
7 ; Use h-register extract and store.
8
9 define void @foo16(i16 inreg %p, i8* inreg %z) nounwind {
10 %q = lshr i16 %p, 8
11 %t = trunc i16 %q to i8
12 store i8 %t, i8* %z
13 ret void
14 }
15 define void @foo32(i32 inreg %p, i8* inreg %z) nounwind {
16 %q = lshr i32 %p, 8
17 %t = trunc i32 %q to i8
18 store i8 %t, i8* %z
19 ret void
20 }
21 define void @foo64(i64 inreg %p, i8* inreg %z) nounwind {
22 %q = lshr i64 %p, 8
23 %t = trunc i64 %q to i8
24 store i8 %t, i8* %z
25 ret void
26 }
0 ; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
1 ; RUN: llvm-as < %s | llc -march=x86 > %t
2 ; RUN: grep {incb %ah} %t | count 3
3 ; RUN: grep {movzbl %ah,} %t | count 3
4
5 ; Use h registers. On x86-64, codegen doesn't support general allocation
6 ; of h registers yet, due to x86 encoding complications.
7
8 define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
9 %t0 = lshr i64 %x, 8
10 %t1 = trunc i64 %t0 to i8
11 %t2 = add i8 %t1, 1
12 store i8 %t2, i8* %p
13 ret void
14 }
15
16 define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
17 %t0 = lshr i32 %x, 8
18 %t1 = trunc i32 %t0 to i8
19 %t2 = add i8 %t1, 1
20 store i8 %t2, i8* %p
21 ret void
22 }
23
24 define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
25 %t0 = lshr i16 %x, 8
26 %t1 = trunc i16 %t0 to i8
27 %t2 = add i8 %t1, 1
28 store i8 %t2, i8* %p
29 ret void
30 }
31
32 define i64 @qux64(i64 inreg %x) nounwind {
33 %t0 = lshr i64 %x, 8
34 %t1 = and i64 %t0, 255
35 ret i64 %t1
36 }
37
38 define i32 @qux32(i32 inreg %x) nounwind {
39 %t0 = lshr i32 %x, 8
40 %t1 = and i32 %t0, 255
41 ret i32 %t1
42 }
43
44 define i16 @qux16(i16 inreg %x) nounwind {
45 %t0 = lshr i16 %x, 8
46 ret i16 %t0
47 }
0 ; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu
1 ; XFAIL: *
2 ; Expected to run out of registers during allocation.
31 ; PR3391
42
53 @pci_indirect = external global { } ; <{ }*> [#uses=1]