llvm.org GIT mirror: llvm / commit 2bce5f4
Enable i16 to i32 promotion by default.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102493 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Evan Cheng
16 changed files with 99 additions and 47 deletions.
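What the change does, in brief: the DAG combiner now rewrites most 16-bit integer operations into 32-bit ones (any-extend the operands, do the operation in i32, truncate the result), because 16-bit x86 instructions carry the 0x66 operand-size prefix and encode larger than their i32 counterparts, as the comments in the X86ISelLowering.cpp hunks below note. A condensed sketch of the rewrite in SelectionDAG-style notation (illustrative only, not verbatim compiler output):

  (xor i16 %x, 21998)
    ==>  (truncate i16 (xor i32 (any_extend i32 %x), 21998))

The new test added at the end of this diff encodes exactly this expectation: a movzwl / xorl / movswl sequence rather than a 16-bit xorw.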
@@ -1853,6 +1853,9 @@
 
   // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
   // use a smaller encoding.
+  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
+    // Look past the truncate if CMP is the only use of it.
+    N0 = N0.getOperand(0);
   if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
       N0.getValueType() != MVT::i8 &&
       X86::isZeroNode(N1)) {
@@ -6074,7 +6074,7 @@
   // the encoding for the i16 version is larger than the i32 version.
   // Also promote i16 to i32 for performance / code size reason.
   if (LHS.getValueType() == MVT::i8 ||
-      (Subtarget->shouldPromote16Bit() && LHS.getValueType() == MVT::i16))
+      LHS.getValueType() == MVT::i16)
     LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
 
   // If the operand types disagree, extend the shift amount to match. Since
@@ -9948,7 +9948,7 @@
 bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
   if (!isTypeLegal(VT))
     return false;
-  if (!Subtarget->shouldPromote16Bit() || VT != MVT::i16)
+  if (VT != MVT::i16)
     return true;
 
   switch (Opc) {
@@ -9982,9 +9982,6 @@
 /// beneficial for dag combiner to promote the specified node. If true, it
 /// should return the desired promotion type by reference.
 bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
-  if (!Subtarget->shouldPromote16Bit())
-    return false;
-
   EVT VT = Op.getValueType();
   if (VT != MVT::i16)
     return false;
@@ -9997,10 +9994,16 @@
     LoadSDNode *LD = cast<LoadSDNode>(Op);
     // If the non-extending load has a single use and it's not live out, then it
     // might be folded.
-    if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
-        Op.hasOneUse() &&
-        Op.getNode()->use_begin()->getOpcode() != ISD::CopyToReg)
-      return false;
+    if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
+        Op.hasOneUse()*/) {
+      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+             UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+        // The only case where we'd want to promote LOAD (rather than it being
+        // promoted as an operand) is when its only use is liveout.
+        if (UI->getOpcode() != ISD::CopyToReg)
+          return false;
+      }
+    }
     Promote = true;
     break;
   }
@@ -10010,8 +10013,7 @@
     Promote = true;
     break;
   case ISD::SHL:
-  case ISD::SRL:
-  {
+  case ISD::SRL: {
     SDValue N0 = Op.getOperand(0);
     // Look out for (store (shl (load), x)).
     if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
@@ -2085,6 +2085,11 @@
           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
                           x86_subreg_8bit_hi))>,
       Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+                                                                   GR32_ABCD)),
+                            x86_subreg_8bit_hi))>,
+      Requires<[In64BitMode]>;
 def : Pat<(srl GR16:$src, (i8 8)),
           (EXTRACT_SUBREG
             (MOVZX32_NOREXrr8
@@ -330,8 +330,6 @@
 def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
 def HasAES : Predicate<"Subtarget->hasAES()">;
-def Promote16Bit : Predicate<"Subtarget->shouldPromote16Bit()">;
-def NotPromote16Bit : Predicate<"!Subtarget->shouldPromote16Bit()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
@@ -4449,12 +4447,10 @@
 // avoid partial-register updates.
 def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
 def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
-def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>,
-      Requires<[NotPromote16Bit]>;
-
+
+// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
 def : Pat<(i32 (anyext GR16:$src)),
-          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>,
-      Requires<[Promote16Bit]>;
+          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
 
 
 //===----------------------------------------------------------------------===//
@@ -4545,6 +4541,11 @@
                                                              GR32_ABCD)),
                                              x86_subreg_8bit_hi))>,
       Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+                                                             GR32_ABCD)),
+                      x86_subreg_8bit_hi))>,
+      Requires<[In32BitMode]>;
 
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
@@ -15,7 +15,6 @@
 #include "X86InstrInfo.h"
 #include "X86GenSubtarget.inc"
 #include "llvm/GlobalValue.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Host.h"
@@ -23,10 +22,6 @@
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/SmallVector.h"
 using namespace llvm;
-
-static cl::opt<bool>
-DoPromote16Bit("promote-16bit", cl::Hidden,
-               cl::desc("Promote 16-bit instructions"));
 
 #if defined(_MSC_VER)
 #include <intrin.h>
@@ -297,7 +292,6 @@
   , IsBTMemSlow(false)
   , IsUAMemFast(false)
   , HasVectorUAMem(false)
-  , Promote16Bit(DoPromote16Bit)
   , DarwinVers(0)
   , stackAlignment(8)
   // FIXME: this is a known good value for Yonah. How about others?
@@ -86,10 +86,6 @@
   /// HasVectorUAMem - True if SIMD operations can have unaligned memory
   /// operands. This may require setting a feature bit in the processor.
   bool HasVectorUAMem;
-
-  /// Promote16Bit - True if codegen should promote 16-bit operations to 32-bit.
-  /// This is a temporary option.
-  bool Promote16Bit;
 
   /// DarwinVers - Nonzero if this is a darwin platform: the numeric
   /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -159,7 +155,6 @@
   bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool hasVectorUAMem() const { return HasVectorUAMem; }
-  bool shouldPromote16Bit() const { return Promote16Bit; }
 
   bool isTargetDarwin() const { return TargetType == isDarwin; }
   bool isTargetELF() const { return TargetType == isELF; }
@@ -2,6 +2,7 @@
 
 ; CHECK: andl $65534, %
 ; CHECK-NEXT: movl %
+; CHECK-NEXT: movzwl
 ; CHECK-NEXT: movl $17
 
 @g_5 = external global i16 ; [#uses=2]
@@ -0,4 +0,4 @@
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 40
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 41
 ; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s
 
 %struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
@@ -62,13 +62,13 @@
 define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
 entry:
 ; CHECK: t:
-; CHECK: xorl %ecx, %ecx
 %0 = trunc i64 %key_token to i32 ; [#uses=1]
 %1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5]
 br label %bb1132
 
 bb51: ; preds = %bb1132
 ; CHECK: .align 4
+; CHECK: xorl %ecx, %ecx
 ; CHECK: andl $7
 %2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; [#uses=1]
 %3 = load i16* %2, align 1 ; [#uses=3]
@@ -3,7 +3,7 @@
 ; rdar://6808032
 
 ; CHECK: pextrw $14
-; CHECK-NEXT: movzbl
+; CHECK-NEXT: shrl $8
 ; CHECK-NEXT: (%ebp)
 ; CHECK-NEXT: pinsrw
 
@@ -191,7 +191,7 @@
 define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
 entry:
 ; CHECK: sub2:
-; CHECK: subw
+; CHECK: negl
 %0 = trunc i32 %v to i16 ; [#uses=1]
 %1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0) ; [#uses=0]
 ret void
@@ -0,12 +0,16 @@
-; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep {incb %ah} %t | count 3
-; RUN: grep {movzbl %ah,} %t | count 3
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86-32
 
 ; Use h registers. On x86-64, codegen doesn't support general allocation
 ; of h registers yet, due to x86 encoding complications.
 
 define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
+; X86-64: bar64:
+; X86-64: shrq $8, %rdi
+; X86-64: incb %dil
+
+; X86-32: bar64:
+; X86-32: incb %ah
 %t0 = lshr i64 %x, 8
 %t1 = trunc i64 %t0 to i8
 %t2 = add i8 %t1, 1
@@ -14,6 +18,12 @@
 }
 
 define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
+; X86-64: bar32:
+; X86-64: shrl $8, %edi
+; X86-64: incb %dil
+
+; X86-32: bar32:
+; X86-32: incb %ah
 %t0 = lshr i32 %x, 8
 %t1 = trunc i32 %t0 to i8
 %t2 = add i8 %t1, 1
@@ -22,6 +32,12 @@
 }
 
 define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
+; X86-64: bar16:
+; X86-64: shrl $8, %edi
+; X86-64: incb %dil
+
+; X86-32: bar16:
+; X86-32: incb %ah
 %t0 = lshr i16 %x, 8
 %t1 = trunc i16 %t0 to i8
 %t2 = add i8 %t1, 1
@@ -30,18 +46,36 @@
 }
 
 define i64 @qux64(i64 inreg %x) nounwind {
+; X86-64: qux64:
+; X86-64: movq %rdi, %rax
+; X86-64: movzbl %ah, %eax
+
+; X86-32: qux64:
+; X86-32: movzbl %ah, %eax
 %t0 = lshr i64 %x, 8
 %t1 = and i64 %t0, 255
 ret i64 %t1
 }
 
 define i32 @qux32(i32 inreg %x) nounwind {
+; X86-64: qux32:
+; X86-64: movl %edi, %eax
+; X86-64: movzbl %ah, %eax
+
+; X86-32: qux32:
+; X86-32: movzbl %ah, %eax
 %t0 = lshr i32 %x, 8
 %t1 = and i32 %t0, 255
 ret i32 %t1
 }
 
 define i16 @qux16(i16 inreg %x) nounwind {
+; X86-64: qux16:
+; X86-64: movl %edi, %eax
+; X86-64: movzbl %ah, %eax
+
+; X86-32: qux16:
+; X86-32: movzbl %ah, %eax
 %t0 = lshr i16 %x, 8
 ret i16 %t0
 }
@@ -0,7 +0,13 @@
-; RUN: llc < %s -march=x86 | grep mov | count 3
+; RUN: llc < %s -march=x86 | FileCheck %s
 
-define fastcc i32 @sqlite3ExprResolveNames() nounwind {
+define fastcc i32 @t() nounwind {
 entry:
+; CHECK: t:
+; CHECK: movzwl 0, %eax
+; CHECK: orl $2, %eax
+; CHECK: movw %ax, 0
+; CHECK: shrl $3, %eax
+; CHECK: andl $1, %eax
 br i1 false, label %UnifiedReturnBlock, label %bb4
 bb4: ; preds = %entry
 br i1 false, label %bb17, label %bb22
@@ -0,0 +0,11 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define signext i16 @foo(i16 signext %x) nounwind {
+entry:
+; CHECK: foo:
+; CHECK: movzwl 4(%esp), %eax
+; CHECK: xorl $21998, %eax
+; CHECK: movswl %ax, %eax
+  %0 = xor i16 %x, 21998
+  ret i16 %0
+}
@@ -66,7 +66,7 @@
 ; X64: movw %si, 2(%rdi)
 
 ; X32: test4:
-; X32: movw 8(%esp), %ax
+; X32: movzwl 8(%esp), %eax
 ; X32: movw %ax, 2(%{{.*}})
 }
 
@@ -83,7 +83,7 @@
 ; X64: movw %si, 2(%rdi)
 
 ; X32: test5:
-; X32: movw 8(%esp), %ax
+; X32: movzwl 8(%esp), %eax
 ; X32: movw %ax, 2(%{{.*}})
 }
 
@@ -0,7 +0,7 @@
 ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movw %gs:i@NTPOFF, %ax} %t
+; RUN: grep {movzwl %gs:i@NTPOFF, %eax} %t
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movw %fs:i@TPOFF, %ax} %t2
+; RUN: grep {movzwl %fs:i@TPOFF, %eax} %t2
 
 @i = thread_local global i16 15
 
@@ -79,11 +79,11 @@
 bb12:
 ret i16 %tmp3
 ; X64: test5:
-; X64: notw [[REG:%[a-z]+]]
-; X64: andw {{.*}}[[REG]]
+; X64: notl [[REG:%[a-z]+]]
+; X64: andl {{.*}}[[REG]]
 ; X32: test5:
-; X32: notw [[REG:%[a-z]+]]
-; X32: andw {{.*}}[[REG]]
+; X32: notl [[REG:%[a-z]+]]
+; X32: andl {{.*}}[[REG]]
 }
 
 define i8 @test6(i8 %a, i8 %b) nounwind {