llvm.org GIT mirror llvm / 25ab690
Committing X86-64 support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30177 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 13 years ago
25 changed file(s) with 3627 addition(s) and 486 deletion(s). Raw diff Collapse all Expand all
0 //===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===//
1
2 Implement different PIC models? Right now we only support Mac OS X with small
3 PIC code model.
4
5 //===---------------------------------------------------------------------===//
6
7 Make use of "Red Zone".
8
9 //===---------------------------------------------------------------------===//
10
11 Implement __int128 and long double support.
12
13 //===---------------------------------------------------------------------===//
14
15 For this:
16
17 extern void xx(void);
18 void bar(void) {
19 xx();
20 }
21
22 gcc compiles to:
23
24 .globl _bar
25 _bar:
26 jmp _xx
27
28 We need to do the tailcall optimization as well.
29
30 //===---------------------------------------------------------------------===//
31
32 For this:
33
34 int test(int a)
35 {
36 return a * 3;
37 }
38
39 We generate
40 leal (%edi,%edi,2), %eax
41
42 We should be generating
43 leal (%rdi,%rdi,2), %eax
44
45 instead. The latter form does not require an address-size prefix 67H.
46
47 It's probably ok to simply emit the corresponding 64-bit super class registers
48 in this case?
49
50
51 //===---------------------------------------------------------------------===//
52
53 AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
54 multiplication by a constant. How much of it applies to Intel's X86-64
55 implementation? There are definite trade-offs to consider: latency vs. register
56 pressure vs. code size.
57
58 //===---------------------------------------------------------------------===//
59
60 Are we better off using branches instead of cmove to implement FP to
61 unsigned i64?
62
63 _conv:
64 ucomiss LC0(%rip), %xmm0
65 cvttss2siq %xmm0, %rdx
66 jb L3
67 subss LC0(%rip), %xmm0
68 movabsq $-9223372036854775808, %rax
69 cvttss2siq %xmm0, %rdx
70 xorq %rax, %rdx
71 L3:
72 movq %rdx, %rax
73 ret
74
75 instead of
76
77 _conv:
78 movss LCPI1_0(%rip), %xmm1
79 cvttss2siq %xmm0, %rcx
80 movaps %xmm0, %xmm2
81 subss %xmm1, %xmm2
82 cvttss2siq %xmm2, %rax
83 movabsq $-9223372036854775808, %rdx
84 xorq %rdx, %rax
85 ucomiss %xmm1, %xmm0
86 cmovb %rcx, %rax
87 ret
88
89 Seems like the jb branch has a high likelihood of being taken. It would have
90 saved a few instructions.
91
92 //===---------------------------------------------------------------------===//
93
94 Poor codegen:
95
96 int X[2];
97 int b;
98 void test(void) {
99 memset(X, b, 2*sizeof(X[0]));
100 }
101
102 llc:
103 movq _b@GOTPCREL(%rip), %rax
104 movzbq (%rax), %rax
105 movq %rax, %rcx
106 shlq $8, %rcx
107 orq %rax, %rcx
108 movq %rcx, %rax
109 shlq $16, %rax
110 orq %rcx, %rax
111 movq %rax, %rcx
112 shlq $32, %rcx
113 movq _X@GOTPCREL(%rip), %rdx
114 orq %rax, %rcx
115 movq %rcx, (%rdx)
116 ret
117
118 gcc:
119 movq _b@GOTPCREL(%rip), %rax
120 movabsq $72340172838076673, %rdx
121 movzbq (%rax), %rax
122 imulq %rdx, %rax
123 movq _X@GOTPCREL(%rip), %rdx
124 movq %rax, (%rdx)
125 ret
126
127 //===---------------------------------------------------------------------===//
128
129 Vararg function prologue can be further optimized. Currently all XMM registers
130 are stored into register save area. Most of them can be eliminated since the
131 upper bound of the number of XMM registers used is passed in %al. gcc produces
132 something like the following:
133
134 movzbl %al, %edx
135 leaq 0(,%rdx,4), %rax
136 leaq 4+L2(%rip), %rdx
137 leaq 239(%rsp), %rax
138 jmp *%rdx
139 movaps %xmm7, -15(%rax)
140 movaps %xmm6, -31(%rax)
141 movaps %xmm5, -47(%rax)
142 movaps %xmm4, -63(%rax)
143 movaps %xmm3, -79(%rax)
144 movaps %xmm2, -95(%rax)
145 movaps %xmm1, -111(%rax)
146 movaps %xmm0, -127(%rax)
147 L2:
148
149 It jumps over the movaps that do not need to be stored. Hard to see this being
150 significant as it added 5 instructions (including an indirect branch) to avoid
151 executing 0 to 8 stores in the function prologue.
152
153 Perhaps we can optimize for the common case where no XMM registers are used for
154 parameter passing. i.e. if %al == 0 jump over all stores. Or in the case of a
155 leaf function where we can determine that no XMM input parameter is needed, avoid
156 emitting the stores at all.
157
158 //===---------------------------------------------------------------------===//
159
160 AMD64 has a complex calling convention for aggregate passing by value:
161
162 1. If the size of an object is larger than two eightbytes, or in C++, is a non-
163 POD structure or union type, or contains unaligned fields, it has class
164 MEMORY.
165 2. Both eightbytes get initialized to class NO_CLASS.
166 3. Each field of an object is classified recursively so that always two fields
167 are considered. The resulting class is calculated according to the classes
168 of the fields in the eightbyte:
169 (a) If both classes are equal, this is the resulting class.
170 (b) If one of the classes is NO_CLASS, the resulting class is the other
171 class.
172 (c) If one of the classes is MEMORY, the result is the MEMORY class.
173 (d) If one of the classes is INTEGER, the result is the INTEGER.
174 (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as
175 class.
176 (f) Otherwise class SSE is used.
177 4. Then a post merger cleanup is done:
178 (a) If one of the classes is MEMORY, the whole argument is passed in memory.
180 (b) If SSEUP is not preceded by SSE, it is converted to SSE.
180
181 Currently llvm frontend does not handle this correctly.
182
183 Problem 1:
184 typedef struct { int i; double d; } QuadWordS;
185 It is currently passed in two i64 integer registers. However, gcc compiled
186 callee expects the second element 'd' to be passed in XMM0.
187
188 Problem 2:
189 typedef struct { int32_t i; float j; double d; } QuadWordS;
190 The size of the first two fields == i64 so they will be combined and passed in
191 an integer register RDI. The third field is still passed in XMM0.
192
193 Problem 3:
194 typedef struct { int64_t i; int8_t j; int64_t d; } S;
195 void test(S s)
196 The size of this aggregate is greater than two i64 so it should be passed in
197 memory. Currently llvm breaks this down and passes it in three integer
198 registers.
199
200 Problem 4:
201 Taking problem 3 one step further, where a function expects an aggregate value
202 in memory followed by more parameter(s) passed in register(s).
203 void test(S s, int b)
204
205 LLVM IR does not allow parameter passing by aggregates, therefore it must break
206 the aggregates value (in problem 3 and 4) into a number of scalar values:
207 void %test(long %s.i, byte %s.j, long %s.d);
208
209 However, if the backend were to lower this code literally it would pass the 3
210 values in integer registers. To force it be passed in memory, the frontend
211 should change the function signature to:
212 void %test(long %undef1, long %undef2, long %undef3, long %undef4,
213 long %undef5, long %undef6,
214 long %s.i, byte %s.j, long %s.d);
215 And the caller would look something like this:
216 call void %test( undef, undef, undef, undef, undef, undef,
217 %tmp.s.i, %tmp.s.j, %tmp.s.d );
218 The first 6 undef parameters would exhaust the 6 integer registers used for
219 parameter passing. The following three integer values would then be forced into
220 memory.
221
222 For problem 4, the parameter 'd' would be moved to the front of the parameter
223 list so it will be passed in register:
224 void %test(int %d,
225 long %undef1, long %undef2, long %undef3, long %undef4,
226 long %undef5, long %undef6,
227 long %s.i, byte %s.j, long %s.d);
228
229 //===---------------------------------------------------------------------===//
230
231 For this:
232
233 extern int dst[];
234 extern int* ptr;
235
236 void test(void) {
237 ptr = dst;
238 }
239
240 We generate this code for static relocation model:
241
242 _test:
243 leaq _dst(%rip), %rax
244 movq %rax, _ptr(%rip)
245 ret
246
247 If we are in the small code model, then we can treat _dst as a 32-bit constant.
248 movq $_dst, _ptr(%rip)
249
250 Note, however, we should continue to use RIP relative addressing mode as much as
251 possible. The above is actually one byte shorter than
252 movq $_dst, _ptr
253
254 //===---------------------------------------------------------------------===//
255
256 Right now the asm printer assumes GlobalAddress are accessed via RIP relative
257 addressing. Therefore, it is not possible to generate this:
258 movabsq $__ZTV10polynomialIdE+16, %rax
259
260 That is ok for now since we currently only support small model. So the above
261 is selected as
262 leaq __ZTV10polynomialIdE+16(%rip), %rax
263
264 This is probably slightly slower but is much shorter than movabsq. However, if
265 we were to support medium or larger code models, we need to use the movabs
266 instruction. We should probably introduce something like AbsoluteAddress to
267 distinguish it from GlobalAddress so the asm printer and JIT code emitter can
268 do the right thing.
1919 // X86 Subtarget features.
2020 //
2121
22 def Feature64Bit : SubtargetFeature<"64bit", "Is64Bit", "true",
23 "Enable 64-bit instructions">;
22 def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
23 "Support 64-bit instructions">;
2424 def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
2525 "Enable MMX instructions">;
2626 def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
6060 FeatureSSE3]>;
6161 def : Proc<"nocona", [FeatureMMX, FeatureSSE1, FeatureSSE2,
6262 FeatureSSE3, Feature64Bit]>;
63 def : Proc<"core2", [FeatureMMX, FeatureSSE1, FeatureSSE2,
64 FeatureSSE3, Feature64Bit]>;
6365
6466 def : Proc<"k6", [FeatureMMX]>;
6567 def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
104106 // should be kept up-to-date with the fields in the X86InstrInfo.h file.
105107 let TSFlagsFields = ["FormBits",
106108 "hasOpSizePrefix",
109 "hasAdSizePrefix",
107110 "Prefix",
111 "hasREX_WPrefix",
108112 "ImmTypeBits",
109113 "FPFormBits",
110114 "Opcode"];
111115 let TSFlagsShifts = [0,
112116 6,
113117 7,
114 11,
118 8,
119 12,
115120 13,
116 16];
121 16,
122 24];
117123 }
118124
119125 // The X86 target supports two different syntaxes for emitting machine code.
125125 O << '%';
126126 unsigned Reg = MO.getReg();
127127 if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
128 MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0)
129 ? MVT::i16 : MVT::i8;
128 MVT::ValueType VT = (strcmp(Modifier+6,"64") == 0) ?
129 MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
130 ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
130131 Reg = getX86SubSuperRegister(Reg, VT);
131132 }
132133 for (const char *Name = RI.get(Reg).Name; *Name; ++Name)
147148 if (!isMemOp) O << '$';
148149 O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << "_"
149150 << MO.getJumpTableIndex();
150 if (Subtarget->isTargetDarwin() &&
151 if (X86PICStyle == PICStyle::Stub &&
151152 TM.getRelocationModel() == Reloc::PIC_)
152153 O << "-\"L" << getFunctionNumber() << "$pb\"";
154 if (Subtarget->is64Bit())
155 O << "(%rip)";
153156 return;
154157 }
155158 case MachineOperand::MO_ConstantPoolIndex: {
157160 if (!isMemOp) O << '$';
158161 O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
159162 << MO.getConstantPoolIndex();
160 if (Subtarget->isTargetDarwin() &&
163 if (X86PICStyle == PICStyle::Stub &&
161164 TM.getRelocationModel() == Reloc::PIC_)
162165 O << "-\"L" << getFunctionNumber() << "$pb\"";
163166 int Offset = MO.getOffset();
165168 O << "+" << Offset;
166169 else if (Offset < 0)
167170 O << Offset;
171
172 if (Subtarget->is64Bit())
173 O << "(%rip)";
168174 return;
169175 }
170176 case MachineOperand::MO_GlobalAddress: {
171177 bool isCallOp = Modifier && !strcmp(Modifier, "call");
172178 bool isMemOp = Modifier && !strcmp(Modifier, "mem");
173179 if (!isMemOp && !isCallOp) O << '$';
174 // Darwin block shameless ripped from PPCAsmPrinter.cpp
175 if (Subtarget->isTargetDarwin() &&
180
181 GlobalValue *GV = MO.getGlobal();
182 std::string Name = Mang->getValueName(GV);
183 bool isExt = (GV->isExternal() || GV->hasWeakLinkage() ||
184 GV->hasLinkOnceLinkage());
185 if (X86PICStyle == PICStyle::Stub &&
176186 TM.getRelocationModel() != Reloc::Static) {
177 GlobalValue *GV = MO.getGlobal();
178 std::string Name = Mang->getValueName(GV);
179187 // Link-once, External, or Weakly-linked global variables need
180188 // non-lazily-resolved stubs
181 if (GV->isExternal() || GV->hasWeakLinkage() ||
182 GV->hasLinkOnceLinkage()) {
189 if (isExt) {
183190 // Dynamically-resolved functions need a stub for the function.
184 if (isCallOp && isa(GV) && cast(GV)->isExternal()) {
191 if (isCallOp && isa(GV)) {
185192 FnStubs.insert(Name);
186193 O << "L" << Name << "$stub";
187194 } else {
188195 GVStubs.insert(Name);
189196 O << "L" << Name << "$non_lazy_ptr";
190197 }
191 } else {
192 O << Mang->getValueName(GV);
193 }
198 } else
199 O << Name;
194200 if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
195201 O << "-\"L" << getFunctionNumber() << "$pb\"";
196 } else
197 O << Mang->getValueName(MO.getGlobal());
202 } else
203 O << Name;
204
198205 int Offset = MO.getOffset();
199206 if (Offset > 0)
200207 O << "+" << Offset;
201208 else if (Offset < 0)
202209 O << Offset;
210
211 if (!isCallOp &&
212 Subtarget->is64Bit()) {
213 if (isExt && TM.getRelocationModel() != Reloc::Static)
214 O << "@GOTPCREL";
215 O << "(%rip)";
216 }
217
203218 return;
204219 }
205220 case MachineOperand::MO_ExternalSymbol: {
206221 bool isCallOp = Modifier && !strcmp(Modifier, "call");
207222 if (isCallOp &&
208 Subtarget->isTargetDarwin() &&
223 X86PICStyle == PICStyle::Stub &&
209224 TM.getRelocationModel() != Reloc::Static) {
210225 std::string Name(TAI->getGlobalPrefix());
211226 Name += MO.getSymbolName();
215230 }
216231 if (!isCallOp) O << '$';
217232 O << TAI->getGlobalPrefix() << MO.getSymbolName();
233
234 if (!isCallOp &&
235 Subtarget->is64Bit())
236 O << "(%rip)";
237
218238 return;
219239 }
220240 default:
237257 }
238258 }
239259
240 void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
260 void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
261 const char *Modifier){
241262 assert(isMem(MI, Op) && "Invalid memory reference!");
242263
243264 const MachineOperand &BaseReg = MI->getOperand(Op);
265286
266287 if (IndexReg.getReg() || BaseReg.getReg()) {
267288 O << "(";
268 if (BaseReg.getReg())
269 printOperand(MI, Op);
289 if (BaseReg.getReg()) {
290 printOperand(MI, Op, Modifier);
291 }
270292
271293 if (IndexReg.getReg()) {
272294 O << ",";
273 printOperand(MI, Op+2);
295 printOperand(MI, Op+2, Modifier);
274296 if (ScaleVal != 1)
275297 O << "," << ScaleVal;
276298 }
349371 ///
350372 void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
351373 ++EmittedInsts;
352 // This works around some Darwin assembler bugs.
353 if (Subtarget->isTargetDarwin()) {
354 switch (MI->getOpcode()) {
355 case X86::REP_MOVSB:
356 O << "rep/movsb (%esi),(%edi)\n";
357 return;
358 case X86::REP_MOVSD:
359 O << "rep/movsl (%esi),(%edi)\n";
360 return;
361 case X86::REP_MOVSW:
362 O << "rep/movsw (%esi),(%edi)\n";
363 return;
364 case X86::REP_STOSB:
365 O << "rep/stosb\n";
366 return;
367 case X86::REP_STOSD:
368 O << "rep/stosl\n";
369 return;
370 case X86::REP_STOSW:
371 O << "rep/stosw\n";
372 return;
373 default:
374 break;
375 }
376 }
377374
378375 // See if a truncate instruction can be turned into a nop.
379376 switch (MI->getOpcode()) {
380377 default: break;
381 case X86::TRUNC_GR32_GR16:
382 case X86::TRUNC_GR32_GR8:
383 case X86::TRUNC_GR16_GR8: {
378 case X86::TRUNC_64to32:
379 case X86::TRUNC_64to16:
380 case X86::TRUNC_32to16:
381 case X86::TRUNC_32to8:
382 case X86::TRUNC_16to8:
383 case X86::TRUNC_32_to8:
384 case X86::TRUNC_16_to8: {
384385 const MachineOperand &MO0 = MI->getOperand(0);
385386 const MachineOperand &MO1 = MI->getOperand(1);
386387 unsigned Reg0 = MO0.getReg();
387388 unsigned Reg1 = MO1.getReg();
388 if (MI->getOpcode() == X86::TRUNC_GR32_GR16)
389 unsigned Opc = MI->getOpcode();
390 if (Opc == X86::TRUNC_64to32)
391 Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
392 else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
389393 Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
390394 else
391395 Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
394398 O << "\n\t";
395399 break;
396400 }
401 case X86::PsMOVZX64rr32:
402 O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
403 break;
397404 }
398405
399406 // Call the autogenerated instruction printer routines.
5959 void printf128mem(const MachineInstr *MI, unsigned OpNo) {
6060 printMemReference(MI, OpNo);
6161 }
62 void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
63 printMemReference(MI, OpNo, "subreg64");
64 }
6265
6366 bool printAsmMRegister(const MachineOperand &MO, const char Mode);
6467 bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
6871
6972 void printMachineInstruction(const MachineInstr *MI);
7073 void printSSECC(const MachineInstr *MI, unsigned Op);
71 void printMemReference(const MachineInstr *MI, unsigned Op);
74 void printMemReference(const MachineInstr *MI, unsigned Op,
75 const char *Modifier=NULL);
7276 void printPICLabel(const MachineInstr *MI, unsigned Op);
7377 bool runOnMachineFunction(MachineFunction &F);
7478 };
2929 "Number of machine instrs printed");
3030
3131 /// doInitialization
32 bool X86SharedAsmPrinter::doInitialization(Module &M) {
32 bool X86SharedAsmPrinter::doInitialization(Module &M) {
3333 if (Subtarget->isTargetDarwin()) {
34 const X86Subtarget *Subtarget = &TM.getSubtarget();
35 if (!Subtarget->is64Bit())
36 X86PICStyle = PICStyle::Stub;
37
3438 // Emit initial debug information.
3539 DW.BeginModule(&M);
3640 }
2828
2929 extern Statistic<> EmittedInsts;
3030
31 // FIXME: Move this to CodeGen/AsmPrinter.h
32 namespace PICStyle {
33 enum X86AsmPICStyle {
34 Stub, GOT
35 };
36 }
37
3138 struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter {
3239 DwarfWriter DW;
3340
3441 X86SharedAsmPrinter(std::ostream &O, X86TargetMachine &TM,
3542 const TargetAsmInfo *T)
36 : AsmPrinter(O, TM, T), DW(O, this, T) {
43 : AsmPrinter(O, TM, T), DW(O, this, T), X86PICStyle(PICStyle::GOT) {
3744 Subtarget = &TM.getSubtarget();
3845 }
3946
4855 MachineFunctionPass::getAnalysisUsage(AU);
4956 }
5057
58 PICStyle::X86AsmPICStyle X86PICStyle;
59
5160 const X86Subtarget *Subtarget;
5261
5362 // Necessary for Darwin to print out the apprioriate types of linker stubs
1111 //
1212 //===----------------------------------------------------------------------===//
1313
14 #include "X86InstrInfo.h"
15 #include "X86Subtarget.h"
1416 #include "X86TargetMachine.h"
1517 #include "X86Relocations.h"
1618 #include "X86.h"
3436 namespace {
3537 class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
3638 const X86InstrInfo *II;
37 TargetMachine &TM;
39 const TargetData *TD;
40 TargetMachine &TM;
3841 MachineCodeEmitter &MCE;
42 bool Is64BitMode;
3943 public:
4044 explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce)
41 : II(0), TM(tm), MCE(mce) {}
45 : II(0), TD(0), TM(tm), MCE(mce), Is64BitMode(false) {}
4246 Emitter(TargetMachine &tm, MachineCodeEmitter &mce,
43 const X86InstrInfo& ii)
44 : II(&ii), TM(tm), MCE(mce) {}
47 const X86InstrInfo &ii, const TargetData &td, bool is64)
48 : II(&ii), TD(&td), TM(tm), MCE(mce), Is64BitMode(is64) {}
4549
4650 bool runOnMachineFunction(MachineFunction &MF);
4751
5357
5458 private:
5559 void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
56 void emitPCRelativeValue(unsigned Address);
57 void emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall);
58 void emitGlobalAddressForPtr(GlobalValue *GV, int Disp = 0);
60 void emitPCRelativeValue(intptr_t Address);
61 void emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub);
62 void emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative,
63 int Disp = 0, unsigned PCAdj = 0);
5964 void emitExternalSymbolAddress(const char *ES, bool isPCRelative);
60
61 void emitDisplacementField(const MachineOperand *RelocOp, int DispVal);
65 void emitPCRelativeConstPoolAddress(unsigned CPI, int Disp = 0,
66 unsigned PCAdj = 0);
67 void emitPCRelativeJumpTableAddress(unsigned JTI, unsigned PCAdj = 0);
68
69 void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
70 unsigned PCAdj = 0);
6271
6372 void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
6473 void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
65 void emitConstant(unsigned Val, unsigned Size);
74 void emitConstant(uint64_t Val, unsigned Size);
6675
6776 void emitMemModRMByte(const MachineInstr &MI,
68 unsigned Op, unsigned RegOpcodeField);
69
77 unsigned Op, unsigned RegOpcodeField,
78 unsigned PCAdj = 0);
79
80 unsigned getX86RegNum(unsigned RegNo);
81 bool isX86_64ExtendedReg(const MachineOperand &MO);
82 unsigned determineREX(const MachineInstr &MI);
7083 };
7184 }
7285
8295 MF.getTarget().getRelocationModel() != Reloc::Static) &&
8396 "JIT relocation model must be set to static or default!");
8497 II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo();
98 TD = ((X86TargetMachine&)MF.getTarget()).getTargetData();
99 Is64BitMode =
100 ((X86TargetMachine&)MF.getTarget()).getSubtarget().is64Bit();
85101
86102 do {
87103 MCE.startFunction(MF);
97113 return false;
98114 }
99115
100 /// emitPCRelativeValue - Emit a 32-bit PC relative address.
116 /// emitPCRelativeValue - Emit a PC relative address.
101117 ///
102 void Emitter::emitPCRelativeValue(unsigned Address) {
118 void Emitter::emitPCRelativeValue(intptr_t Address) {
103119 MCE.emitWordLE(Address-MCE.getCurrentPCValue()-4);
104120 }
105121
118134 /// emitGlobalAddressForCall - Emit the specified address to the code stream
119135 /// assuming this is part of a function call, which is PC relative.
120136 ///
121 void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall) {
137 void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub) {
122138 MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
123139 X86::reloc_pcrel_word, GV, 0,
124 !isTailCall /*Doesn'tNeedStub*/));
140 DoesntNeedStub));
125141 MCE.emitWordLE(0);
126142 }
127143
128144 /// emitGlobalAddress - Emit the specified address to the code stream assuming
129 /// this is part of a "take the address of a global" instruction, which is not
130 /// PC relative.
145 /// this is part of a "take the address of a global" instruction.
131146 ///
132 void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, int Disp /* = 0 */) {
133 MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
134 X86::reloc_absolute_word, GV));
147 void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative,
148 int Disp /* = 0 */,
149 unsigned PCAdj /* = 0 */) {
150 unsigned rt = isPCRelative ? X86::reloc_pcrel_word : X86::reloc_absolute_word;
151 MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), rt,
152 GV, PCAdj));
135153 MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
136154 }
137155
144162 MCE.emitWordLE(0);
145163 }
146164
165 /// emitPCRelativeConstPoolAddress - Arrange for the address of an constant pool
166 /// to be emitted to the current location in the function, and allow it to be PC
167 /// relative.
168 void Emitter::emitPCRelativeConstPoolAddress(unsigned CPI, int Disp /* = 0 */,
169 unsigned PCAdj /* = 0 */) {
170 MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
171 X86::reloc_pcrel_word, CPI, PCAdj));
172 MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
173 }
174
175 /// emitPCRelativeJumpTableAddress - Arrange for the address of a jump table to
176 /// be emitted to the current location in the function, and allow it to be PC
177 /// relative.
178 void Emitter::emitPCRelativeJumpTableAddress(unsigned JTI,
179 unsigned PCAdj /* = 0 */) {
180 MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
181 X86::reloc_pcrel_word, JTI, PCAdj));
182 MCE.emitWordLE(0); // The relocated value will be added to the displacement
183 }
184
147185 /// N86 namespace - Native X86 Register numbers... used by X86 backend.
148186 ///
149187 namespace N86 {
152190 };
153191 }
154192
155
156193 // getX86RegNum - This function maps LLVM register identifiers to their X86
157194 // specific numbering, which is used in various places encoding instructions.
158195 //
159 static unsigned getX86RegNum(unsigned RegNo) {
196 unsigned Emitter::getX86RegNum(unsigned RegNo) {
160197 switch(RegNo) {
161 case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
162 case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
163 case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
164 case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
165 case X86::ESP: case X86::SP: case X86::AH: return N86::ESP;
166 case X86::EBP: case X86::BP: case X86::CH: return N86::EBP;
167 case X86::ESI: case X86::SI: case X86::DH: return N86::ESI;
168 case X86::EDI: case X86::DI: case X86::BH: return N86::EDI;
198 case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
199 case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
200 case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
201 case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
202 case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
203 return N86::ESP;
204 case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
205 return N86::EBP;
206 case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
207 return N86::ESI;
208 case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
209 return N86::EDI;
210
211 case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
212 return N86::EAX;
213 case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
214 return N86::ECX;
215 case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
216 return N86::EDX;
217 case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
218 return N86::EBX;
219 case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
220 return N86::ESP;
221 case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
222 return N86::EBP;
223 case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
224 return N86::ESI;
225 case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
226 return N86::EDI;
169227
170228 case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
171229 case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
172230 return RegNo-X86::ST0;
173231
174 case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
175 case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
176 return RegNo-X86::XMM0;
232 case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
233 case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
234 return II->getRegisterInfo().getDwarfRegNum(RegNo) -
235 II->getRegisterInfo().getDwarfRegNum(X86::XMM0);
236 case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
237 case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
238 return II->getRegisterInfo().getDwarfRegNum(RegNo) -
239 II->getRegisterInfo().getDwarfRegNum(X86::XMM8);
177240
178241 default:
179242 assert(MRegisterInfo::isVirtualRegister(RegNo) &&
198261 MCE.emitByte(ModRMByte(SS, Index, Base));
199262 }
200263
201 void Emitter::emitConstant(unsigned Val, unsigned Size) {
264 void Emitter::emitConstant(uint64_t Val, unsigned Size) {
202265 // Output the constant in little endian byte order...
203266 for (unsigned i = 0; i != Size; ++i) {
204267 MCE.emitByte(Val & 255);
213276 }
214277
215278 void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
216 int DispVal) {
279 int DispVal, unsigned PCAdj) {
217280 // If this is a simple integer displacement that doesn't require a relocation,
218281 // emit it now.
219282 if (!RelocOp) {
224287 // Otherwise, this is something that requires a relocation. Emit it as such
225288 // now.
226289 if (RelocOp->isGlobalAddress()) {
227 emitGlobalAddressForPtr(RelocOp->getGlobal(), RelocOp->getOffset());
290 // In 64-bit static small code model, we could potentially emit absolute.
291 // But it's probably not beneficial.
292 // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
293 // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
294 emitGlobalAddressForPtr(RelocOp->getGlobal(), Is64BitMode,
295 RelocOp->getOffset(), PCAdj);
296 } else if (RelocOp->isConstantPoolIndex()) {
297 // Must be in 64-bit mode.
298 emitPCRelativeConstPoolAddress(RelocOp->getConstantPoolIndex(),
299 RelocOp->getOffset(), PCAdj);
300 } else if (RelocOp->isJumpTableIndex()) {
301 // Must be in 64-bit mode.
302 emitPCRelativeJumpTableAddress(RelocOp->getJumpTableIndex(), PCAdj);
228303 } else {
229304 assert(0 && "Unknown value to relocate!");
230305 }
231306 }
232307
233308 void Emitter::emitMemModRMByte(const MachineInstr &MI,
234 unsigned Op, unsigned RegOpcodeField) {
309 unsigned Op, unsigned RegOpcodeField,
310 unsigned PCAdj) {
235311 const MachineOperand &Op3 = MI.getOperand(Op+3);
236312 int DispVal = 0;
237313 const MachineOperand *DispForReloc = 0;
240316 if (Op3.isGlobalAddress()) {
241317 DispForReloc = &Op3;
242318 } else if (Op3.isConstantPoolIndex()) {
243 DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
244 DispVal += Op3.getOffset();
319 if (Is64BitMode) {
320 DispForReloc = &Op3;
321 } else {
322 DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
323 DispVal += Op3.getOffset();
324 }
245325 } else if (Op3.isJumpTableIndex()) {
246 DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
326 if (Is64BitMode) {
327 DispForReloc = &Op3;
328 } else {
329 DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
330 }
247331 } else {
248332 DispVal = Op3.getImm();
249333 }
255339 unsigned BaseReg = Base.getReg();
256340
257341 // Is a SIB byte needed?
258 if (IndexReg.getReg() == 0 && BaseReg != X86::ESP) {
342 if (IndexReg.getReg() == 0 &&
343 (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
259344 if (BaseReg == 0) { // Just a displacement?
260345 // Emit special case [disp32] encoding
261346 MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
262347
263 emitDisplacementField(DispForReloc, DispVal);
348 emitDisplacementField(DispForReloc, DispVal, PCAdj);
264349 } else {
265350 unsigned BaseRegNo = getX86RegNum(BaseReg);
266351 if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
273358 } else {
274359 // Emit the most general non-SIB encoding: [REG+disp32]
275360 MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
276 emitDisplacementField(DispForReloc, DispVal);
361 emitDisplacementField(DispForReloc, DispVal, PCAdj);
277362 }
278363 }
279364
280365 } else { // We need a SIB byte, so start by outputting the ModR/M byte first
281 assert(IndexReg.getReg() != X86::ESP && "Cannot use ESP as index reg!");
366 assert(IndexReg.getReg() != X86::ESP &&
367 IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
282368
283369 bool ForceDisp32 = false;
284370 bool ForceDisp8 = false;
291377 // Emit the normal disp32 encoding.
292378 MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
293379 ForceDisp32 = true;
294 } else if (DispVal == 0 && BaseReg != X86::EBP) {
380 } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) {
295381 // Emit no displacement ModR/M byte
296382 MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
297383 } else if (isDisp8(DispVal)) {
326412 if (ForceDisp8) {
327413 emitConstant(DispVal, 1);
328414 } else if (DispVal != 0 || ForceDisp32) {
329 emitDisplacementField(DispForReloc, DispVal);
415 emitDisplacementField(DispForReloc, DispVal, PCAdj);
330416 }
331417 }
332418 }
336422 case X86II::Imm8: return 1;
337423 case X86II::Imm16: return 2;
338424 case X86II::Imm32: return 4;
425 case X86II::Imm64: return 8;
339426 default: assert(0 && "Immediate size not set!");
340427 return 0;
341428 }
342429 }
343430
431 /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register?
432 /// e.g. r8, xmm8, etc.
433 bool Emitter::isX86_64ExtendedReg(const MachineOperand &MO) {
434 if (!MO.isRegister()) return false;
435 unsigned RegNo = MO.getReg();
436 int DWNum = II->getRegisterInfo().getDwarfRegNum(RegNo);
437 if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::R8) &&
438 DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::R15))
439 return true;
440 if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::XMM8) &&
441 DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::XMM15))
442 return true;
443 return false;
444 }
445
446 inline static bool isX86_64TruncToByte(unsigned oc) {
447 return (oc == X86::TRUNC_64to8 || oc == X86::TRUNC_32to8 ||
448 oc == X86::TRUNC_16to8);
449 }
450
451
452 inline static bool isX86_64NonExtLowByteReg(unsigned reg) {
453 return (reg == X86::SPL || reg == X86::BPL ||
454 reg == X86::SIL || reg == X86::DIL);
455 }
456
457 /// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
458 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
459 /// size, and 3) use of X86-64 extended registers.
460 unsigned Emitter::determineREX(const MachineInstr &MI) {
461 unsigned REX = 0;
462 unsigned Opcode = MI.getOpcode();
463 const TargetInstrDescriptor &Desc = II->get(Opcode);
464
465 // Pseudo instructions do not need REX prefix byte.
466 if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
467 return 0;
468 if (Desc.TSFlags & X86II::REX_W)
469 REX |= 1 << 3;
470
471 if (MI.getNumOperands()) {
472 // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
473 bool isTrunc8 = isX86_64TruncToByte(Opcode);
474 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
475 const MachineOperand& MO = MI.getOperand(i);
476 if (MO.isRegister()) {
477 unsigned Reg = MO.getReg();
478 // Trunc to byte are actually movb. The real source operand is the low
479 // byte of the register.
480 if (isTrunc8 && i == 1)
481 Reg = getX86SubSuperRegister(Reg, MVT::i8);
482 if (isX86_64NonExtLowByteReg(Reg))
483 REX |= 0x40;
484 }
485 }
486
487 switch (Desc.TSFlags & X86II::FormMask) {
488 case X86II::MRMInitReg:
489 if (isX86_64ExtendedReg(MI.getOperand(0)))
490 REX |= (1 << 0) | (1 << 2);
491 break;
492 case X86II::MRMSrcReg: {
493 if (isX86_64ExtendedReg(MI.getOperand(0)))
494 REX |= 1 << 2;
495 for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) {
496 const MachineOperand& MO = MI.getOperand(i);
497 if (isX86_64ExtendedReg(MO))
498 REX |= 1 << 0;
499 }
500 break;
501 }
502 case X86II::MRMSrcMem: {
503 if (isX86_64ExtendedReg(MI.getOperand(0)))
504 REX |= 1 << 2;
505 unsigned Bit = 0;
506 for (unsigned i = 1; i != 5; ++i) {
507 const MachineOperand& MO = MI.getOperand(i);
508 if (MO.isRegister()) {
509 if (isX86_64ExtendedReg(MO))
510 REX |= 1 << Bit;
511 Bit++;
512 }
513 }
514 break;
515 }
516 case X86II::MRM0m: case X86II::MRM1m:
517 case X86II::MRM2m: case X86II::MRM3m:
518 case X86II::MRM4m: case X86II::MRM5m:
519 case X86II::MRM6m: case X86II::MRM7m:
520 case X86II::MRMDestMem: {
521 if (MI.getNumOperands() >= 5 &&
522 isX86_64ExtendedReg(MI.getOperand(4)))
523 REX |= 1 << 2;
524 unsigned Bit = 0;
525 for (unsigned i = 0; i != 4; ++i) {
526 const MachineOperand& MO = MI.getOperand(i);
527 if (MO.isRegister()) {
528 if (isX86_64ExtendedReg(MO))
529 REX |= 1 << Bit;
530 Bit++;
531 }
532 }
533 break;
534 }
535 default: {
536 if (isX86_64ExtendedReg(MI.getOperand(0)))
537 REX |= 1 << 0;
538 for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) {
539 const MachineOperand& MO = MI.getOperand(i);
540 if (isX86_64ExtendedReg(MO))
541 REX |= 1 << 2;
542 }
543 break;
544 }
545 }
546 }
547 return REX;
548 }
549
344550 void Emitter::emitInstruction(const MachineInstr &MI) {
345551 NumEmitted++; // Keep track of the # of mi's emitted
346552
353559 // Emit the operand size opcode prefix as needed.
354560 if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);
355561
562 // Emit the address size opcode prefix as needed.
563 if (Desc.TSFlags & X86II::AdSize) MCE.emitByte(0x67);
564
565 bool Need0FPrefix = false;
356566 switch (Desc.TSFlags & X86II::Op0Mask) {
357567 case X86II::TB:
358 MCE.emitByte(0x0F); // Two-byte opcode prefix
568 Need0FPrefix = true; // Two-byte opcode prefix
359569 break;
360570 case X86II::REP: break; // already handled.
361571 case X86II::XS: // F3 0F
362572 MCE.emitByte(0xF3);
363 MCE.emitByte(0x0F);
573 Need0FPrefix = true;
364574 break;
365575 case X86II::XD: // F2 0F
366576 MCE.emitByte(0xF2);
367 MCE.emitByte(0x0F);
577 Need0FPrefix = true;
368578 break;
369579 case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
370580 case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
375585 default: assert(0 && "Invalid prefix!");
376586 case 0: break; // No prefix!
377587 }
588
589 if (Is64BitMode) {
590 // REX prefix
591 unsigned REX = determineREX(MI);
592 if (REX)
593 MCE.emitByte(0x40 | REX);
594 }
595
596 // 0x0F escape code must be emitted just before the opcode.
597 if (Need0FPrefix)
598 MCE.emitByte(0x0F);
378599
379600 // If this is a two-address instruction, skip one of the register operands.
380601 unsigned CurOp = 0;
396617 case X86::IMPLICIT_DEF_GR8:
397618 case X86::IMPLICIT_DEF_GR16:
398619 case X86::IMPLICIT_DEF_GR32:
620 case X86::IMPLICIT_DEF_GR64:
399621 case X86::IMPLICIT_DEF_FR32:
400622 case X86::IMPLICIT_DEF_FR64:
401623 case X86::IMPLICIT_DEF_VR64:
416638 } else if (MO.isGlobalAddress()) {
417639 bool isTailCall = Opcode == X86::TAILJMPd ||
418640 Opcode == X86::TAILJMPr || Opcode == X86::TAILJMPm;
419 emitGlobalAddressForCall(MO.getGlobal(), isTailCall);
641 emitGlobalAddressForCall(MO.getGlobal(), !isTailCall);
420642 } else if (MO.isExternalSymbol()) {
421643 emitExternalSymbolAddress(MO.getSymbolName(), true);
422644 } else if (MO.isImmediate()) {
433655 if (CurOp != MI.getNumOperands()) {
434656 const MachineOperand &MO1 = MI.getOperand(CurOp++);
435657 if (MO1.isGlobalAddress()) {
436 assert(sizeOfImm(Desc) == 4 &&
658 assert(sizeOfImm(Desc) == TD->getPointerSize() &&
437659 "Don't know how to emit non-pointer values!");
438 emitGlobalAddressForPtr(MO1.getGlobal(), MO1.getOffset());
660 emitGlobalAddressForPtr(MO1.getGlobal(), Is64BitMode, MO1.getOffset());
439661 } else if (MO1.isExternalSymbol()) {
440 assert(sizeOfImm(Desc) == 4 &&
662 assert(sizeOfImm(Desc) == TD->getPointerSize() &&
441663 "Don't know how to emit non-pointer values!");
442664 emitExternalSymbolAddress(MO1.getSymbolName(), false);
443665 } else if (MO1.isJumpTableIndex()) {
444 assert(sizeOfImm(Desc) == 4 &&
666 assert(sizeOfImm(Desc) == TD->getPointerSize() &&
445667 "Don't know how to emit non-pointer values!");
446668 emitConstant(MCE.getJumpTableEntryAddress(MO1.getJumpTableIndex()), 4);
447669 } else {
459681 emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
460682 break;
461683 }
462 case X86II::MRMDestMem:
684 case X86II::MRMDestMem: {
463685 MCE.emitByte(BaseOpcode);
464686 emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(CurOp+4).getReg()));
465687 CurOp += 5;
466688 if (CurOp != MI.getNumOperands())
467689 emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
468690 break;
691 }
469692
470693 case X86II::MRMSrcReg:
471694 MCE.emitByte(BaseOpcode);
476699 emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
477700 break;
478701
479 case X86II::MRMSrcMem:
702 case X86II::MRMSrcMem: {
703 unsigned PCAdj = (CurOp+5 != MI.getNumOperands()) ? sizeOfImm(Desc) : 0;
704
480705 MCE.emitByte(BaseOpcode);
481 emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()));
706 emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
707 PCAdj);
482708 CurOp += 5;
483709 if (CurOp != MI.getNumOperands())
484710 emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
485711 break;
712 }
486713
487714 case X86II::MRM0r: case X86II::MRM1r:
488715 case X86II::MRM2r: case X86II::MRM3r:
499726 case X86II::MRM0m: case X86II::MRM1m:
500727 case X86II::MRM2m: case X86II::MRM3m:
501728 case X86II::MRM4m: case X86II::MRM5m:
502 case X86II::MRM6m: case X86II::MRM7m:
729 case X86II::MRM6m: case X86II::MRM7m: {
730 unsigned PCAdj = (CurOp+4 != MI.getNumOperands()) ?
731 (MI.getOperand(CurOp+4).isImmediate() ? sizeOfImm(Desc) : 4) : 0;
732
503733 MCE.emitByte(BaseOpcode);
504 emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m);
734 emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m,
735 PCAdj);
505736 CurOp += 4;
506737
507738 if (CurOp != MI.getNumOperands()) {
509740 if (MO.isImmediate())
510741 emitConstant(MO.getImm(), sizeOfImm(Desc));
511742 else if (MO.isGlobalAddress())
512 emitGlobalAddressForPtr(MO.getGlobal(), MO.getOffset());
743 emitGlobalAddressForPtr(MO.getGlobal(), Is64BitMode, MO.getOffset());
513744 else if (MO.isJumpTableIndex())
514745 emitConstant(MCE.getJumpTableEntryAddress(MO.getJumpTableIndex()), 4);
515746 else
516747 assert(0 && "Unknown operand!");
517748 }
518749 break;
750 }
519751
520752 case X86II::MRMInitReg:
521753 MCE.emitByte(BaseOpcode);
2929 #include "llvm/CodeGen/SSARegMap.h"
3030 #include "llvm/CodeGen/SelectionDAGISel.h"
3131 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Support/Compiler.h"
3233 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/Compiler.h"
34 #include "llvm/Support/MathExtras.h"
3435 #include "llvm/ADT/Statistic.h"
3536 #include
3637 #include
5758 int FrameIndex;
5859 } Base;
5960
61 bool isRIPRel; // RIP relative?
6062 unsigned Scale;
6163 SDOperand IndexReg;
6264 unsigned Disp;
6365 GlobalValue *GV;
6466 Constant *CP;
67 const char *ES;
68 int JT;
6569 unsigned Align; // CP alignment.
6670
6771 X86ISelAddressMode()
68 : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0),
69 CP(0), Align(0) {
72 : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
73 GV(0), CP(0), ES(0), JT(-1), Align(0) {
7074 }
7175 };
7276 }
9195 ///
9296 bool FastISel;
9397
98 /// TM - Keep a reference to X86TargetMachine.
99 ///
100 X86TargetMachine &TM;
101
94102 /// X86Lowering - This object fully describes how to lower LLVM code to an
95103 /// X86-specific SelectionDAG.
96104 X86TargetLowering X86Lowering;
99107 /// make the right decision when generating code for different targets.
100108 const X86Subtarget *Subtarget;
101109
110 /// GlobalBaseReg - keeps track of the virtual register mapped onto global
111 /// base register.
102112 unsigned GlobalBaseReg;
103113
104114 public:
105 X86DAGToDAGISel(X86TargetMachine &TM, bool fast)
115 X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
106116 : SelectionDAGISel(X86Lowering),
107 ContainsFPCode(false), FastISel(fast),
117 ContainsFPCode(false), FastISel(fast), TM(tm),
108118 X86Lowering(*TM.getTargetLowering()),
109119 Subtarget(&TM.getSubtarget()) {}
110120
155165 SDOperand &Scale, SDOperand &Index,
156166 SDOperand &Disp) {
157167 Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
158 CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg;
168 CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
169 AM.Base.Reg;
159170 Scale = getI8Imm(AM.Scale);
160171 Index = AM.IndexReg;
161 Disp = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp)
162 : (AM.CP ?
163 CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp)
164 : getI32Imm(AM.Disp));
172 // These are 32-bit even in 64-bit mode since RIP relative offset
173 // is 32-bit.
174 if (AM.GV)
175 Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
176 else if (AM.CP)
177 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
178 else if (AM.ES)
179 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
180 else if (AM.JT != -1)
181 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
182 else
183 Disp = getI32Imm(AM.Disp);
165184 }
166185
167186 /// getI8Imm - Return a target constant with the specified value, of type
475494 /// addressing mode
476495 bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
477496 bool isRoot) {
497 // RIP relative addressing: %rip + 32-bit displacement!
498 if (AM.isRIPRel) {
499 if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
500 uint64_t Val = cast(N)->getValue();
501 if (isInt32(AM.Disp + Val)) {
502 AM.Disp += Val;
503 return false;
504 }
505 }
506 return true;
507 }
508
478509 int id = N.Val->getNodeId();
479510 bool Available = isSelected(id);
480511
481512 switch (N.getOpcode()) {
482513 default: break;
483 case ISD::Constant:
484 AM.Disp += cast(N)->getValue();
485 return false;
514 case ISD::Constant: {
515 uint64_t Val = cast(N)->getValue();
516 if (isInt32(AM.Disp + Val)) {
517 AM.Disp += Val;
518 return false;
519 }
520 break;
521 }
486522
487523 case X86ISD::Wrapper:
488 // If both base and index components have been picked, we can't fit
489 // the result available in the register in the addressing mode. Duplicate
490 // GlobalAddress or ConstantPool as displacement.
491 if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
524 // If value is available in a register both base and index components have
525 // been picked, we can't fit the result available in the register in the
526 // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
527
528 // Can't fit GV or CP in addressing mode for X86-64 medium or large code
529 // model since the displacement field is 32-bit. Ok for small code model.
530
531 // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP
532 // relative addressing mode.
533 if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) &&
534 (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) {
535 bool isRIP = Subtarget->is64Bit();
536 if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val ||
537 AM.BaseType == X86ISelAddressMode::FrameIndexBase))
538 break;
492539 if (ConstantPoolSDNode *CP =
493540 dyn_cast(N.getOperand(0))) {
494541 if (AM.CP == 0) {
495542 AM.CP = CP->get();
496543 AM.Align = CP->getAlignment();
497544 AM.Disp += CP->getOffset();
545 if (isRIP)
546 AM.isRIPRel = true;
498547 return false;
499548 }
500549 } else if (GlobalAddressSDNode *G =
502551 if (AM.GV == 0) {
503552 AM.GV = G->getGlobal();
504553 AM.Disp += G->getOffset();
554 if (isRIP)
555 AM.isRIPRel = true;
556 return false;
557 }
558 } else if (isRoot && isRIP) {
559 if (ExternalSymbolSDNode *S =
560 dyn_cast(N.getOperand(0))) {
561 AM.ES = S->getSymbol();
562 AM.isRIPRel = true;
563 return false;
564 } else if (JumpTableSDNode *J =
565 dyn_cast(N.getOperand(0))) {
566 AM.JT = J->getIndex();
567 AM.isRIPRel = true;
505568 return false;
506569 }
507570 }
532595 AM.IndexReg = ShVal.Val->getOperand(0);
533596 ConstantSDNode *AddVal =
534597 cast(ShVal.Val->getOperand(1));
535 AM.Disp += AddVal->getValue() << Val;
598 uint64_t Disp = AM.Disp + AddVal->getValue() << Val;
599 if (isInt32(Disp))
600 AM.Disp = Disp;
601 else
602 AM.IndexReg = ShVal;
536603 } else {
537604 AM.IndexReg = ShVal;
538605 }
562629 Reg = MulVal.Val->getOperand(0);
563630 ConstantSDNode *AddVal =
564631 cast(MulVal.Val->getOperand(1));
565 AM.Disp += AddVal->getValue() * CN->getValue();
632 uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
633 if (isInt32(Disp))
634 AM.Disp = Disp;
635 else
636 Reg = N.Val->getOperand(0);
566637 } else {
567638 Reg = N.Val->getOperand(0);
568639 }
640711 if (MatchAddress(N, AM))
641712 return false;
642713
714 MVT::ValueType VT = N.getValueType();
643715 if (AM.BaseType == X86ISelAddressMode::RegBase) {
644716 if (!AM.Base.Reg.Val)
645 AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
717 AM.Base.Reg = CurDAG->getRegister(0, VT);
646718 }
647719
648720 if (!AM.IndexReg.Val)
649 AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
721 AM.IndexReg = CurDAG->getRegister(0, VT);
650722
651723 getAddressOperands(AM, Base, Scale, Index, Disp);
652724 return true;
661733 if (MatchAddress(N, AM))
662734 return false;
663735
736 MVT::ValueType VT = N.getValueType();
664737 unsigned Complexity = 0;
665738 if (AM.BaseType == X86ISelAddressMode::RegBase)
666739 if (AM.Base.Reg.Val)
667740 Complexity = 1;
668741 else
669 AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
742 AM.Base.Reg = CurDAG->getRegister(0, VT);
670743 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
671744 Complexity = 4;
672745
673746 if (AM.IndexReg.Val)
674747 Complexity++;
675748 else
676 AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
749 AM.IndexReg = CurDAG->getRegister(0, VT);
677750
678751 if (AM.Scale > 2)
679752 Complexity += 2;
686759 // optimal (especially for code size consideration). LEA is nice because of
687760 // its three-address nature. Tweak the cost function again when we can run
688761 // convertToThreeAddress() at register allocation time.
689 if (AM.GV || AM.CP)
690 Complexity += 2;
762 if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
763 // For X86-64, we should always use lea to materialize RIP relative
764 // addresses.
765 if (Subtarget->is64Bit())
766 Complexity = 4;
767 else
768 Complexity += 2;
769 }
691770
692771 if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
693772 Complexity++;
720799 /// base address to use for accessing globals into a register.
721800 ///
722801 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
802 assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
723803 if (!GlobalBaseReg) {
724804 // Insert the set of GlobalBaseReg into the first MBB of the function
725805 MachineBasicBlock &FirstMBB = BB->getParent()->front();
731811 BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
732812 BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
733813 }
734 return CurDAG->getRegister(GlobalBaseReg, MVT::i32).Val;
814 return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
735815 }
736816
737817 static SDNode *FindCallStartFromCall(SDNode *Node) {
775855 // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
776856 // code and is matched first so to prevent it from being turned into
777857 // LEA32r X+c.
858 // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
859 MVT::ValueType PtrVT = TLI.getPointerTy();
778860 SDOperand N0 = N.getOperand(0);
779861 SDOperand N1 = N.getOperand(1);
780 if (N.Val->getValueType(0) == MVT::i32 &&
862 if (N.Val->getValueType(0) == PtrVT &&
781863 N0.getOpcode() == X86ISD::Wrapper &&
782864 N1.getOpcode() == ISD::Constant) {
783865 unsigned Offset = (unsigned)cast(N1)->getValue();
785867 // TODO: handle ExternalSymbolSDNode.
786868 if (GlobalAddressSDNode *G =
787869 dyn_cast(N0.getOperand(0))) {
788 C = CurDAG->getTargetGlobalAddress(G->getGlobal(), MVT::i32,
870 C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
789871 G->getOffset() + Offset);
790872 } else if (ConstantPoolSDNode *CP =
791873 dyn_cast(N0.getOperand(0))) {
792 C = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
874 C = CurDAG->getTargetConstantPool(CP->get(), PtrVT,
793875 CP->getAlignment(),
794876 CP->getOffset()+Offset);
795877 }
796878
797 if (C.Val)
798 return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, MVT::i32, C);
879 if (C.Val) {
880 if (Subtarget->is64Bit()) {
881 SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
882 CurDAG->getRegister(0, PtrVT), C };
883 return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
884 } else
885 return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
886 }
799887 }
800888
801889 // Other cases are handled by auto-generated code.
810898 case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
811899 case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
812900 case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
901 case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
813902 }
814903 else
815904 switch (NVT) {
817906 case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
818907 case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
819908 case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
909 case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
820910 }
821911
822912 unsigned LoReg, HiReg;
825915 case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
826916 case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
827917 case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
918 case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
828919 }
829920
830921 SDOperand N0 = Node->getOperand(0);
898989 case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
899990 case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
900991 case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
992 case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
901993 }
902994 else
903995 switch (NVT) {
905997 case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
906998 case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
907999 case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1000 case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
9081001 }
9091002
9101003 unsigned LoReg, HiReg;
9261019 ClrOpcode = X86::MOV32r0;
9271020 SExtOpcode = X86::CDQ;
9281021 break;
1022 case MVT::i64:
1023 LoReg = X86::RAX; HiReg = X86::RDX;
1024 ClrOpcode = X86::MOV64r0;
1025 SExtOpcode = X86::CQO;
1026 break;
9291027 }
9301028
9311029 SDOperand N0 = Node->getOperand(0);
9931091 }
9941092
9951093 case ISD::TRUNCATE: {
996 if (NVT == MVT::i8) {
1094 if (!Subtarget->is64Bit() && NVT == MVT::i8) {
9971095 unsigned Opc2;
9981096 MVT::ValueType VT;
9991097 switch (Node->getOperand(0).getValueType()) {
10011099 case MVT::i16:
10021100 Opc = X86::MOV16to16_;
10031101 VT = MVT::i16;
1004 Opc2 = X86::TRUNC_GR16_GR8;
1102 Opc2 = X86::TRUNC_16_to8;
10051103 break;
10061104 case MVT::i32:
10071105 Opc = X86::MOV32to32_;
10081106 VT = MVT::i32;
1009 Opc2 = X86::TRUNC_GR32_GR8;
1107 Opc2 = X86::TRUNC_32_to8;
10101108 break;
10111109 }
10121110
4141 : TargetLowering(TM) {
4242 Subtarget = &TM.getSubtarget();
4343 X86ScalarSSE = Subtarget->hasSSE2();
44 X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
4445
4546 // Set up the TargetLowering object.
4647
5051 setSetCCResultContents(ZeroOrOneSetCCResult);
5152 setSchedulingPreference(SchedulingForRegPressure);
5253 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
53 setStackPointerRegisterToSaveRestore(X86::ESP);
54 setStackPointerRegisterToSaveRestore(X86StackPtr);
5455
5556 if (!Subtarget->isTargetDarwin())
5657 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
7071 addRegisterClass(MVT::i8, X86::GR8RegisterClass);
7172 addRegisterClass(MVT::i16, X86::GR16RegisterClass);
7273 addRegisterClass(MVT::i32, X86::GR32RegisterClass);
74 if (Subtarget->is64Bit())
75 addRegisterClass(MVT::i64, X86::GR64RegisterClass);
7376
7477 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
7578 // operation.
7780 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
7881 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
7982
80 if (X86ScalarSSE)
81 // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
82 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
83 else
83 if (Subtarget->is64Bit()) {
84 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
8485 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
86 } else {
87 if (X86ScalarSSE)
88 // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
89 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
90 else
91 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
92 }
8593
8694 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
8795 // this operation.
95103 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
96104 }
97105
98 // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
99 // isn't legal.
100 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
101 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
106 if (!Subtarget->is64Bit()) {
107 // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
108 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
109 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
110 }
102111
103112 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
104113 // this operation.
118127 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
119128 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
120129
121 if (X86ScalarSSE && !Subtarget->hasSSE3())
122 // Expand FP_TO_UINT into a select.
123 // FIXME: We would like to use a Custom expander here eventually to do
124 // the optimal thing for SSE vs. the default expansion in the legalizer.
125 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
126 else
127 // With SSE3 we can use fisttpll to convert to a signed i64.
130 if (Subtarget->is64Bit()) {
131 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
128132 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
133 } else {
134 if (X86ScalarSSE && !Subtarget->hasSSE3())
135 // Expand FP_TO_UINT into a select.
136 // FIXME: We would like to use a Custom expander here eventually to do
137 // the optimal thing for SSE vs. the default expansion in the legalizer.
138 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
139 else
140 // With SSE3 we can use fisttpll to convert to a signed i64.
141 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
142 }
129143
130144 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
131145 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
134148 setOperationAction(ISD::BR_CC , MVT::Other, Expand);
135149 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
136150 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
151 if (Subtarget->is64Bit())
152 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
137153 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
138154 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
139155 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
140156 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
141157 setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand);
142158 setOperationAction(ISD::FREM , MVT::f64 , Expand);
159
143160 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
144161 setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
145162 setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
149166 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
150167 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
151168 setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
169 if (Subtarget->is64Bit()) {
170 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
171 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
172 setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
173 }
174
152175 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
153176 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
154177
155178 // These should be promoted to a larger select which is supported.
156179 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
157180 setOperationAction(ISD::SELECT , MVT::i8 , Promote);
158
159181 // X86 wants to expand cmov itself.
160182 setOperationAction(ISD::SELECT , MVT::i16 , Custom);
161183 setOperationAction(ISD::SELECT , MVT::i32 , Custom);
166188 setOperationAction(ISD::SETCC , MVT::i32 , Custom);
167189 setOperationAction(ISD::SETCC , MVT::f32 , Custom);
168190 setOperationAction(ISD::SETCC , MVT::f64 , Custom);
191 if (Subtarget->is64Bit()) {
192 setOperationAction(ISD::SELECT , MVT::i64 , Custom);
193 setOperationAction(ISD::SETCC , MVT::i64 , Custom);
194 }
169195 // X86 ret instruction may pop stack.
170196 setOperationAction(ISD::RET , MVT::Other, Custom);
171197 // Darwin ABI issue.
173199 setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
174200 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
175201 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
202 if (Subtarget->is64Bit()) {
203 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
204 setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
205 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
206 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
207 }
176208 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
177209 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
178210 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
197229 setOperationAction(ISD::VAEND , MVT::Other, Expand);
198230 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
199231 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
232 if (Subtarget->is64Bit())
233 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
200234 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
201235
202236 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
440474 if (ObjXMMRegs) {
441475 // Passed in a XMM register.
442476 unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
443 X86::VR128RegisterClass);
477 X86::VR128RegisterClass);
444478 ArgValue= DAG.getCopyFromReg(Root, Reg, ObjectVT);
445479 ArgValues.push_back(ArgValue);
446480 NumXMMRegs += ObjXMMRegs;
465499 bool isVarArg = cast(Op.getOperand(2))->getValue() != 0;
466500 if (isVarArg)
467501 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
468 ReturnAddrIndex = 0; // No return address slot generated yet.
469 BytesToPopOnReturn = 0; // Callee pops nothing.
502 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
503 ReturnAddrIndex = 0; // No return address slot generated yet.
504 BytesToPopOnReturn = 0; // Callee pops nothing.
470505 BytesCallerReserves = ArgOffset;
471506
472507 // If this is a struct return on Darwin/X86, the callee pops the hidden struct
538573 NumXMMRegs = 0;
539574 std::vector > RegsToPass;
540575 std::vector MemOpChains;
541 SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
576 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
542577 for (unsigned i = 0; i != NumOps; ++i) {
543578 SDOperand Arg = Op.getOperand(5+2*i);
544579
736771 NodeTys.push_back(RetVT);
737772 break;
738773 }
774 }
775
776 // If the function returns void, just return the chain.
777 if (ResultVals.empty())
778 return Chain;
779
780 // Otherwise, merge everything together with a MERGE_VALUES node.
781 NodeTys.push_back(MVT::Other);
782 ResultVals.push_back(Chain);
783 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
784 &ResultVals[0], ResultVals.size());
785 return Res.getValue(Op.ResNo);
786 }
787
788
789 //===----------------------------------------------------------------------===//
790 // X86-64 C Calling Convention implementation
791 //===----------------------------------------------------------------------===//
792
793 /// HowToPassX86_64CCCArgument - Returns how an formal argument of the specified
794 /// type should be passed. If it is through stack, returns the size of the stack
795 /// slot; if it is through integer or XMM register, returns the number of
796 /// integer or XMM registers are needed.
797 static void
798 HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
799 unsigned NumIntRegs, unsigned NumXMMRegs,
800 unsigned &ObjSize, unsigned &ObjIntRegs,
801 unsigned &ObjXMMRegs) {
802 ObjSize = 0;
803 ObjIntRegs = 0;
804 ObjXMMRegs = 0;
805
806 switch (ObjectVT) {
807 default: assert(0 && "Unhandled argument type!");
808 case MVT::i8:
809 case MVT::i16:
810 case MVT::i32:
811 case MVT::i64:
812 if (NumIntRegs < 6)
813 ObjIntRegs = 1;
814 else {
815 switch (ObjectVT) {
816 default: break;
817 case MVT::i8: ObjSize = 1; break;
818 case MVT::i16: ObjSize = 2; break;
819 case MVT::i32: ObjSize = 4; break;
820 case MVT::i64: ObjSize = 8; break;
821 }
822 }
823 break;
824 case MVT::f32:
825 case MVT::f64:
826 case MVT::v16i8:
827 case MVT::v8i16:
828 case MVT::v4i32:
829 case MVT::v2i64:
830 case MVT::v4f32:
831 case MVT::v2f64:
832 if (NumXMMRegs < 8)
833 ObjXMMRegs = 1;
834 else {
835 switch (ObjectVT) {
836 default: break;
837 case MVT::f32: ObjSize = 4; break;
838 case MVT::f64: ObjSize = 8; break;
839 case MVT::v16i8:
840 case MVT::v8i16:
841 case MVT::v4i32:
842 case MVT::v2i64:
843 case MVT::v4f32:
844 case MVT::v2f64: ObjSize = 16; break;
845 }
846 break;
847 }
848 }
849 }
850
851 SDOperand
852 X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
853 unsigned NumArgs = Op.Val->getNumValues() - 1;
854 MachineFunction &MF = DAG.getMachineFunction();
855 MachineFrameInfo *MFI = MF.getFrameInfo();
856 SDOperand Root = Op.getOperand(0);
857 bool isVarArg = cast(Op.getOperand(2))->getValue() != 0;
858 std::vector ArgValues;
859
860 // Add DAG nodes to load the arguments... On entry to a function on the X86,
861 // the stack frame looks like this:
862 //
863 // [RSP] -- return address
864 // [RSP + 8] -- first nonreg argument (leftmost lexically)
865 // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size
866 // ...
867 //
868 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
869 unsigned NumIntRegs = 0; // Int regs used for parameter passing.
870 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
871
872 static const unsigned GPR8ArgRegs[] = {
873 X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
874 };
875 static const unsigned GPR16ArgRegs[] = {
876 X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
877 };
878 static const unsigned GPR32ArgRegs[] = {
879 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
880 };
881 static const unsigned GPR64ArgRegs[] = {
882 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
883 };
884 static const unsigned XMMArgRegs[] = {
885 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
886 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
887 };
888
889 for (unsigned i = 0; i < NumArgs; ++i) {
890 MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
891 unsigned ArgIncrement = 8;
892 unsigned ObjSize = 0;
893 unsigned ObjIntRegs = 0;
894 unsigned ObjXMMRegs = 0;
895
896 // FIXME: __int128 and long double support?
897 HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
898 ObjSize, ObjIntRegs, ObjXMMRegs);
899 if (ObjSize > 8)
900 ArgIncrement = ObjSize;
901
902 unsigned Reg = 0;
903 SDOperand ArgValue;
904 if (ObjIntRegs || ObjXMMRegs) {
905 switch (ObjectVT) {
906 default: assert(0 && "Unhandled argument type!");
907 case MVT::i8:
908 case MVT::i16:
909 case MVT::i32:
910 case MVT::i64: {
911 TargetRegisterClass *RC = NULL;
912 switch (ObjectVT) {
913 default: break;
914 case MVT::i8:
915 RC = X86::GR8RegisterClass;
916 Reg = GPR8ArgRegs[NumIntRegs];
917 break;
918 case MVT::i16:
919 RC = X86::GR16RegisterClass;
920 Reg = GPR16ArgRegs[NumIntRegs];
921 break;
922 case MVT::i32:
923 RC = X86::GR32RegisterClass;
924 Reg = GPR32ArgRegs[NumIntRegs];
925 break;
926 case MVT::i64:
927 RC = X86::GR64RegisterClass;
928 Reg = GPR64ArgRegs[NumIntRegs];
929 break;
930 }
931 Reg = AddLiveIn(MF, Reg, RC);
932 ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
933 break;
934 }
935 case MVT::f32:
936 case MVT::f64:
937 case MVT::v16i8:
938 case MVT::v8i16:
939 case MVT::v4i32:
940 case MVT::v2i64:
941 case MVT::v4f32:
942 case MVT::v2f64: {
943 TargetRegisterClass *RC= (ObjectVT == MVT::f32) ?
944 X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ?
945 X86::FR64RegisterClass : X86::VR128RegisterClass);
946 Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC);
947 ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
948 break;
949 }
950 }
951 NumIntRegs += ObjIntRegs;
952 NumXMMRegs += ObjXMMRegs;
953 } else if (ObjSize) {
954 // XMM arguments have to be aligned on 16-byte boundary.
955 if (ObjSize == 16)
956 ArgOffset = ((ArgOffset + 15) / 16) * 16;
957 // Create the SelectionDAG nodes corresponding to a load from this
958 // parameter.
959 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
960 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
961 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
962 DAG.getSrcValue(NULL));
963 ArgOffset += ArgIncrement; // Move on to the next argument.
964 }
965
966 ArgValues.push_back(ArgValue);
967 }
968
969 // If the function takes variable number of arguments, make a frame index for
970 // the start of the first vararg value... for expansion of llvm.va_start.
971 if (isVarArg) {
972 // For X86-64, if there are vararg parameters that are passed via
973 // registers, then we must store them to their spots on the stack so they
974 // may be loaded by deferencing the result of va_next.
975 VarArgsGPOffset = NumIntRegs * 8;
976 VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
977 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
978 RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
979
980 // Store the integer parameter registers.
981 std::vector MemOps;
982 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
983 SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
984 DAG.getConstant(VarArgsGPOffset, getPointerTy()));
985 for (; NumIntRegs != 6; ++NumIntRegs) {
986 unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
987 X86::GR64RegisterClass);
988 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
989 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
990 Val, FIN, DAG.getSrcValue(NULL));
991 MemOps.push_back(Store);
992 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
993 DAG.getConstant(8, getPointerTy()));
994 }
995
996 // Now store the XMM (fp + vector) parameter registers.
997 FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
998 DAG.getConstant(VarArgsFPOffset, getPointerTy()));
999 for (; NumXMMRegs != 8; ++NumXMMRegs) {
1000 unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
1001 X86::VR128RegisterClass);
1002 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
1003 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
1004 Val, FIN, DAG.getSrcValue(NULL));
1005 MemOps.push_back(Store);
1006 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
1007 DAG.getConstant(16, getPointerTy()));
1008 }
1009 if (!MemOps.empty())
1010 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
1011 &MemOps[0], MemOps.size());
1012 }
1013
1014 ArgValues.push_back(Root);
1015
1016 ReturnAddrIndex = 0; // No return address slot generated yet.
1017 BytesToPopOnReturn = 0; // Callee pops nothing.
1018 BytesCallerReserves = ArgOffset;
1019
1020 // Return the new list of results.
1021 std::vector RetVTs(Op.Val->value_begin(),
1022 Op.Val->value_end());
1023 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
1024 }
1025
1026 SDOperand
1027 X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
1028 SDOperand Chain = Op.getOperand(0);
1029 unsigned CallingConv= cast(Op.getOperand(1))->getValue();
1030 bool isVarArg = cast(Op.getOperand(2))->getValue() != 0;
1031 bool isTailCall = cast(Op.getOperand(3))->getValue() != 0;
1032 SDOperand Callee = Op.getOperand(4);
1033 MVT::ValueType RetVT= Op.Val->getValueType(0);
1034 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1035
1036 // Count how many bytes are to be pushed on the stack.
1037 unsigned NumBytes = 0;
1038 unsigned NumIntRegs = 0; // Int regs used for parameter passing.
1039 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
1040
1041 static const unsigned GPR8ArgRegs[] = {
1042 X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
1043 };
1044 static const unsigned GPR16ArgRegs[] = {
1045 X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
1046 };
1047 static const unsigned GPR32ArgRegs[] = {
1048 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
1049 };
1050 static const unsigned GPR64ArgRegs[] = {
1051 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1052 };
1053 static const unsigned XMMArgRegs[] = {
1054 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1055 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1056 };
1057
1058 for (unsigned i = 0; i != NumOps; ++i) {
1059 SDOperand Arg = Op.getOperand(5+2*i);
1060 MVT::ValueType ArgVT = Arg.getValueType();
1061
1062 switch (ArgVT) {
1063 default: assert(0 && "Unknown value type!");
1064 case MVT::i8:
1065 case MVT::i16:
1066 case MVT::i32:
1067 case MVT::i64:
1068 if (NumIntRegs < 6)
1069 ++NumIntRegs;
1070 else
1071 NumBytes += 8;
1072 break;
1073 case MVT::f32:
1074 case MVT::f64:
1075 case MVT::v16i8:
1076 case MVT::v8i16:
1077 case MVT::v4i32:
1078 case MVT::v2i64:
1079 case MVT::v4f32:
1080 case MVT::v2f64:
1081 if (NumXMMRegs < 8)
1082 NumXMMRegs++;
1083 else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
1084 NumBytes += 8;
1085 else {
1086 // XMM arguments have to be aligned on 16-byte boundary.
1087 NumBytes = ((NumBytes + 15) / 16) * 16;
1088 NumBytes += 16;
1089 }
1090 break;
1091 }
1092 }
1093
1094 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1095
1096 // Arguments go on the stack in reverse order, as specified by the ABI.
1097 unsigned ArgOffset = 0;
1098 NumIntRegs = 0;
1099 NumXMMRegs = 0;
1100 std::vector > RegsToPass;
1101 std::vector MemOpChains;
1102 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
1103 for (unsigned i = 0; i != NumOps; ++i) {
1104 SDOperand Arg = Op.getOperand(5+2*i);
1105 MVT::ValueType ArgVT = Arg.getValueType();
1106
1107 switch (ArgVT) {
1108 default: assert(0 && "Unexpected ValueType for argument!");
1109 case MVT::i8:
1110 case MVT::i16:
1111 case MVT::i32:
1112 case MVT::i64:
1113 if (NumIntRegs < 6) {
1114 unsigned Reg = 0;
1115 switch (ArgVT) {
1116 default: break;
1117 case MVT::i8: Reg = GPR8ArgRegs[NumIntRegs]; break;
1118 case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
1119 case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
1120 case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
1121 }
1122 RegsToPass.push_back(std::make_pair(Reg, Arg));
1123 ++NumIntRegs;
1124 } else {
1125 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1126 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1127 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1128 Arg, PtrOff, DAG.getSrcValue(NULL)));
1129 ArgOffset += 8;
1130 }
1131 break;
1132 case MVT::f32:
1133 case MVT::f64:
1134 case MVT::v16i8:
1135 case MVT::v8i16:
1136 case MVT::v4i32:
1137 case MVT::v2i64:
1138 case MVT::v4f32:
1139 case MVT::v2f64:
1140 if (NumXMMRegs < 8) {
1141 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
1142 NumXMMRegs++;
1143 } else {
1144 if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
1145 // XMM arguments have to be aligned on 16-byte boundary.
1146 ArgOffset = ((ArgOffset + 15) / 16) * 16;
1147 }
1148 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1149 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1150 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1151 Arg, PtrOff, DAG.getSrcValue(NULL)));
1152 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
1153 ArgOffset += 8;
1154 else
1155 ArgOffset += 16;
1156 }
1157 }
1158 }
1159
1160 if (!MemOpChains.empty())
1161 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1162 &MemOpChains[0], MemOpChains.size());
1163
1164 // Build a sequence of copy-to-reg nodes chained together with token chain
1165 // and flag operands which copy the outgoing args into registers.
1166 SDOperand InFlag;
1167 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1168 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1169 InFlag);
1170 InFlag = Chain.getValue(1);
1171 }
1172
1173 if (isVarArg) {
1174 // From AMD64 ABI document:
1175 // For calls that may call functions that use varargs or stdargs
1176 // (prototype-less calls or calls to functions containing ellipsis (...) in
1177 // the declaration) %al is used as hidden argument to specify the number
1178 // of SSE registers used. The contents of %al do not need to match exactly
1179 // the number of registers, but must be an ubound on the number of SSE
1180 // registers used and is in the range 0 - 8 inclusive.
1181 Chain = DAG.getCopyToReg(Chain, X86::AL,
1182 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
1183 InFlag = Chain.getValue(1);
1184 }
1185
1186 // If the callee is a GlobalAddress node (quite common, every direct call is)
1187 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1188 if (GlobalAddressSDNode *G = dyn_cast(Callee))
1189 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1190 else if (ExternalSymbolSDNode *S = dyn_cast(Callee))
1191 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1192
1193 std::vector NodeTys;
1194 NodeTys.push_back(MVT::Other); // Returns a chain
1195 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1196 std::vector Ops;
1197 Ops.push_back(Chain);
1198 Ops.push_back(Callee);
1199
1200 // Add argument registers to the end of the list so that they are known live
1201 // into the call.
1202 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1203 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1204 RegsToPass[i].second.getValueType()));
1205
1206 if (InFlag.Val)
1207 Ops.push_back(InFlag);
1208
1209 // FIXME: Do not generate X86ISD::TAILCALL for now.
1210 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1211 NodeTys, &Ops[0], Ops.size());
1212 InFlag = Chain.getValue(1);
1213
1214 NodeTys.clear();
1215 NodeTys.push_back(MVT::Other); // Returns a chain
1216 if (RetVT != MVT::Other)
1217 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1218 Ops.clear();
1219 Ops.push_back(Chain);
1220 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1221 Ops.push_back(DAG.getConstant(0, getPointerTy()));
1222 Ops.push_back(InFlag);
1223 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1224 if (RetVT != MVT::Other)
1225 InFlag = Chain.getValue(1);
1226
1227 std::vector ResultVals;
1228 NodeTys.clear();
1229 switch (RetVT) {
1230 default: assert(0 && "Unknown value type to return!");
1231 case MVT::Other: break;
1232 case MVT::i8:
1233 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
1234 ResultVals.push_back(Chain.getValue(0));
1235 NodeTys.push_back(MVT::i8);
1236 break;
1237 case MVT::i16:
1238 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
1239 ResultVals.push_back(Chain.getValue(0));
1240 NodeTys.push_back(MVT::i16);
1241 break;
1242 case MVT::i32:
1243 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
1244 ResultVals.push_back(Chain.getValue(0));
1245 NodeTys.push_back(MVT::i32);
1246 break;
1247 case MVT::i64:
1248 if (Op.Val->getValueType(1) == MVT::i64) {
1249 // FIXME: __int128 support?
1250 Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
1251 ResultVals.push_back(Chain.getValue(0));
1252 Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
1253 Chain.getValue(2)).getValue(1);
1254 ResultVals.push_back(Chain.getValue(0));
1255 NodeTys.push_back(MVT::i64);
1256 } else {
1257 Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
1258 ResultVals.push_back(Chain.getValue(0));
1259 }
1260 NodeTys.push_back(MVT::i64);
1261 break;
1262 case MVT::f32:
1263 case MVT::f64:
1264 case MVT::v16i8:
1265 case MVT::v8i16:
1266 case MVT::v4i32:
1267 case MVT::v2i64:
1268 case MVT::v4f32:
1269 case MVT::v2f64:
1270 // FIXME: long double support?
1271 Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
1272 ResultVals.push_back(Chain.getValue(0));
1273 NodeTys.push_back(RetVT);
1274 break;
7391275 }
7401276
7411277 // If the function returns void, just return the chain.
9481484 ArgOffset += 4;
9491485
9501486 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
1487 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
9511488 ReturnAddrIndex = 0; // No return address slot generated yet.
9521489 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments.
9531490 BytesCallerReserves = 0;
10621599 NumIntRegs = 0;
10631600 std::vector > RegsToPass;
10641601 std::vector MemOpChains;
1065 SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
1602 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
10661603 for (unsigned i = 0; i != NumOps; ++i) {
10671604 SDOperand Arg = Op.getOperand(5+2*i);
10681605
12721809 if (ReturnAddrIndex == 0) {
12731810 // Set up a frame object for the return address.
12741811 MachineFunction &MF = DAG.getMachineFunction();
1275 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1276 }
1277
1278 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
1812 if (Subtarget->is64Bit())
1813 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
1814 else
1815 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1816 }
1817
1818 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
12791819 }
12801820
12811821
12901830 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
12911831 if (!isFrameAddress)
12921832 // Just load the return address
1293 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
1833 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI,
12941834 DAG.getSrcValue(NULL));
12951835 else
1296 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
1297 DAG.getConstant(4, MVT::i32));
1836 Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
1837 DAG.getConstant(4, getPointerTy()));
12981838 }
12991839 return std::make_pair(Result, Chain);
13001840 }
21832723 ///
21842724 static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
21852725 unsigned NumNonZero, unsigned NumZero,
2186 SelectionDAG &DAG) {
2726 SelectionDAG &DAG, TargetLowering &TLI) {
21872727 if (NumNonZero > 8)
21882728 return SDOperand();
21892729
22162756
22172757 if (ThisElt.Val)
22182758 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2219 DAG.getConstant(i/2, MVT::i32));
2759 DAG.getConstant(i/2, TLI.getPointerTy()));
22202760 }
22212761 }
22222762
22272767 ///
22282768 static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
22292769 unsigned NumNonZero, unsigned NumZero,
2230 SelectionDAG &DAG) {
2770 SelectionDAG &DAG, TargetLowering &TLI) {
22312771 if (NumNonZero > 4)
22322772 return SDOperand();
22332773
22442784 First = false;
22452785 }
22462786 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2247 DAG.getConstant(i, MVT::i32));
2787 DAG.getConstant(i, TLI.getPointerTy()));
22482788 }
22492789 }
22502790
23232863
23242864 // If element VT is < 32 bits, convert it to inserts into a zero vector.
23252865 if (EVTBits == 8) {
2326 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG);
2866 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
2867 *this);
23272868 if (V.Val) return V;
23282869 }
23292870
23302871 if (EVTBits == 16) {
2331 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG);
2872 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
2873 *this);
23322874 if (V.Val) return V;
23332875 }
23342876
27903332 CP->getAlignment()));
27913333 if (Subtarget->isTargetDarwin()) {
27923334 // With PIC, the address is actually $g + Offset.
2793 if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3335 if (!Subtarget->is64Bit() &&
3336 getTargetMachine().getRelocationModel() == Reloc::PIC_)
27943337 Result = DAG.getNode(ISD::ADD, getPointerTy(),
27953338 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
27963339 }
28063349 getPointerTy()));
28073350 if (Subtarget->isTargetDarwin()) {
28083351 // With PIC, the address is actually $g + Offset.
2809 if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3352 if (!Subtarget->is64Bit() &&
3353 getTargetMachine().getRelocationModel() == Reloc::PIC_)
28103354 Result = DAG.getNode(ISD::ADD, getPointerTy(),
28113355 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
28123356 Result);
28173361 // not the GV offset field.
28183362 if (getTargetMachine().getRelocationModel() != Reloc::Static &&
28193363 DarwinGVRequiresExtraLoad(GV))
2820 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
3364 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(),
28213365 Result, DAG.getSrcValue(NULL));
28223366 }
28233367
28323376 getPointerTy()));
28333377 if (Subtarget->isTargetDarwin()) {
28343378 // With PIC, the address is actually $g + Offset.
2835 if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3379 if (!Subtarget->is64Bit() &&
3380 getTargetMachine().getRelocationModel() == Reloc::PIC_)
28363381 Result = DAG.getNode(ISD::ADD, getPointerTy(),
28373382 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
28383383 Result);
32333778 getPointerTy()));
32343779 if (Subtarget->isTargetDarwin()) {
32353780 // With PIC, the address is actually $g + Offset.
3236 if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3781 if (!Subtarget->is64Bit() &&
3782 getTargetMachine().getRelocationModel() == Reloc::PIC_)
32373783 Result = DAG.getNode(ISD::ADD, getPointerTy(),
32383784 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
32393785 Result);
32443790
32453791 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
32463792 unsigned CallingConv= cast(Op.getOperand(1))->getValue();
3247 if (CallingConv == CallingConv::Fast && EnableFastCC)
3793 if (Subtarget->is64Bit())
3794 return LowerX86_64CCCCallTo(Op, DAG);
3795 else if (CallingConv == CallingConv::Fast && EnableFastCC)
32483796 return LowerFastCCCallTo(Op, DAG);
32493797 else
32503798 return LowerCCCCallTo(Op, DAG);
32633811 case 3: {
32643812 MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
32653813
3266 if (MVT::isVector(ArgVT)) {
3814 if (MVT::isVector(ArgVT) ||
3815 (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) {
32673816 // Integer or FP vector result -> XMM0.
32683817 if (DAG.getMachineFunction().liveout_empty())
32693818 DAG.getMachineFunction().addLiveOut(X86::XMM0);
32703819 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
32713820 SDOperand());
32723821 } else if (MVT::isInteger(ArgVT)) {
3273 // Integer result -> EAX
3822 // Integer result -> EAX / RAX.
3823 // The C calling convention guarantees the return value has been
3824 // promoted to at least MVT::i32. The X86-64 ABI doesn't require the
3825 // value to be promoted MVT::i64. So we don't have to extend it to
3826 // 64-bit. Return the value in EAX, but mark RAX as liveout.
3827 unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
32743828 if (DAG.getMachineFunction().liveout_empty())
3275 DAG.getMachineFunction().addLiveOut(X86::EAX);
3276
3277 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
3829 DAG.getMachineFunction().addLiveOut(Reg);
3830
3831 Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX;
3832 Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1),
32783833 SDOperand());
32793834 } else if (!X86ScalarSSE) {
32803835 // FP return with fp-stack value.
33283883 }
33293884 break;
33303885 }
3331 case 5:
3886 case 5: {
3887 unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
3888 unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX;
33323889 if (DAG.getMachineFunction().liveout_empty()) {
3333 DAG.getMachineFunction().addLiveOut(X86::EAX);
3334 DAG.getMachineFunction().addLiveOut(X86::EDX);
3890 DAG.getMachineFunction().addLiveOut(Reg1);
3891 DAG.getMachineFunction().addLiveOut(Reg2);
33353892 }
33363893
3337 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(3),
3894 Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3),
33383895 SDOperand());
3339 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1));
3896 Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1));
33403897 break;
3898 }
33413899 }
33423900 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
3343 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
3901 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
33443902 Copy.getValue(1));
33453903 }
33463904
33543912 MF.getInfo()->setForceFramePointer(true);
33553913
33563914 unsigned CC = cast(Op.getOperand(1))->getValue();
3357 if (CC == CallingConv::Fast && EnableFastCC)
3915 if (Subtarget->is64Bit())
3916 return LowerX86_64CCCArguments(Op, DAG);
3917 else if (CC == CallingConv::Fast && EnableFastCC)
33583918 return LowerFastCCArguments(Op, DAG);
33593919 else
33603920 return LowerCCCArguments(Op, DAG);
33933953 bool TwoRepStos = false;
33943954 if (ValC) {
33953955 unsigned ValReg;
3396 unsigned Val = ValC->getValue() & 255;
3956 uint64_t Val = ValC->getValue() & 255;
33973957
33983958 // If the value is a constant, then we can potentially use larger sets.
33993959 switch (Align & 3) {
34003960 case 2: // WORD aligned
34013961 AVT = MVT::i16;
3402 Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
3403 BytesLeft = I->getValue() % 2;
3404 Val = (Val << 8) | Val;
34053962 ValReg = X86::AX;
3963 Val = (Val << 8) | Val;
34063964 break;
3407 case 0: // DWORD aligned
3965 case 0: // DWORD aligned
34083966 AVT = MVT::i32;
3409 if (I) {
3410 Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
3411 BytesLeft = I->getValue() % 4;
3412 } else {
3413 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
3414 DAG.getConstant(2, MVT::i8));
3415 TwoRepStos = true;
3416 }
3967 ValReg = X86::EAX;
34173968 Val = (Val << 8) | Val;
34183969 Val = (Val << 16) | Val;
3419 ValReg = X86::EAX;
3970 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
3971 AVT = MVT::i64;
3972 ValReg = X86::RAX;
3973 Val = (Val << 32) | Val;
3974 }
34203975 break;
34213976 default: // Byte aligned
34223977 AVT = MVT::i8;
3978 ValReg = X86::AL;
34233979 Count = Op.getOperand(3);
3424 ValReg = X86::AL;
34253980 break;
3981 }
3982
3983 if (AVT > MVT::i8) {
3984 if (I) {
3985 unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
3986 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
3987 BytesLeft = I->getValue() % UBytes;
3988 } else {
3989 assert(AVT >= MVT::i32 &&
3990 "Do not use rep;stos if not at least DWORD aligned");
3991 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
3992 Op.getOperand(3), DAG.getConstant(2, MVT::i8));
3993 TwoRepStos = true;
3994 }
34263995 }
34273996
34283997 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
34354004 InFlag = Chain.getValue(1);
34364005 }
34374006
3438 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
4007 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4008 Count, InFlag);
34394009 InFlag = Chain.getValue(1);
3440 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
4010 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4011 Op.getOperand(1), InFlag);
34414012 InFlag = Chain.getValue(1);
34424013
34434014 std::vector Tys;
34544025 Count = Op.getOperand(3);
34554026 MVT::ValueType CVT = Count.getValueType();
34564027 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3457 DAG.getConstant(3, CVT));
3458 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
4028 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4029 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4030 Left, InFlag);
34594031 InFlag = Chain.getValue(1);
34604032 Tys.clear();
34614033 Tys.push_back(MVT::Other);
34664038 Ops.push_back(InFlag);
34674039 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
34684040 } else if (BytesLeft) {
3469 // Issue stores for the last 1 - 3 bytes.
4041 // Issue stores for the last 1 - 7 bytes.
34704042 SDOperand Value;
34714043 unsigned Val = ValC->getValue() & 255;
34724044 unsigned Offset = I->getValue() - BytesLeft;
34734045 SDOperand DstAddr = Op.getOperand(1);
34744046 MVT::ValueType AddrVT = DstAddr.getValueType();
4047 if (BytesLeft >= 4) {
4048 Val = (Val << 8) | Val;
4049 Val = (Val << 16) | Val;
4050 Value = DAG.getConstant(Val, MVT::i32);
4051 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4052 DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4053 DAG.getConstant(Offset, AddrVT)),
4054 DAG.getSrcValue(NULL));
4055 BytesLeft -= 4;
4056 Offset += 4;
4057 }
34754058 if (BytesLeft >= 2) {
34764059 Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
34774060 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
34814064 BytesLeft -= 2;
34824065 Offset += 2;
34834066 }
3484
34854067 if (BytesLeft == 1) {
34864068 Value = DAG.getConstant(Val, MVT::i8);
34874069 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
35244106 switch (Align & 3) {
35254107 case 2: // WORD aligned
35264108 AVT = MVT::i16;
3527 Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
3528 BytesLeft = I->getValue() % 2;
35294109 break;
3530 case 0: // DWORD aligned
4110 case 0: // DWORD aligned
35314111 AVT = MVT::i32;
3532 if (I) {
3533 Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
3534 BytesLeft = I->getValue() % 4;
3535 } else {
3536 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
3537 DAG.getConstant(2, MVT::i8));
3538 TwoRepMovs = true;
3539 }
4112 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
4113 AVT = MVT::i64;
35404114 break;
35414115 default: // Byte aligned
35424116 AVT = MVT::i8;
35444118 break;
35454119 }
35464120
4121 if (AVT > MVT::i8) {
4122 if (I) {
4123 unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4124 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
4125 BytesLeft = I->getValue() % UBytes;
4126 } else {
4127 assert(AVT >= MVT::i32 &&
4128 "Do not use rep;movs if not at least DWORD aligned");
4129 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
4130 Op.getOperand(3), DAG.getConstant(2, MVT::i8));
4131 TwoRepMovs = true;
4132 }
4133 }
4134
35474135 SDOperand InFlag(0, 0);
3548 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
4136 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4137 Count, InFlag);
35494138 InFlag = Chain.getValue(1);
3550 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
4139 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4140 Op.getOperand(1), InFlag);
35514141 InFlag = Chain.getValue(1);
3552 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
4142 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
4143 Op.getOperand(2), InFlag);
35534144 InFlag = Chain.getValue(1);
35544145
35554146 std::vector Tys;
35664157 Count = Op.getOperand(3);
35674158 MVT::ValueType CVT = Count.getValueType();
35684159 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3569 DAG.getConstant(3, CVT));
3570 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
4160 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4161 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4162 Left, InFlag);
35714163 InFlag = Chain.getValue(1);
35724164 Tys.clear();
35734165 Tys.push_back(MVT::Other);
35784170 Ops.push_back(InFlag);
35794171 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
35804172 } else if (BytesLeft) {
3581 // Issue loads and stores for the last 1 - 3 bytes.
4173 // Issue loads and stores for the last 1 - 7 bytes.
35824174 unsigned Offset = I->getValue() - BytesLeft;
35834175 SDOperand DstAddr = Op.getOperand(1);
35844176 MVT::ValueType DstVT = DstAddr.getValueType();
35854177 SDOperand SrcAddr = Op.getOperand(2);
35864178 MVT::ValueType SrcVT = SrcAddr.getValueType();
35874179 SDOperand Value;
4180 if (BytesLeft >= 4) {
4181 Value = DAG.getLoad(MVT::i32, Chain,
4182 DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4183 DAG.getConstant(Offset, SrcVT)),
4184 DAG.getSrcValue(NULL));
4185 Chain = Value.getValue(1);
4186 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4187 DAG.getNode(ISD::ADD, DstVT, DstAddr,
4188 DAG.getConstant(Offset, DstVT)),
4189 DAG.getSrcValue(NULL));
4190 BytesLeft -= 4;
4191 Offset += 4;
4192 }
35884193 if (BytesLeft >= 2) {
35894194 Value = DAG.getLoad(MVT::i16, Chain,
35904195 DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
36344239 }
36354240
36364241 SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
3637 // vastart just stores the address of the VarArgsFrameIndex slot into the
3638 // memory location argument.
3639 // FIXME: Replace MVT::i32 with PointerTy
3640 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
3641 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
3642 Op.getOperand(1), Op.getOperand(2));
4242 if (!Subtarget->is64Bit()) {
4243 // vastart just stores the address of the VarArgsFrameIndex slot into the
4244 // memory location argument.
4245 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4246 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
4247 Op.getOperand(1), Op.getOperand(2));
4248 }
4249
4250 // __va_list_tag:
4251 // gp_offset (0 - 6 * 8)
4252 // fp_offset (48 - 48 + 8 * 16)
4253 // overflow_arg_area (point to parameters coming in memory).
4254 // reg_save_area
4255 std::vector MemOps;
4256 SDOperand FIN = Op.getOperand(1);
4257 // Store gp_offset
4258 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4259 DAG.getConstant(VarArgsGPOffset, MVT::i32),
4260 FIN, Op.getOperand(2));
4261 MemOps.push_back(Store);
4262
4263 // Store fp_offset
4264 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4265 DAG.getConstant(4, getPointerTy()));
4266 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4267 DAG.getConstant(VarArgsFPOffset, MVT::i32),
4268 FIN, Op.getOperand(2));
4269 MemOps.push_back(Store);
4270
4271 // Store ptr to overflow_arg_area
4272 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4273 DAG.getConstant(4, getPointerTy()));
4274 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4275 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4276 OVFIN, FIN, Op.getOperand(2));
4277 MemOps.push_back(Store);
4278
4279 // Store ptr to reg_save_area.
4280 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4281 DAG.getConstant(8, getPointerTy()));
4282 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
4283 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4284 RSFIN, FIN, Op.getOperand(2));
4285 MemOps.push_back(Store);
4286 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
36434287 }
36444288
36454289 SDOperand
43324976 Res.first = DestReg;
43334977 Res.second = Res.second = X86::GR32RegisterClass;
43344978 }
4979 } else if (VT == MVT::i64) {
4980 unsigned DestReg = 0;
4981 switch (Res.first) {
4982 default: break;
4983 case X86::AX: DestReg = X86::RAX; break;
4984 case X86::DX: DestReg = X86::RDX; break;
4985 case X86::CX: DestReg = X86::RCX; break;
4986 case X86::BX: DestReg = X86::RBX; break;
4987 case X86::SI: DestReg = X86::RSI; break;
4988 case X86::DI: DestReg = X86::RDI; break;
4989 case X86::BP: DestReg = X86::RBP; break;
4990 case X86::SP: DestReg = X86::RSP; break;
4991 }
4992 if (DestReg) {
4993 Res.first = DestReg;
4994 Res.second = Res.second = X86::GR64RegisterClass;
4995 }
43354996 }
43364997
43374998 return Res;
266266 // X86TargetLowering - X86 Implementation of the TargetLowering interface
267267 class X86TargetLowering : public TargetLowering {
268268 int VarArgsFrameIndex; // FrameIndex for start of varargs area.
269 int RegSaveFrameIndex; // X86-64 vararg func register save area.
270 unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset.
271 unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
269272 int ReturnAddrIndex; // FrameIndex for return slot.
270273 int BytesToPopOnReturn; // Number of arg bytes ret should pop.
271274 int BytesCallerReserves; // Number of arg bytes caller makes.
346349 /// make the right decision when generating code for different targets.
347350 const X86Subtarget *Subtarget;
348351
352 /// X86StackPtr - X86 physical register used as stack ptr.
353 unsigned X86StackPtr;
354
349355 /// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
350356 bool X86ScalarSSE;
351357
352358 // C Calling Convention implementation.
353359 SDOperand LowerCCCArguments(SDOperand Op, SelectionDAG &DAG);
354360 SDOperand LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG);
361
362 // X86-64 C Calling Convention implementation.
363 SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG);
364 SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG);
355365
356366 // Fast Calling Convention implementation.
357367 SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG);
2121
2222 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
2323 : TargetInstrInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0])),
24 TM(tm), RI(*this) {
24 TM(tm), RI(tm, *this) {
2525 }
2626
2727
2929 unsigned& sourceReg,
3030 unsigned& destReg) const {
3131 MachineOpCode oc = MI.getOpcode();
32 if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
32 if (oc == X86::MOV8rr || oc == X86::MOV16rr ||
33 oc == X86::MOV32rr || oc == X86::MOV64rr ||
3334 oc == X86::MOV16to16_ || oc == X86::MOV32to32_ ||
3435 oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
3536 oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
5859 case X86::MOV16_rm:
5960 case X86::MOV32rm:
6061 case X86::MOV32_rm:
62 case X86::MOV64rm:
6163 case X86::FpLD64m:
6264 case X86::MOVSSrm:
6365 case X86::MOVSDrm:
8587 case X86::MOV16_mr:
8688 case X86::MOV32mr:
8789 case X86::MOV32_mr:
90 case X86::MOV64mr:
8891 case X86::FpSTP64m:
8992 case X86::MOVSSmr:
9093 case X86::MOVSDmr:
144147
145148 switch (MI->getOpcode()) {
146149 case X86::INC32r:
150 case X86::INC64_32r:
147151 assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
148152 return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, 1);
149153 case X86::INC16r:
154 case X86::INC64_16r:
150155 if (DisableLEA16) return 0;
151156 assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
152157 return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, 1);
153158 case X86::DEC32r:
159 case X86::DEC64_32r:
154160 assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
155161 return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, -1);
156162 case X86::DEC16r:
163 case X86::DEC64_16r:
157164 if (DisableLEA16) return 0;
158165 assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
159166 return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, -1);
263270 return BuildMI(*MBB, MBB->erase(MI), ROpcode, 1).addMBB(TMBB);
264271 }
265272
273 const TargetRegisterClass *X86InstrInfo::getPointerRegClass() const {
274 const X86Subtarget *Subtarget = &TM.getSubtarget();
275 if (Subtarget->is64Bit())
276 return &X86::GR64RegClass;
277 else
278 return &X86::GR32RegClass;
279 }
1717 #include "X86RegisterInfo.h"
1818
1919 namespace llvm {
20 class X86RegisterInfo;
2021 class X86TargetMachine;
2122
2223 /// X86II - This namespace holds all of the target specific flags that
8990 // instead of 32 bit data.
9091 OpSize = 1 << 6,
9192
93 // AsSize - Set if this instruction requires an operand size prefix (0x67),
94 // which most often indicates that the instruction address 16 bit address
95 // instead of 32 bit address (or 32 bit address in 64 bit mode).
96 AdSize = 1 << 7,
97
98 //===------------------------------------------------------------------===//
9299 // Op0Mask - There are several prefix bytes that are used to form two byte
93100 // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
94101 // used to obtain the setting of this field. If no bits in this field is
95102 // set, there is no prefix byte for obtaining a multibyte opcode.
96103 //
97 Op0Shift = 7,
104 Op0Shift = 8,
98105 Op0Mask = 0xF << Op0Shift,
99106
100107 // TB - TwoByte - Set if this instruction has a two byte opcode, which
117124 XD = 11 << Op0Shift, XS = 12 << Op0Shift,
118125
119126 //===------------------------------------------------------------------===//
120 // This two-bit field describes the size of an immediate operand. Zero is
127 // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
128 // They are used to specify GPRs and SSE registers, 64-bit operand size,
129 // etc. We only cares about REX.W and REX.R bits and only the former is
130 // statically determined.
131 //
132 REXShift = 12,
133 REX_W = 1 << REXShift,
134
135 //===------------------------------------------------------------------===//
136 // This three-bit field describes the size of an immediate operand. Zero is
121137 // unused so that we can tell if we forgot to set a value.
122 ImmShift = 11,
123 ImmMask = 3 << ImmShift,
138 ImmShift = 13,
139 ImmMask = 7 << ImmShift,
124140 Imm8 = 1 << ImmShift,
125141 Imm16 = 2 << ImmShift,
126142 Imm32 = 3 << ImmShift,
143 Imm64 = 4 << ImmShift,
127144
128145 //===------------------------------------------------------------------===//
129146 // FP Instruction Classification... Zero is non-fp instruction.
130147
131148 // FPTypeMask - Mask for all of the FP types...
132 FPTypeShift = 13,
149 FPTypeShift = 16,
133150 FPTypeMask = 7 << FPTypeShift,
134151
135152 // NotFP - The default, set for instructions that do not use FP registers.
161178 // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
162179 SpecialFP = 7 << FPTypeShift,
163180
164 OpcodeShift = 16,
181 // Bits 19 -> 23 are unused
182 OpcodeShift = 24,
165183 OpcodeMask = 0xFF << OpcodeShift
166 // Bits 25 -> 31 are unused
167184 };
168185 }
169186
215232 virtual MachineBasicBlock::iterator
216233 reverseBranchCondition(MachineBasicBlock::iterator MI) const;
217234
235 const TargetRegisterClass *getPointerRegClass() const;
236
218237 // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
219238 // specified opcode number.
220239 //
3838 def SDT_X86CallSeqEnd : SDTypeProfile<0, 2, [ SDTCisVT<0, i32>,
3939 SDTCisVT<1, i32> ]>;
4040
41 def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
41 def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
4242
4343 def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
4444
9494 class X86MemOperand : Operand {
9595 let PrintMethod = printMethod;
9696 let NumMIOperands = 4;
97 let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
97 let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
9898 }
9999
100100 def i8mem : X86MemOperand<"printi8mem">;
106106 def f64mem : X86MemOperand<"printf64mem">;
107107 def f128mem : X86MemOperand<"printf128mem">;
108108
109 def lea32mem : Operand {
110 let PrintMethod = "printi32mem";
111 let NumMIOperands = 4;
112 let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
113 }
114
109115 def SSECC : Operand {
110116 let PrintMethod = "printSSECC";
111117 }
128134 //
129135
130136 // Define X86 specific addressing mode.
131 def addr : ComplexPattern;
132 def leaaddr : ComplexPattern
133 [add, mul, shl, or, frameindex]>;
137 def addr : ComplexPattern]>;
138 def lea32addr : ComplexPattern
139 [add, mul, shl, or, frameindex]>;
134140
135141 //===----------------------------------------------------------------------===//
136142 // X86 Instruction Format Definitions.
157163
158164 //===----------------------------------------------------------------------===//
159165 // X86 Instruction Predicate Definitions.
160 def HasMMX : Predicate<"Subtarget->hasMMX()">;
161 def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
162 def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
163 def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
164 def FPStack : Predicate<"!Subtarget->hasSSE2()">;
166 def HasMMX : Predicate<"Subtarget->hasMMX()">;
167 def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
168 def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
169 def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
170 def FPStack : Predicate<"!Subtarget->hasSSE2()">;
171 def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
172 def In64BitMode : Predicate<"Subtarget->is64Bit()">;
165173
166174 //===----------------------------------------------------------------------===//
167175 // X86 specific pattern fragments.
170178 // ImmType - This specifies the immediate type used by an instruction. This is
171179 // part of the ad-hoc solution used to emit machine instruction encodings by our
172180 // machine code emitter.
173 class ImmType val> {
174 bits<2> Value = val;
181 class ImmType val> {
182 bits<3> Value = val;
175183 }
176184 def NoImm : ImmType<0>;
177185 def Imm8 : ImmType<1>;
178186 def Imm16 : ImmType<2>;
179187 def Imm32 : ImmType<3>;
188 def Imm64 : ImmType<4>;
180189
181190 // FPFormat - This specifies what form this FP instruction has. This is used by
182191 // the Floating-Point stackifier pass.
201210 Format Form = f;
202211 bits<6> FormBits = Form.Value;
203212 ImmType ImmT = i;
204 bits<2> ImmTypeBits = ImmT.Value;
213 bits<3> ImmTypeBits = ImmT.Value;
205214
206215 dag OperandList = ops;
207216 string AsmString = AsmStr;
209218 //
210219 // Attributes specific to X86 instructions...
211220 //
212 bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
221 bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
222 bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
213223
214224 bits<4> Prefix = 0; // Which prefix byte does this inst have?
225 bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix?
215226 FPFormat FPForm; // What flavor of FP instruction is this?
216227 bits<3> FPFormBits = 0;
217228 }
225236 // Prefix byte classes which are used to indicate to the ad-hoc machine code
226237 // emitter that various prefix bytes are required.
227238 class OpSize { bit hasOpSizePrefix = 1; }
239 class AdSize { bit hasAdSizePrefix = 1; }
240 class REX_W { bit hasREX_WPrefix = 1; }
228241 class TB { bits<4> Prefix = 1; }
229242 class REP { bits<4> Prefix = 2; }
230243 class D8 { bits<4> Prefix = 3; }
275288 }]>;
276289
277290 // Helper fragments for loads.
278 def loadiPTR : PatFrag<(ops node:$ptr), (iPTR (load node:$ptr))>;
279
280291 def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
281292 def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
282293 def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;
307318
308319 //===----------------------------------------------------------------------===//
309320 // Instruction templates...
321 //
310322
311323 class I o, Format f, dag ops, string asm, list pattern>
312324 : X86Inst {
354366 def NOOP : I<0x90, RawFrm, (ops), "nop", []>;
355367
356368 // Truncate
357 def TRUNC_GR32_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
358 "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>;
359 def TRUNC_GR16_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
360 "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
361 def TRUNC_GR32_GR16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
362 "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
363 [(set GR16:$dst, (trunc GR32:$src))]>;
369 def TRUNC_32_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
370 "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>;
371 def TRUNC_16_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
372 "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
373 def TRUNC_32to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
374 "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
375 [(set GR16:$dst, (trunc GR32:$src))]>;
364376
365377 //===----------------------------------------------------------------------===//
366378 // Control Flow Instructions...
387399 def JMP32r : I<0xFF, MRM4r, (ops GR32:$dst), "jmp{l} {*}$dst",
388400 [(brind GR32:$dst)]>;
389401 def JMP32m : I<0xFF, MRM4m, (ops i32mem:$dst), "jmp{l} {*}$dst",
390 [(brind (loadiPTR addr:$dst))]>;
402 [(brind (loadi32 addr:$dst))]>;
391403 }
392404
393405 // Conditional branches
509521 (ops GR16:$dst, i32mem:$src),
510522 "lea{w} {$src|$dst}, {$dst|$src}", []>, OpSize;
511523 def LEA32r : I<0x8D, MRMSrcMem,
512 (ops GR32:$dst, i32mem:$src),
524 (ops GR32:$dst, lea32mem:$src),
513525 "lea{l} {$src|$dst}, {$dst|$src}",
514 [(set GR32:$dst, leaaddr:$src)]>;
526 [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
515527
516528 def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}",
517529 [(X86rep_movs i8)]>,
11001112 [(set GR8:$dst, (add GR8:$src, 1))]>;
11011113 let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
11021114 def INC16r : I<0x40, AddRegFrm, (ops GR16:$dst, GR16:$src), "inc{w} $dst",
1103 [(set GR16:$dst, (add GR16:$src, 1))]>, OpSize;
1115 [(set GR16:$dst, (add GR16:$src, 1))]>,
1116 OpSize, Requires<[In32BitMode]>;
11041117 def INC32r : I<0x40, AddRegFrm, (ops GR32:$dst, GR32:$src), "inc{l} $dst",
1105 [(set GR32:$dst, (add GR32:$src, 1))]>;
1118 [(set GR32:$dst, (add GR32:$src, 1))]>, Requires<[In32BitMode]>;
11061119 }
11071120 let isTwoAddress = 0, CodeSize = 2 in {
11081121 def INC8m : I<0xFE, MRM0m, (ops i8mem :$dst), "inc{b} $dst",
11181131 [(set GR8:$dst, (add GR8:$src, -1))]>;
11191132 let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
11201133 def DEC16r : I<0x48, AddRegFrm, (ops GR16:$dst, GR16:$src), "dec{w} $dst",
1121 [(set GR16:$dst, (add GR16:$src, -1))]>, OpSize;
1134 [(set GR16:$dst, (add GR16:$src, -1))]>,
1135 OpSize, Requires<[In32BitMode]>;
11221136 def DEC32r : I<0x48, AddRegFrm, (ops GR32:$dst, GR32:$src), "dec{l} $dst",
1123 [(set GR32:$dst, (add GR32:$src, -1))]>;
1137 [(set GR32:$dst, (add GR32:$src, -1))]>, Requires<[In32BitMode]>;
11241138 }
11251139
11261140 let isTwoAddress = 0, CodeSize = 2 in {
24542468 // Non-Instruction Patterns
24552469 //===----------------------------------------------------------------------===//
24562470
2457 // ConstantPool GlobalAddress, ExternalSymbol
2471 // ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
24582472 def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
24592473 def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
24602474 def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
24762490
24772491 // Calls
24782492 def : Pat<(X86tailcall GR32:$dst),
2479 (CALL32r GR32:$dst)>;
2480
2481 def : Pat<(X86tailcall tglobaladdr:$dst),
2493 (CALL32r GR32:$dst)>;
2494
2495 def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
24822496 (CALLpcrel32 tglobaladdr:$dst)>;
2483 def : Pat<(X86tailcall texternalsym:$dst),
2497 def : Pat<(X86tailcall (i32 texternalsym:$dst)),
24842498 (CALLpcrel32 texternalsym:$dst)>;
24852499
2486
2487
2488 def : Pat<(X86call tglobaladdr:$dst),
2500 def : Pat<(X86call (i32 tglobaladdr:$dst)),
24892501 (CALLpcrel32 tglobaladdr:$dst)>;
2490 def : Pat<(X86call texternalsym:$dst),
2502 def : Pat<(X86call (i32 texternalsym:$dst)),
24912503 (CALLpcrel32 texternalsym:$dst)>;
24922504
24932505 // X86 specific add which produces a flag.
26102622 //===----------------------------------------------------------------------===//
26112623
26122624 include "X86InstrSSE.td"
2625
2626 //===----------------------------------------------------------------------===//
2627 // X86-64 Support
2628 //===----------------------------------------------------------------------===//
2629
2630 include "X86InstrX86-64.td"
0 //====- X86InstrX86-64.td - Describe the X86 Instruction Set ----*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file was developed by the Evan Cheng and is distributed under
5 // the University of Illinois Open Source License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file describes the X86-64 instruction set, defining the instructions,
10 // and properties of the instructions which are needed for code generation,
11 // machine code emission, and analysis.
12 //
13 //===----------------------------------------------------------------------===//
14
15 //===----------------------------------------------------------------------===//
16 // Operand Definitions...
17 //
18
19 // 64-bits but only 32 bits are significant.
20 def i64i32imm : Operand;
21 // 64-bits but only 8 bits are significant.
22 def i64i8imm : Operand;
23
24 def lea64mem : Operand {
25 let PrintMethod = "printi64mem";
26 let NumMIOperands = 4;
27 let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm);
28 }
29
30 def lea64_32mem : Operand {
31 let PrintMethod = "printlea64_32mem";
32 let NumMIOperands = 4;
33 let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
34 }
35
36 //===----------------------------------------------------------------------===//
37 // Complex Pattern Definitions...
38 //
39 def lea64addr : ComplexPattern
40 [add, mul, shl, or, frameindex, X86Wrapper]>;
41
42 //===----------------------------------------------------------------------===//
43 // Instruction templates...
44 //
45
46 class RI o, Format F, dag ops, string asm, list pattern>
47 : I, REX_W;
48 class RIi8 o, Format F, dag ops, string asm, list pattern>
49 : Ii8, REX_W;
50 class RIi32 o, Format F, dag ops, string asm, list pattern>
51 : Ii32, REX_W;
52
53 class RIi64 o, Format f, dag ops, string asm, list pattern>
54 : X86Inst, REX_W {
55 let Pattern = pattern;
56 let CodeSize = 3;
57 }
58
59 class RSSI o, Format F, dag ops, string asm, list pattern>
60 : SSI, REX_W;
61 class RSDI o, Format F, dag ops, string asm, list pattern>
62 : SDI, REX_W;
63
64 //===----------------------------------------------------------------------===//
65 // Pattern fragments...
66 //
67
68 def i64immSExt32 : PatLeaf<(i64 imm), [{
69 // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
70 // sign extended field.
71 return (int64_t)N->getValue() == (int32_t)N->getValue();
72 }]>;
73
74 def i64immZExt32 : PatLeaf<(i64 imm), [{
75 // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
76 // unsignedsign extended field.
77 return (uint64_t)N->getValue() == (uint32_t)N->getValue();
78 }]>;
79
80 def i64immSExt8 : PatLeaf<(i64 imm), [{
81 // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit
82 // sign extended field.
83 return (int64_t)N->getValue() == (int8_t)N->getValue();
84 }]>;
85
86 def sextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i1))>;
87 def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i8))>;
88 def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i16))>;
89 def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i32))>;
90
91 def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i1))>;
92 def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i8))>;
93 def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i16))>;
94 def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i32))>;
95
96 def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i1))>;
97 def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i8))>;
98 def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i16))>;
99 def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i32))>;
100
101 //===----------------------------------------------------------------------===//
102 // Instruction list...
103 //
104
105 def IMPLICIT_DEF_GR64 : I<0, Pseudo, (ops GR64:$dst),
106 "#IMPLICIT_DEF $dst",
107 [(set GR64:$dst, (undef))]>;
108
109 //===----------------------------------------------------------------------===//
110 // Call Instructions...
111 //
112 let isCall = 1, noResults = 1 in
113 // All calls clobber the non-callee saved registers...
114 let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
115 FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
116 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
117 XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15] in {
118 def CALL64pcrel32 : I<0xE8, RawFrm, (ops i64imm:$dst, variable_ops),
119 "call ${dst:call}", []>;
120 def CALL64r : I<0xFF, MRM2r, (ops GR64:$dst, variable_ops),
121 "call {*}$dst", [(X86call GR64:$dst)]>;
122 def CALL64m : I<0xFF, MRM2m, (ops i64mem:$dst, variable_ops),
123 "call {*}$dst", []>;
124 }
125
126 // Branches
127 let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in {
128 def JMP64r : I<0xFF, MRM4r, (ops GR64:$dst), "jmp{q} {*}$dst",
129 [(brind GR64:$dst)]>;
130 def JMP64m : I<0xFF, MRM4m, (ops i64mem:$dst), "jmp{q} {*}$dst",
131 [(brind (loadi64 addr:$dst))]>;
132 }
133
134 //===----------------------------------------------------------------------===//
135 // Miscellaneous Instructions...
136 //
137 def LEAVE64 : I<0xC9, RawFrm,
138 (ops), "leave", []>, Imp<[RBP,RSP],[RBP,RSP]>;
139 def POP64r : I<0x58, AddRegFrm,
140 (ops GR64:$reg), "pop{q} $reg", []>, Imp<[RSP],[RSP]>;
141
142 def LEA64_32r : I<0x8D, MRMSrcMem,
143 (ops GR32:$dst, lea64_32mem:$src),
144 "lea{l} {$src|$dst}, {$dst|$src}",
145 [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
146
147 def LEA64r : RI<0x8D, MRMSrcMem, (ops GR64:$dst, lea64mem:$src),
148 "lea{q} {$src|$dst}, {$dst|$src}",
149 [(set GR64:$dst, lea64addr:$src)]>;
150
151 let isTwoAddress = 1 in
152 def BSWAP64r : RI<0xC8, AddRegFrm, (ops GR64:$dst, GR64:$src),
153 "bswap{q} $dst",
154 [(set GR64:$dst, (bswap GR64:$src))]>, TB;
155 // Exchange
156