llvm.org GIT mirror llvm / e8b4a4a
Revert r160920 and r160919 due to dragonegg and clang selfhost failure git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160927 91177308-0d34-0410-b5e6-96231b3b80d8 Manman Ren 7 years ago
10 changed file(s) with 54 addition(s) and 171 deletion(s). Raw diff Collapse all Expand all
1313 #ifndef LLVM_TARGET_TARGETINSTRINFO_H
1414 #define LLVM_TARGET_TARGETINSTRINFO_H
1515
16 #include "llvm/ADT/SmallSet.h"
1716 #include "llvm/MC/MCInstrInfo.h"
1817 #include "llvm/CodeGen/DFAPacketizer.h"
1918 #include "llvm/CodeGen/MachineFunction.h"
691690 int Mask, int Value,
692691 const MachineRegisterInfo *MRI) const {
693692 return false;
694 }
695
696 /// optimizeLoadInstr - Try to remove the load by folding it to a register
697 /// operand at the use. We fold the load instructions if and only if the
698 /// def and use are in the same BB.
699 virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
700 const MachineRegisterInfo *MRI,
701 SmallSet<unsigned, 16> &FoldAsLoadDefRegs,
702 MachineInstr *&DefMI) const {
703 return 0;
704693 }
705694
706695 /// FoldImmediate - 'Reg' is known to be defined by a move immediate
7777 STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
7878 STATISTIC(NumCmps, "Number of compares eliminated");
7979 STATISTIC(NumImmFold, "Number of move immediate folded");
80 STATISTIC(NumLoadFold, "Number of loads folded");
8180
8281 namespace {
8382 class PeepholeOptimizer : public MachineFunctionPass {
441440 SmallPtrSet<MachineInstr*, 8> LocalMIs;
442441 SmallSet<unsigned, 4> ImmDefRegs;
443442 DenseMap<unsigned, MachineInstr*> ImmDefMIs;
444 SmallSet<unsigned, 16> FoldAsLoadDefRegs;
445443 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
446444 MachineBasicBlock *MBB = &*I;
447445
449447 LocalMIs.clear();
450448 ImmDefRegs.clear();
451449 ImmDefMIs.clear();
452 FoldAsLoadDefRegs.clear();
453450
454451 bool First = true;
455452 MachineBasicBlock::iterator PMII;
491488 Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
492489 }
493490
494 MachineInstr *DefMI = 0;
495 MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefRegs,
496 DefMI);
497 if (FoldMI) {
498 // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
499 LocalMIs.erase(MI);
500 LocalMIs.erase(DefMI);
501 LocalMIs.insert(FoldMI);
502 MI->eraseFromParent();
503 DefMI->eraseFromParent();
504 ++NumLoadFold;
505
506 // MI is replaced with FoldMI.
507 Changed = true;
508 PMII = FoldMI;
509 MII = llvm::next(PMII);
510 continue;
511 }
512
513491 First = false;
514492 PMII = MII;
515493 ++MII;
33223322 return true;
33233323 }
33243324
3325 /// optimizeLoadInstr - Try to remove the load by folding it to a register
3326 /// operand at the use. We fold the load instructions if and only if the
3327 /// def and use are in the same BB.
3328 MachineInstr* X86InstrInfo::
3329 optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
3330 SmallSet<unsigned, 16> &FoldAsLoadDefRegs,
3331 MachineInstr *&DefMI) const {
3332 if (MI->mayStore() || MI->isCall())
3333 // To be conservative, we don't fold the loads if there is a store in
3334 // between.
3335 FoldAsLoadDefRegs.clear();
3336 // We only fold loads to a virtual register.
3337 if (MI->canFoldAsLoad()) {
3338 const MCInstrDesc &MCID = MI->getDesc();
3339 if (MCID.getNumDefs() == 1) {
3340 unsigned Reg = MI->getOperand(0).getReg();
3341 // To reduce compilation time, we check MRI->hasOneUse when inserting
3342 // loads. It should be checked when processing uses of the load, since
3343 // uses can be removed during peephole.
3344 if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->hasOneUse(Reg)) {
3345 FoldAsLoadDefRegs.insert(Reg);
3346 return 0;
3347 }
3348 }
3349 }
3350
3351 // Collect information about virtual register operands of MI.
3352 DenseMap<unsigned, unsigned> SrcVirtualRegToOp;
3353 SmallSet<unsigned, 4> DstVirtualRegs;
3354 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
3355 MachineOperand &MO = MI->getOperand(i);
3356 if (!MO.isReg())
3357 continue;
3358 unsigned Reg = MO.getReg();
3359 if (!TargetRegisterInfo::isVirtualRegister(Reg))
3360 continue;
3361 if (MO.isDef())
3362 DstVirtualRegs.insert(Reg);
3363 else if (FoldAsLoadDefRegs.count(Reg)) {
3364 // Only handle the case where Reg is used in a single src operand.
3365 if (SrcVirtualRegToOp.find(Reg) != SrcVirtualRegToOp.end())
3366 SrcVirtualRegToOp.erase(Reg);
3367 else
3368 SrcVirtualRegToOp.insert(std::make_pair(Reg, i));
3369 }
3370 }
3371
3372 for (DenseMap<unsigned, unsigned>::iterator SI = SrcVirtualRegToOp.begin(),
3373 SE = SrcVirtualRegToOp.end(); SI != SE; SI++) {
3374 // If the virtual register is updated by MI, we can't fold the load.
3375 if (DstVirtualRegs.count(SI->first)) continue;
3376
3377 // Check whether we can fold the def into this operand.
3378 DefMI = MRI->getVRegDef(SI->first);
3379 assert(DefMI);
3380 bool SawStore = false;
3381 if (!DefMI->isSafeToMove(this, 0, SawStore))
3382 continue;
3383
3384 SmallVector<unsigned, 2> Ops;
3385 Ops.push_back(SI->second);
3386 MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
3387 if (!FoldMI) continue;
3388 FoldAsLoadDefRegs.erase(SI->first);
3389 return FoldMI;
3390 }
3391 return 0;
3392 }
3393
33943325 /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
33953326 /// instruction with two undef reads of the register being defined. This is
33963327 /// used for mapping:
386386 unsigned SrcReg2, int CmpMask, int CmpValue,
387387 const MachineRegisterInfo *MRI) const;
388388
389 /// optimizeLoadInstr - Try to remove the load by folding it to a register
390 /// operand at the use. We fold the load instructions if and only if the
391 /// def and use are in the same BB.
392 virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
393 const MachineRegisterInfo *MRI,
394 SmallSet<unsigned, 16> &FoldAsLoadDefRegs,
395 MachineInstr *&DefMI) const;
396
397389 private:
398390 MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
399391 MachineFunction::iterator &MFI,
22 define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
33 entry:
44 ; CHECK: vmovaps
5 ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]),
5 ; CHECK: vmovaps
6 ; CHECK: vinsertf128
67 ; CHECK: vmovups
78 %A = load <4 x i32>* %Ap
89 %B = load <4 x i32>* %Bp
3333 define double @squirt(double* %x) nounwind {
3434 entry:
3535 ; CHECK: squirt:
36 ; CHECK: sqrtsd ([[A0]]), %xmm0
36 ; CHECK: movsd ([[A0]]), %xmm0
37 ; CHECK: sqrtsd %xmm0, %xmm0
3738 %z = load double* %x
3839 %t = call double @llvm.sqrt.f64(double %z)
3940 ret double %t
4444
4545 }
4646
47 ; rdar://10554090
48 ; xor in exit block will be CSE'ed and load will be folded to xor in entry.
49 define i1 @test3(i32* %P, i32* %Q) nounwind {
50 ; CHECK: test3:
51 ; CHECK: movl 8(%esp), %eax
52 ; CHECK: xorl (%eax),
53 ; CHECK: j
54 ; CHECK-NOT: xor
55 entry:
56 %0 = load i32* %P, align 4
57 %1 = load i32* %Q, align 4
58 %2 = xor i32 %0, %1
59 %3 = and i32 %2, 65535
60 %4 = icmp eq i32 %3, 0
61 br i1 %4, label %exit, label %land.end
62
63 exit:
64 %shr.i.i19 = xor i32 %1, %0
65 %5 = and i32 %shr.i.i19, 2147418112
66 %6 = icmp eq i32 %5, 0
67 br label %land.end
68
69 land.end:
70 %7 = phi i1 [ %6, %exit ], [ false, %entry ]
71 ret i1 %7
72 }
None ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
1 ; RUN: grep pcmpeqd %t | count 1
2 ; RUN: grep xor %t | count 1
3 ; RUN: not grep LCP %t
14
25 define <2 x double> @foo() nounwind {
36 ret <2 x double> bitcast (<2 x i64> <i64 -1, i64 -1> to <2 x double>)
4 ; CHECK: foo:
5 ; CHECK: pcmpeqd %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
6 ; CHECK-NEXT: ret
77 }
88 define <2 x double> @bar() nounwind {
99 ret <2 x double> bitcast (<2 x i64> <i64 0, i64 0> to <2 x double>)
10 ; CHECK: bar:
11 ; CHECK: xorps %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
12 ; CHECK-NEXT: ret
1310 }
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=nehalem -asm-verbose=false | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
0 ; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false | FileCheck %s
1 ; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
2 ; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
33
44 ; Some of these patterns can be matched as SSE min or max. Some of
55 ; then can be matched provided that the operands are swapped.
136136 }
137137
138138 ; CHECK: ogt_x:
139 ; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
139 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
140 ; CHECK-NEXT: maxsd %xmm1, %xmm0
140141 ; CHECK-NEXT: ret
141142 ; UNSAFE: ogt_x:
142 ; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
143 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
144 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
143145 ; UNSAFE-NEXT: ret
144146 ; FINITE: ogt_x:
145 ; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
147 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
148 ; FINITE-NEXT: maxsd %xmm1, %xmm0
146149 ; FINITE-NEXT: ret
147150 define double @ogt_x(double %x) nounwind {
148151 %c = fcmp ogt double %x, 0.000000e+00
151154 }
152155
153156 ; CHECK: olt_x:
154 ; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
157 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
158 ; CHECK-NEXT: minsd %xmm1, %xmm0
155159 ; CHECK-NEXT: ret
156160 ; UNSAFE: olt_x:
157 ; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
161 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
162 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
158163 ; UNSAFE-NEXT: ret
159164 ; FINITE: olt_x:
160 ; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
165 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
166 ; FINITE-NEXT: minsd %xmm1, %xmm0
161167 ; FINITE-NEXT: ret
162168 define double @olt_x(double %x) nounwind {
163169 %c = fcmp olt double %x, 0.000000e+00
210216 ; CHECK: oge_x:
211217 ; CHECK: ucomisd %xmm1, %xmm0
212218 ; UNSAFE: oge_x:
213 ; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
219 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
220 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
214221 ; UNSAFE-NEXT: ret
215222 ; FINITE: oge_x:
216 ; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
223 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
224 ; FINITE-NEXT: maxsd %xmm1, %xmm0
217225 ; FINITE-NEXT: ret
218226 define double @oge_x(double %x) nounwind {
219227 %c = fcmp oge double %x, 0.000000e+00
224232 ; CHECK: ole_x:
225233 ; CHECK: ucomisd %xmm0, %xmm1
226234 ; UNSAFE: ole_x:
227 ; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
235 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
236 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
228237 ; UNSAFE-NEXT: ret
229238 ; FINITE: ole_x:
230 ; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
239 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
240 ; FINITE-NEXT: minsd %xmm1, %xmm0
231241 ; FINITE-NEXT: ret
232242 define double @ole_x(double %x) nounwind {
233243 %c = fcmp ole double %x, 0.000000e+00
400410 ; CHECK: ugt_x:
401411 ; CHECK: ucomisd %xmm0, %xmm1
402412 ; UNSAFE: ugt_x:
403 ; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
413 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
414 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
404415 ; UNSAFE-NEXT: ret
405416 ; FINITE: ugt_x:
406 ; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
417 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
418 ; FINITE-NEXT: maxsd %xmm1, %xmm0
407419 ; FINITE-NEXT: ret
408420 define double @ugt_x(double %x) nounwind {
409421 %c = fcmp ugt double %x, 0.000000e+00
414426 ; CHECK: ult_x:
415427 ; CHECK: ucomisd %xmm1, %xmm0
416428 ; UNSAFE: ult_x:
417 ; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
429 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
430 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
418431 ; UNSAFE-NEXT: ret
419432 ; FINITE: ult_x:
420 ; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
433 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
434 ; FINITE-NEXT: minsd %xmm1, %xmm0
421435 ; FINITE-NEXT: ret
422436 define double @ult_x(double %x) nounwind {
423437 %c = fcmp ult double %x, 0.000000e+00
467481 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
468482 ; CHECK-NEXT: ret
469483 ; UNSAFE: uge_x:
470 ; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
484 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
485 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
471486 ; UNSAFE-NEXT: ret
472487 ; FINITE: uge_x:
473 ; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
488 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
489 ; FINITE-NEXT: maxsd %xmm1, %xmm0
474490 ; FINITE-NEXT: ret
475491 define double @uge_x(double %x) nounwind {
476492 %c = fcmp uge double %x, 0.000000e+00
484500 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
485501 ; CHECK-NEXT: ret
486502 ; UNSAFE: ule_x:
487 ; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
503 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
504 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
488505 ; UNSAFE-NEXT: ret
489506 ; FINITE: ule_x:
490 ; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
507 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
508 ; FINITE-NEXT: minsd %xmm1, %xmm0
491509 ; FINITE-NEXT: ret
492510 define double @ule_x(double %x) nounwind {
493511 %c = fcmp ule double %x, 0.000000e+00
496514 }
497515
498516 ; CHECK: uge_inverse_x:
499 ; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
517 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
518 ; CHECK-NEXT: minsd %xmm1, %xmm0
500519 ; CHECK-NEXT: ret
501520 ; UNSAFE: uge_inverse_x:
502521 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
515534 }
516535
517536 ; CHECK: ule_inverse_x:
518 ; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
537 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
538 ; CHECK-NEXT: maxsd %xmm1, %xmm0
519539 ; CHECK-NEXT: ret
520540 ; UNSAFE: ule_inverse_x:
521541 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
None ; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i386-apple-darwin | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
11
22
33 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
1313 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
1414 ; CHECK: test2:
1515 ; CHECK: pcmp
16 ; CHECK: pxor LCP
17 ; CHECK: movdqa
16 ; CHECK: pcmp
17 ; CHECK: pxor
1818 ; CHECK: ret
1919 %C = icmp sge <4 x i32> %A, %B
2020 %D = sext <4 x i1> %C to <4 x i32>