llvm.org GIT mirror: llvm @ 832e494
Implement support for x86 fast-isel of small fixed-size memcpys, which are generated en masse for C++ PODs. On my C++ test file, this cuts the number of fast-isel rejects by 10x and shrinks the generated .s file by 5%.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129755 91177308-0d34-0410-b5e6-96231b3b80d8

Chris Lattner, 9 years ago
3 changed files with 106 additions and 58 deletions.
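(Background, not part of the commit: the memcpys in question typically come from plain assignments of small POD structs. A minimal C++ sketch of the pattern, with invented names Point3 and copyPoint:)

// Illustrative only: Point3 and copyPoint are invented names.
// A 12-byte plain-old-data struct: no constructors, destructors, or vtables.
struct Point3 {
  int x, y, z;
};

// Clang typically lowers this assignment to a small fixed-size intrinsic,
// e.g. call void @llvm.memcpy.p0i8.p0i8.i64(..., i64 12, i32 4, i1 false),
// which is exactly the pattern the patch below teaches the x86 fast path
// to select instead of rejecting.
void copyPoint(Point3 &dst, const Point3 &src) {
  dst = src;
}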
lib/CodeGen/SelectionDAG/FastISel.cpp

@@ -485,8 +485,7 @@
   if (!F) return false;

   // Handle selected intrinsic function calls.
-  unsigned IID = F->getIntrinsicID();
-  switch (IID) {
+  switch (F->getIntrinsicID()) {
   default: break;
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
@@ -551,64 +550,57 @@
   }
   case Intrinsic::eh_exception: {
     EVT VT = TLI.getValueType(I->getType());
-    switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
-    default: break;
-    case TargetLowering::Expand: {
-      assert(FuncInfo.MBB->isLandingPad() &&
-             "Call to eh.exception not in landing pad!");
-      unsigned Reg = TLI.getExceptionAddressRegister();
-      const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
-      unsigned ResultReg = createResultReg(RC);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Reg);
-      UpdateValueMap(I, ResultReg);
-      return true;
-    }
-    }
-    break;
+    if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+      break;
+
+    assert(FuncInfo.MBB->isLandingPad() &&
+           "Call to eh.exception not in landing pad!");
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(Reg);
+    UpdateValueMap(I, ResultReg);
+    return true;
   }
   case Intrinsic::eh_selector: {
     EVT VT = TLI.getValueType(I->getType());
-    switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
-    default: break;
-    case TargetLowering::Expand: {
-      if (FuncInfo.MBB->isLandingPad())
-        AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
-      else {
+    if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+      break;
+    if (FuncInfo.MBB->isLandingPad())
+      AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+    else {
 #ifndef NDEBUG
-        FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
+      FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
 #endif
-        // FIXME: Mark exception selector register as live in. Hack for PR1508.
-        unsigned Reg = TLI.getExceptionSelectorRegister();
-        if (Reg) FuncInfo.MBB->addLiveIn(Reg);
-      }
-
+      // FIXME: Mark exception selector register as live in. Hack for PR1508.
       unsigned Reg = TLI.getExceptionSelectorRegister();
-      EVT SrcVT = TLI.getPointerTy();
-      const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
-      unsigned ResultReg = createResultReg(RC);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Reg);
-
-      bool ResultRegIsKill = hasTrivialKill(I);
-
-      // Cast the register to the type of the selector.
-      if (SrcVT.bitsGT(MVT::i32))
-        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
-                               ResultReg, ResultRegIsKill);
-      else if (SrcVT.bitsLT(MVT::i32))
-        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
-                               ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
-      if (ResultReg == 0)
-        // Unhandled operand. Halt "fast" selection and bail.
-        return false;
-
-      UpdateValueMap(I, ResultReg);
-
-      return true;
+      if (Reg) FuncInfo.MBB->addLiveIn(Reg);
     }
-    }
-    break;
+
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    EVT SrcVT = TLI.getPointerTy();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(Reg);
+
+    bool ResultRegIsKill = hasTrivialKill(I);
+
+    // Cast the register to the type of the selector.
+    if (SrcVT.bitsGT(MVT::i32))
+      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+                             ResultReg, ResultRegIsKill);
+    else if (SrcVT.bitsLT(MVT::i32))
+      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+                             ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+    if (ResultReg == 0)
+      // Unhandled operand. Halt "fast" selection and bail.
+      return false;

+    UpdateValueMap(I, ResultReg);
+
+    return true;
   }
   }

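(Side note on the FastISel.cpp hunk above: it is a behavior-preserving cleanup that replaces a one-armed switch over getOperationAction with an early break. A toy standalone C++ sketch of the same transformation, with invented names selectBefore and selectAfter:)

#include <cstdio>
#include <initializer_list>

enum Action { Legal, Expand, Custom };

// Illustrative only: the shape of the code before the cleanup, a switch
// whose only interesting arm is Expand.
static bool selectBefore(Action A) {
  switch (A) {
  default: break;
  case Expand:
    return true;   // ...the lowering body lived here, one level deeper...
  }
  return false;
}

// ...and after: test the uninteresting condition first and exit early,
// un-indenting the body without changing behavior.
static bool selectAfter(Action A) {
  if (A != Expand)
    return false;
  return true;     // ...the lowering body now sits at the top level...
}

int main() {
  // The two variants agree on every input.
  for (Action A : {Legal, Expand, Custom})
    std::printf("%d %d\n", selectBefore(A), selectAfter(A));
}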
lib/Target/X86/X86FastISel.cpp

@@ -1324,6 +1324,52 @@
   // FIXME: Handle more intrinsics.
   switch (I.getIntrinsicID()) {
   default: return false;
+  case Intrinsic::memcpy: {
+    const MemCpyInst &MCI = cast<MemCpyInst>(I);
+    // Don't handle volatile or variable length memcpys.
+    if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
+      return false;
+
+    // Don't inline super long memcpys. We could lower these to a memcpy call,
+    // but we might as well bail out.
+    uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+    bool i64Legal = TLI.isTypeLegal(MVT::i64);
+    if (Len > (i64Legal ? 32 : 16)) return false;
+
+    // Get the address of the dest and source addresses.
+    X86AddressMode DestAM, SrcAM;
+    if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+        !X86SelectAddress(MCI.getRawSource(), SrcAM))
+      return false;
+
+    // We don't care about alignment here since we just emit integer accesses.
+    while (Len) {
+      MVT VT;
+      if (Len >= 8 && i64Legal)
+        VT = MVT::i64;
+      else if (Len >= 4)
+        VT = MVT::i32;
+      else if (Len >= 2)
+        VT = MVT::i16;
+      else {
+        assert(Len == 1);
+        VT = MVT::i8;
+      }
+
+      unsigned Reg;
+      bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+      RV &= X86FastEmitStore(VT, Reg, DestAM);
+      assert(RV && "Failed to emit load or store??");
+
+      unsigned Size = VT.getSizeInBits()/8;
+      Len -= Size;
+      DestAM.Disp += Size;
+      SrcAM.Disp += Size;
+    }
+
+    return true;
+  }
+
   case Intrinsic::stackprotector: {
     // Emit inline code to store the stack guard onto the stack.
     EVT PtrTy = TLI.getPointerTy();
@@ -1334,16 +1380,13 @@
     // Grab the frame index.
     X86AddressMode AM;
     if (!X86SelectAddress(Slot, AM)) return false;
-
     if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
     return true;
   }
   case Intrinsic::objectsize: {
-    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+    // FIXME: This should be moved to generic code!
+    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
     const Type *Ty = I.getCalledFunction()->getReturnType();
-
-    assert(CI && "Non-constant type in Intrinsic::objectsize?");

     MVT VT;
     if (!isTypeLegal(Ty, VT))
@@ -1382,6 +1425,8 @@
   }
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow: {
+    // FIXME: Should fold immediates.
+
     // Replace "add with overflow" intrinsics with an "add" instruction followed
     // by a seto/setc instruction. Later on, when the "extractvalue"
     // instructions are encountered, we use the fact that two registers were
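(To make the new Intrinsic::memcpy lowering above concrete: each loop iteration greedily picks the widest legal integer type. A standalone C++ sketch of that bucketing, using the invented helper memcpyAccessWidths: on x86-64, where i64 is legal, a 12-byte copy decomposes into one 8-byte and one 4-byte access, i.e. a movq followed by a movl. The 4-byte case is what the new test below checks.)

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative only: mirrors the width selection in the new while (Len) loop.
// Greedily pick the widest legal integer access (8, 4, 2, then 1 bytes)
// until the constant length is fully covered.
static std::vector<unsigned> memcpyAccessWidths(uint64_t Len, bool I64Legal) {
  std::vector<unsigned> Widths;
  while (Len) {
    unsigned Size;
    if (Len >= 8 && I64Legal) Size = 8;   // MVT::i64
    else if (Len >= 4)        Size = 4;   // MVT::i32
    else if (Len >= 2)        Size = 2;   // MVT::i16
    else                      Size = 1;   // MVT::i8
    Widths.push_back(Size);
    Len -= Size;   // the real code also bumps DestAM.Disp/SrcAM.Disp by Size
  }
  return Widths;
}

int main() {
  for (unsigned W : memcpyAccessWidths(12, /*I64Legal=*/true))
    std::printf("%u ", W);   // prints: 8 4
  std::printf("\n");
}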
test/CodeGen/X86/fast-isel-x86-64.ll

@@ -169,3 +169,14 @@
 ; CHECK: callq
 }

+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+
+; rdar://9289488 - fast-isel shouldn't bail out on llvm.memcpy
+define void @test15(i8* %a, i8* %b) nounwind {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 4, i32 4, i1 false)
+  ret void
+; CHECK: test15:
+; CHECK-NEXT: movl (%rsi), %eax
+; CHECK-NEXT: movl %eax, (%rdi)
+; CHECK-NEXT: ret
+}