llvm.org GIT mirror llvm / 055d207
[PowerPC] More fast-isel chunks (returns and integer extends) Incremental improvement to fast-isel for PPC64. This allows us to select on ret, sext, and zext. Filling in sext/zext improves some of the existing logic in handling compare-immediates that needed extends. A simplified return convention for fast-isel is also added to the PPC64 calling conventions. All call/return processing for DAG selection is handled with custom code, so there isn't an existing CC to rely on here. The include of PPCGenCallingConv.inc causes compiler warnings due to the 32-bit calling conventions that are not used, so the dummy function "usePPC32CCs()" is added here to silence those. Test cases for the return and extend logic are added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189266 91177308-0d34-0410-b5e6-96231b3b80d8 Bill Schmidt 6 years ago
5 changed file(s) with 472 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
3535 CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
3636 ]>;
3737
38
39 // Note that we don't currently have calling conventions for 64-bit
40 // PowerPC, but handle all the complexities of the ABI in the lowering
41 // logic. FIXME: See if the logic can be simplified with use of CCs.
42 // This may require some extensions to current table generation.
43
// Simple return-value convention for 64-bit ELF PowerPC fast isel.
// All small ints (i8, i16, i32) are promoted to i64.  Vector types,
// quadword ints, and multiple register returns are "supported" to
// avoid compile errors, but none are handled by the fast selector.
def RetCC_PPC64_ELF_FIS : CallingConv<[
  CCIfType<[i8],   CCPromoteToType<i64>>,
  CCIfType<[i16],  CCPromoteToType<i64>>,
  CCIfType<[i32],  CCPromoteToType<i64>>,
  CCIfType<[i64],  CCAssignToReg<[X3, X4]>>,
  CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
  CCIfType<[f32],  CCAssignToReg<[F1, F2]>>,
  CCIfType<[f64],  CCAssignToReg<[F1, F2, F3, F4]>>,
  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
]>;
3858
3959 //===----------------------------------------------------------------------===//
4060 // PowerPC System V Release 4 32-bit ABI
9494 private:
9595 bool SelectBranch(const Instruction *I);
9696 bool SelectIndirectBr(const Instruction *I);
97 bool SelectRet(const Instruction *I);
98 bool SelectIntExt(const Instruction *I);
9799
98100 // Utility routines.
99101 private:
108110 unsigned PPCMaterialize64BitInt(int64_t Imm,
109111 const TargetRegisterClass *RC);
110112
113 // Call handling routines.
114 private:
115 CCAssignFn *usePPC32CCs(unsigned Flag);
116
111117 private:
112118 #include "PPCGenFastISel.inc"
113119
114120 };
115121
116122 } // end anonymous namespace
123
124 #include "PPCGenCallingConv.inc"
125
126 // Function whose sole purpose is to kill compiler warnings
127 // stemming from unused functions included from PPCGenCallingConv.inc.
128 CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
129 if (Flag == 1)
130 return CC_PPC32_SVR4;
131 else if (Flag == 2)
132 return CC_PPC32_SVR4_ByVal;
133 else if (Flag == 3)
134 return CC_PPC32_SVR4_VarArg;
135 else
136 return RetCC_PPC;
137 }
117138
118139 static Optional getComparePred(CmpInst::Predicate Pred) {
119140 switch (Pred) {
308329 return true;
309330 }
310331
332 // Attempt to fast-select a return instruction.
333 bool PPCFastISel::SelectRet(const Instruction *I) {
334
335 if (!FuncInfo.CanLowerReturn)
336 return false;
337
338 const ReturnInst *Ret = cast(I);
339 const Function &F = *I->getParent()->getParent();
340
341 // Build a list of return value registers.
342 SmallVector RetRegs;
343 CallingConv::ID CC = F.getCallingConv();
344
345 if (Ret->getNumOperands() > 0) {
346 SmallVector Outs;
347 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
348
349 // Analyze operands of the call, assigning locations to each operand.
350 SmallVector ValLocs;
351 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
352 CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
353 const Value *RV = Ret->getOperand(0);
354
355 // FIXME: Only one output register for now.
356 if (ValLocs.size() > 1)
357 return false;
358
359 // Special case for returning a constant integer of any size.
360 // Materialize the constant as an i64 and copy it to the return
361 // register. This avoids an unnecessary extend or truncate.
362 if (isa(*RV)) {
363 const Constant *C = cast(RV);
364 unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
365 unsigned RetReg = ValLocs[0].getLocReg();
366 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
367 RetReg).addReg(SrcReg);
368 RetRegs.push_back(RetReg);
369
370 } else {
371 unsigned Reg = getRegForValue(RV);
372
373 if (Reg == 0)
374 return false;
375
376 // Copy the result values into the output registers.
377 for (unsigned i = 0; i < ValLocs.size(); ++i) {
378
379 CCValAssign &VA = ValLocs[i];
380 assert(VA.isRegLoc() && "Can only return in registers!");
381 RetRegs.push_back(VA.getLocReg());
382 unsigned SrcReg = Reg + VA.getValNo();
383
384 EVT RVEVT = TLI.getValueType(RV->getType());
385 if (!RVEVT.isSimple())
386 return false;
387 MVT RVVT = RVEVT.getSimpleVT();
388 MVT DestVT = VA.getLocVT();
389
390 if (RVVT != DestVT && RVVT != MVT::i8 &&
391 RVVT != MVT::i16 && RVVT != MVT::i32)
392 return false;
393
394 if (RVVT != DestVT) {
395 switch (VA.getLocInfo()) {
396 default:
397 llvm_unreachable("Unknown loc info!");
398 case CCValAssign::Full:
399 llvm_unreachable("Full value assign but types don't match?");
400 case CCValAssign::AExt:
401 case CCValAssign::ZExt: {
402 const TargetRegisterClass *RC =
403 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
404 unsigned TmpReg = createResultReg(RC);
405 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
406 return false;
407 SrcReg = TmpReg;
408 break;
409 }
410 case CCValAssign::SExt: {
411 const TargetRegisterClass *RC =
412 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
413 unsigned TmpReg = createResultReg(RC);
414 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
415 return false;
416 SrcReg = TmpReg;
417 break;
418 }
419 }
420 }
421
422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
423 TII.get(TargetOpcode::COPY), RetRegs[i])
424 .addReg(SrcReg);
425 }
426 }
427 }
428
429 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
430 TII.get(PPC::BLR));
431
432 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
433 MIB.addReg(RetRegs[i], RegState::Implicit);
434
435 return true;
436 }
437
311438 // Attempt to emit an integer extend of SrcReg into DestReg. Both
312439 // signed and zero extensions are supported. Return false if we
313 // can't handle it. Not yet implemented.
440 // can't handle it.
314441 bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
315442 unsigned DestReg, bool IsZExt) {
316 return (SrcVT == MVT::i8 && SrcReg && DestVT == MVT::i8 && DestReg
317 && IsZExt && false);
443 if (DestVT != MVT::i32 && DestVT != MVT::i64)
444 return false;
445 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
446 return false;
447
448 // Signed extensions use EXTSB, EXTSH, EXTSW.
449 if (!IsZExt) {
450 unsigned Opc;
451 if (SrcVT == MVT::i8)
452 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
453 else if (SrcVT == MVT::i16)
454 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
455 else {
456 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
457 Opc = PPC::EXTSW_32_64;
458 }
459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
460 .addReg(SrcReg);
461
462 // Unsigned 32-bit extensions use RLWINM.
463 } else if (DestVT == MVT::i32) {
464 unsigned MB;
465 if (SrcVT == MVT::i8)
466 MB = 24;
467 else {
468 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
469 MB = 16;
470 }
471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
472 DestReg)
473 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
474
475 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
476 } else {
477 unsigned MB;
478 if (SrcVT == MVT::i8)
479 MB = 56;
480 else if (SrcVT == MVT::i16)
481 MB = 48;
482 else
483 MB = 32;
484 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
485 TII.get(PPC::RLDICL_32_64), DestReg)
486 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
487 }
488
489 return true;
318490 }
319491
320492 // Attempt to fast-select an indirect branch instruction.
334506 return true;
335507 }
336508
509 // Attempt to fast-select an integer extend instruction.
510 bool PPCFastISel::SelectIntExt(const Instruction *I) {
511 Type *DestTy = I->getType();
512 Value *Src = I->getOperand(0);
513 Type *SrcTy = Src->getType();
514
515 bool IsZExt = isa(I);
516 unsigned SrcReg = getRegForValue(Src);
517 if (!SrcReg) return false;
518
519 EVT SrcEVT, DestEVT;
520 SrcEVT = TLI.getValueType(SrcTy, true);
521 DestEVT = TLI.getValueType(DestTy, true);
522 if (!SrcEVT.isSimple())
523 return false;
524 if (!DestEVT.isSimple())
525 return false;
526
527 MVT SrcVT = SrcEVT.getSimpleVT();
528 MVT DestVT = DestEVT.getSimpleVT();
529
530 // If we know the register class needed for the result of this
531 // instruction, use it. Otherwise pick the register class of the
532 // correct size that does not contain X0/R0, since we don't know
533 // whether downstream uses permit that assignment.
534 unsigned AssignedReg = FuncInfo.ValueMap[I];
535 const TargetRegisterClass *RC =
536 (AssignedReg ? MRI.getRegClass(AssignedReg) :
537 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
538 &PPC::GPRC_and_GPRC_NOR0RegClass));
539 unsigned ResultReg = createResultReg(RC);
540
541 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
542 return false;
543
544 UpdateValueMap(I, ResultReg);
545 return true;
546 }
547
337548 // Attempt to fast-select an instruction that wasn't handled by
338549 // the table-generated machinery.
339550 bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
343554 return SelectBranch(I);
344555 case Instruction::IndirectBr:
345556 return SelectIndirectBr(I);
557 case Instruction::Ret:
558 return SelectRet(I);
559 case Instruction::ZExt:
560 case Instruction::SExt:
561 return SelectIntExt(I);
346562 // Here add other flavors of Instruction::XXX that automated
347563 // cases don't catch. For example, switches are terminators
348564 // that aren't yet handled.
505505 [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
506506 } // Interpretation64Bit
507507
// For fast-isel: extsb/extsh forms that take a gprc source operand and
// produce a g8rc result.  Both are marked isCodeGenOnly.
let isCodeGenOnly = 1 in
def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
                            "extsb $rA, $rS", IntSimple, []>, isPPC64;
let isCodeGenOnly = 1 in
def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
                            "extsh $rA, $rS", IntSimple, []>, isPPC64;
// End fast-isel.
515
508516 defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
509517 "extsw", "$rA, $rS", IntSimple,
510518 [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
568576 (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
569577 "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
570578 []>, isPPC64;
// For fast-isel: rldicl form that takes a gprc source operand and
// produces a g8rc result.  Marked isCodeGenOnly.
let isCodeGenOnly = 1 in {
def RLDICL_32_64 : MDForm_1<30, 0, (outs g8rc:$rA),
                            (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
                            "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
                            []>, isPPC64;
} // isCodeGenOnly for fast-isel
571587 defm RLDICR : MDForm_1r<30, 1,
572588 (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
573589 "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
0 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; Each function below performs one integer extension and returns the
; result.  The ELF64 check lines first match the function name and then
; the instruction expected for that extension.
1
2 ; zext
; Zero extensions to i32 expect rlwinm with a mask start of 24 (from i8)
; or 16 (from i16); zero extensions to i64 expect rldicl with a mask
; start of 56, 48 or 32.
3
4 define i32 @zext_8_32(i8 %a) nounwind ssp {
5 ; ELF64: zext_8_32
6 %r = zext i8 %a to i32
7 ; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
8 ret i32 %r
9 }
10
11 define i32 @zext_16_32(i16 %a) nounwind ssp {
12 ; ELF64: zext_16_32
13 %r = zext i16 %a to i32
14 ; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
15 ret i32 %r
16 }
17
18 define i64 @zext_8_64(i8 %a) nounwind ssp {
19 ; ELF64: zext_8_64
20 %r = zext i8 %a to i64
21 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
22 ret i64 %r
23 }
24
25 define i64 @zext_16_64(i16 %a) nounwind ssp {
26 ; ELF64: zext_16_64
27 %r = zext i16 %a to i64
28 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
29 ret i64 %r
30 }
31
32 define i64 @zext_32_64(i32 %a) nounwind ssp {
33 ; ELF64: zext_32_64
34 %r = zext i32 %a to i64
35 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
36 ret i64 %r
37 }
38
39 ; sext
; Sign extensions expect extsb (from i8), extsh (from i16) or extsw
; (from i32); only the mnemonic is checked, not the operands.
40
41 define i32 @sext_8_32(i8 %a) nounwind ssp {
42 ; ELF64: sext_8_32
43 %r = sext i8 %a to i32
44 ; ELF64: extsb
45 ret i32 %r
46 }
47
48 define i32 @sext_16_32(i16 %a) nounwind ssp {
49 ; ELF64: sext_16_32
50 %r = sext i16 %a to i32
51 ; ELF64: extsh
52 ret i32 %r
53 }
54
55 define i64 @sext_8_64(i8 %a) nounwind ssp {
56 ; ELF64: sext_8_64
57 %r = sext i8 %a to i64
58 ; ELF64: extsb
59 ret i64 %r
60 }
61
62 define i64 @sext_16_64(i16 %a) nounwind ssp {
63 ; ELF64: sext_16_64
64 %r = sext i16 %a to i64
65 ; ELF64: extsh
66 ret i64 %r
67 }
68
69 define i64 @sext_32_64(i32 %a) nounwind ssp {
70 ; ELF64: sext_32_64
71 %r = sext i32 %a to i64
72 ; ELF64: extsw
73 ret i64 %r
74 }
0 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; Each function below returns either its argument or a constant.  The
; ELF64 check lines match the function name, then any instruction
; expected before the return, then the blr itself.
1
; ret2-ret9: returning a small integer argument.  signext returns expect
; a sign-extend instruction; zeroext returns and returns with no
; extension attribute (ret6, ret9) expect rldicl.
2 define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
3 entry:
4 ; ELF64: ret2
5 ; ELF64: extsb
6 ; ELF64: blr
7 ret i8 %a
8 }
9
10 define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
11 entry:
12 ; ELF64: ret3
13 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
14 ; ELF64: blr
15 ret i8 %a
16 }
17
18 define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
19 entry:
20 ; ELF64: ret4
21 ; ELF64: extsh
22 ; ELF64: blr
23 ret i16 %a
24 }
25
26 define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
27 entry:
28 ; ELF64: ret5
29 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
30 ; ELF64: blr
31 ret i16 %a
32 }
33
34 define i16 @ret6(i16 %a) nounwind uwtable ssp {
35 entry:
36 ; ELF64: ret6
37 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
38 ; ELF64: blr
39 ret i16 %a
40 }
41
42 define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp {
43 entry:
44 ; ELF64: ret7
45 ; ELF64: extsw
46 ; ELF64: blr
47 ret i32 %a
48 }
49
50 define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp {
51 entry:
52 ; ELF64: ret8
53 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
54 ; ELF64: blr
55 ret i32 %a
56 }
57
58 define i32 @ret9(i32 %a) nounwind uwtable ssp {
59 entry:
60 ; ELF64: ret9
61 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
62 ; ELF64: blr
63 ret i32 %a
64 }
65
; ret10: an i64 argument is returned as-is, so neither an exts* nor an
; rldicl may appear before the blr.
66 define i64 @ret10(i64 %a) nounwind uwtable ssp {
67 entry:
68 ; ELF64: ret10
69 ; ELF64-NOT: exts
70 ; ELF64-NOT: rldicl
71 ; ELF64: blr
72 ret i64 %a
73 }
74
; ret11-ret12: floating-point arguments; only the blr is checked.
75 define float @ret11(float %a) nounwind uwtable ssp {
76 entry:
77 ; ELF64: ret11
78 ; ELF64: blr
79 ret float %a
80 }
81
82 define double @ret12(double %a) nounwind uwtable ssp {
83 entry:
84 ; ELF64: ret12
85 ; ELF64: blr
86 ret double %a
87 }
88
; ret13-ret16: integer constants.  The checks expect the instructions
; that build the constant (li, lis/ori, or li/sldi/oris/ori) followed by
; the blr.
89 define i8 @ret13() nounwind uwtable ssp {
90 entry:
91 ; ELF64: ret13
92 ; ELF64: li
93 ; ELF64: blr
94 ret i8 15;
95 }
96
97 define i16 @ret14() nounwind uwtable ssp {
98 entry:
99 ; ELF64: ret14
100 ; ELF64: li
101 ; ELF64: blr
102 ret i16 -225;
103 }
104
105 define i32 @ret15() nounwind uwtable ssp {
106 entry:
107 ; ELF64: ret15
108 ; ELF64: lis
109 ; ELF64: ori
110 ; ELF64: blr
111 ret i32 278135;
112 }
113
114 define i64 @ret16() nounwind uwtable ssp {
115 entry:
116 ; ELF64: ret16
117 ; ELF64: li
118 ; ELF64: sldi
119 ; ELF64: oris
120 ; ELF64: ori
121 ; ELF64: blr
122 ret i64 27813515225;
123 }
124
; ret17-ret18: floating-point constants.  The checks expect an addis
; followed by a load (lfs for float, lfd for double) before the blr.
125 define float @ret17() nounwind uwtable ssp {
126 entry:
127 ; ELF64: ret17
128 ; ELF64: addis
129 ; ELF64: lfs
130 ; ELF64: blr
131 ret float 2.5;
132 }
133
134 define double @ret18() nounwind uwtable ssp {
135 entry:
136 ; ELF64: ret18
137 ; ELF64: addis
138 ; ELF64: lfd
139 ; ELF64: blr
140 ret double 2.5e-33;
141 }