llvm.org GIT mirror llvm / 7bd3baa
[AArch64][GlobalISel] Use fcsel instead of csel for G_SELECT on FPRs This saves us some unnecessary copies. If the inputs to a G_SELECT are floating point, we should use fcsel rather than csel. Changes here are... - Teach selectCopy about s1-to-s1 copies across register banks. - Teach AArch64RegisterBankInfo about G_SELECT in general. - Teach the instruction selector about the FCSEL instructions. Also add two tests: - select-select.mir to show that we get the expected FCSEL - regbank-select.mir (unfortunately named) to show the register banks on G_SELECT are properly preserved And update fast-isel-select.ll to show that we do the same thing as other instruction selectors in these cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359940 91177308-0d34-0410-b5e6-96231b3b80d8 Jessica Paquette 1 year, 5 months ago
5 changed file(s) with 212 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
520520 return true;
521521 }
522522
523 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
524 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
525 const RegisterBankInfo &RBI) {
526
523 /// Helper function to get the source and destination register classes for a
524 /// copy. Returns a std::pair containing the source register class for the
525 /// copy, and the destination register class for the copy. If a register class
526 /// cannot be determined, then it will be nullptr.
527 static std::pair
528 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
529 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
530 const RegisterBankInfo &RBI) {
527531 unsigned DstReg = I.getOperand(0).getReg();
528532 unsigned SrcReg = I.getOperand(1).getReg();
529533 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
530534 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
531 const TargetRegisterClass *DstRC = getMinClassForRegBank(
532 DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
535 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
536 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
537
538 // Special casing for cross-bank copies of s1s. We can technically represent
539 // a 1-bit value with any size of register. The minimum size for a GPR is 32
540 // bits. So, we need to put the FPR on 32 bits as well.
541 //
542 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
543 // then we can pull it into the helpers that get the appropriate class for a
544 // register bank. Or make a new helper that carries along some constraint
545 // information.
546 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
547 SrcSize = DstSize = 32;
548
549 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
550 getMinClassForRegBank(DstRegBank, DstSize, true)};
551 }
552
553 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
554 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
555 const RegisterBankInfo &RBI) {
556
557 unsigned DstReg = I.getOperand(0).getReg();
558 unsigned SrcReg = I.getOperand(1).getReg();
559 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
560 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
561
562 // Find the correct register classes for the source and destination registers.
563 const TargetRegisterClass *SrcRC;
564 const TargetRegisterClass *DstRC;
565 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
566
533567 if (!DstRC) {
534568 LLVM_DEBUG(dbgs() << "Unexpected dest size "
535569 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
562596 // a SUBREG_TO_REG.
563597 if (I.isCopy()) {
564598 // Yes. Check if there's anything to fix up.
565 const TargetRegisterClass *SrcRC = getMinClassForRegBank(
566 SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
567599 if (!SrcRC) {
568600 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
569601 return false;
17231755 const unsigned TReg = I.getOperand(2).getReg();
17241756 const unsigned FReg = I.getOperand(3).getReg();
17251757
1758 // If we have a floating-point result, then we should use a floating point
1759 // select instead of an integer select.
1760 bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
1761 AArch64::GPRRegBankID);
17261762 unsigned CSelOpc = 0;
17271763
17281764 if (Ty == LLT::scalar(32)) {
1729 CSelOpc = AArch64::CSELWr;
1765 CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
17301766 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
1731 CSelOpc = AArch64::CSELXr;
1767 CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
17321768 } else {
17331769 return false;
17341770 }
475475 const TargetSubtargetInfo &STI = MF.getSubtarget();
476476 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
477477
478 // Helper lambda that returns true if MI has floating point constraints.
479 auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
480 unsigned Op = MI.getOpcode();
481
482 // Do we have an explicit floating point instruction?
483 if (isPreISelGenericFloatingPointOpcode(Op))
484 return true;
485
486 // No. Check if we have a copy-like instruction. If we do, then we could
487 // still be fed by floating point instructions.
488 if (Op != TargetOpcode::COPY && !MI.isPHI())
489 return false;
490
491 // MI is copy-like. Return true if it's using an FPR.
492 return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
493 &AArch64::FPRRegBank;
494 };
495
478496 switch (Opc) {
479497 // G_{F|S|U}REM are not listed because they are not legal.
480498 // Arithmetic ops.
656674 break;
657675 }
658676 break;
677 case TargetOpcode::G_SELECT: {
678 // If the destination is FPR, preserve that.
679 if (OpRegBankIdx[0] != PMI_FirstGPR)
680 break;
681 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
682 if (SrcTy.isVector() ||
683 any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
684 [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
685 // Set the register bank of every operand to FPR.
686 for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
687 Idx < NumOperands; ++Idx)
688 OpRegBankIdx[Idx] = PMI_FirstFPR;
689 }
690 break;
691 }
659692 case TargetOpcode::G_UNMERGE_VALUES: {
660693 // If the first operand belongs to a FPR register bank, then make sure that
661694 // we preserve that.
662695 if (OpRegBankIdx[0] != PMI_FirstGPR)
663696 break;
664
665 // Helper lambda that returns true if MI has floating point constraints.
666 auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
667 unsigned Op = MI.getOpcode();
668
669 // Do we have an explicit floating point instruction?
670 if (isPreISelGenericFloatingPointOpcode(Op))
671 return true;
672
673 // No. Check if we have a copy-like instruction. If we do, then we could
674 // still be fed by floating point instructions.
675 if (Op != TargetOpcode::COPY && !MI.isPHI())
676 return false;
677
678 // MI is copy-like. Return true if it's using an FPR.
679 return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
680 &AArch64::FPRRegBank;
681 };
682697
683698 LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
684699 // UNMERGE into scalars from a vector should always use FPR.
0 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
1 # RUN: llc -mtriple=aarch64-apple-darwin -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
2
3 ...
4 ---
5 name: select_f32
6 alignment: 2
7 legalized: true
8 tracksRegLiveness: true
9 machineFunctionInfo: {}
10 body: |
11 bb.0:
12 liveins: $s0, $s1, $w0
13
14 ; CHECK-LABEL: name: select_f32
15 ; CHECK: liveins: $s0, $s1, $w0
16 ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
17 ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
18 ; CHECK: [[COPY1:%[0-9]+]]:fpr(s32) = COPY $s0
19 ; CHECK: [[COPY2:%[0-9]+]]:fpr(s32) = COPY $s1
20 ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
21 ; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
22 ; CHECK: $s0 = COPY [[SELECT]](s32)
23 ; CHECK: RET_ReallyLR implicit $s0
24 %3:_(s32) = COPY $w0
25 %0:_(s1) = G_TRUNC %3(s32)
26 %1:_(s32) = COPY $s0
27 %2:_(s32) = COPY $s1
28 %4:_(s32) = G_SELECT %0(s1), %1, %2
29 $s0 = COPY %4(s32)
30 RET_ReallyLR implicit $s0
31
32 ...
33 ---
34 name: select_f64
35 alignment: 2
36 legalized: true
37 tracksRegLiveness: true
38 machineFunctionInfo: {}
39 body: |
40 bb.0:
41 liveins: $d0, $d1, $w0
42
43 ; CHECK-LABEL: name: select_f64
44 ; CHECK: liveins: $d0, $d1, $w0
45 ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
46 ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
47 ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
48 ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1
49 ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
50 ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
51 ; CHECK: $d0 = COPY [[SELECT]](s64)
52 ; CHECK: RET_ReallyLR implicit $d0
53 %3:_(s32) = COPY $w0
54 %0:_(s1) = G_TRUNC %3(s32)
55 %1:_(s64) = COPY $d0
56 %2:_(s64) = COPY $d1
57 %4:_(s64) = G_SELECT %0(s1), %1, %2
58 $d0 = COPY %4(s64)
59 RET_ReallyLR implicit $d0
0 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
1 # RUN: llc -mtriple=aarch64-apple-darwin -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
2
3 ...
4 ---
5 name: select_f32
6 alignment: 2
7 legalized: true
8 regBankSelected: true
9 tracksRegLiveness: true
10 machineFunctionInfo: {}
11 body: |
12 bb.0:
13 liveins: $s0, $s1, $w0
14
15 ; CHECK-LABEL: name: select_f32
16 ; CHECK: liveins: $s0, $s1, $w0
17 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
18 ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
19 ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
20 ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
21 ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
22 ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
23 ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
24 ; CHECK: $s0 = COPY [[FCSELSrrr]]
25 ; CHECK: RET_ReallyLR implicit $s0
26 %3:gpr(s32) = COPY $w0
27 %0:gpr(s1) = G_TRUNC %3(s32)
28 %1:fpr(s32) = COPY $s0
29 %2:fpr(s32) = COPY $s1
30 %5:fpr(s1) = COPY %0(s1)
31 %4:fpr(s32) = G_SELECT %5(s1), %1, %2
32 $s0 = COPY %4(s32)
33 RET_ReallyLR implicit $s0
34
35 ...
36 ---
37 name: select_f64
38 alignment: 2
39 legalized: true
40 regBankSelected: true
41 tracksRegLiveness: true
42 machineFunctionInfo: {}
43 body: |
44 bb.0:
45 liveins: $d0, $d1, $w0
46
47 ; CHECK-LABEL: name: select_f64
48 ; CHECK: liveins: $d0, $d1, $w0
49 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
50 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
51 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
52 ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
53 ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
54 ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
55 ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
56 ; CHECK: $d0 = COPY [[FCSELDrrr]]
57 ; CHECK: RET_ReallyLR implicit $d0
58 %3:gpr(s32) = COPY $w0
59 %0:gpr(s1) = G_TRUNC %3(s32)
60 %1:fpr(s64) = COPY $d0
61 %2:fpr(s64) = COPY $d1
62 %5:fpr(s1) = COPY %0(s1)
63 %4:fpr(s64) = G_SELECT %5(s1), %1, %2
64 $d0 = COPY %4(s64)
65 RET_ReallyLR implicit $d0
0 ; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
11 ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
2 ; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
23
34 ; First test the different supported value types for select.
45 define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
4546 ; CHECK-LABEL: select_f32
4647 ; CHECK: {{cmp w0, #0|tst w0, #0x1}}
4748 ; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
49 ; GISEL-LABEL: select_f32
50 ; GISEL: {{cmp w0, #0|tst w0, #0x1}}
51 ; GISEL-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
4852 %1 = select i1 %c, float %a, float %b
4953 ret float %1
5054 }
5357 ; CHECK-LABEL: select_f64
5458 ; CHECK: {{cmp w0, #0|tst w0, #0x1}}
5559 ; CHECK-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
60 ; GISEL-LABEL: select_f64
61 ; GISEL: {{cmp w0, #0|tst w0, #0x1}}
62 ; GISEL-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
5663 %1 = select i1 %c, double %a, double %b
5764 ret double %1
5865 }