llvm.org GIT mirror llvm / 416cdcc
[PPC] Use xxbrd to speed up bswap64 Power doesn't have bswap instructions, so LLVM generates the following code sequence for bswap64: rotldi 5, 3, 16 rotldi 4, 3, 8 rotldi 9, 3, 24 rotldi 10, 3, 32 rotldi 11, 3, 48 rotldi 12, 3, 56 rldimi 4, 5, 8, 48 rldimi 4, 9, 16, 40 rldimi 4, 10, 24, 32 rldimi 4, 11, 40, 16 rldimi 4, 12, 48, 8 rldimi 4, 3, 56, 0 But Power9 has vector bswap instructions, which can also be used to speed up the scalar bswap intrinsic. With this patch, bswap64 can be translated to: mtvsrdd 34, 3, 3 xxbrd 34, 34 mfvsrld 3, 34 Differential Revision: https://reviews.llvm.org/D39510 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317499 91177308-0d34-0410-b5e6-96231b3b80d8 Guozhi Wei 2 years ago
3 changed file(s) with 37 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
290290 setOperationAction(ISD::FROUND, MVT::f32, Legal);
291291 }
292292
293 // PowerPC does not have BSWAP
293 // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction xxbrd
294 // to speed up scalar BSWAP64.
294295 // CTPOP and CTTZ were introduced in P8/P9 respectively
295296 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
296 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
297297 if (Subtarget.isISA3_0()) {
298 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
298299 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
299300 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
300301 } else {
302 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
301303 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
302304 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
303305 }
86748676 return Op;
86758677 }
86768678
8679 // Lower scalar BSWAP64 to xxbrd: place the operand in a v2i64, reverse it as a vector, extract one i64.
8680 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
8681 SDLoc dl(Op);
8682 // MTVSRDD: build a v2i64 whose two elements are both the scalar operand.
8683 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
8684 Op.getOperand(0));
8685 // XXBRD: apply the PPCISD::XXREVERSE node to the whole v2i64.
8686 Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
8687 // MFVSRD: extract a single i64 element -- index 1 on little-endian, index 0 otherwise.
8688 int VectorIndex = 0;
8689 if (Subtarget.isLittleEndian())
8690 VectorIndex = 1;
8691 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
8692 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
8693 return Op;
8694 }
8695
86778696 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
86788697 SelectionDAG &DAG) const {
86798698 SDLoc dl(Op);
91459164 case ISD::SREM:
91469165 case ISD::UREM:
91479166 return LowerREM(Op, DAG);
9167 case ISD::BSWAP:
9168 return LowerBSWAP(Op, DAG);
91489169 }
91499170 }
91509171
952952 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
953953 SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
954954 SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
955 SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
955956 SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
956957 SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
957958 SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
0 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64le-- -mcpu=pwr9 | FileCheck %s
1
2 declare i64 @llvm.bswap.i64(i64)
3
4 ; CHECK: mtvsrdd
5 ; CHECK: xxbrd
6 ; CHECK: mfvsrd
7 define i64 @bswap64(i64 %x) {
8 entry:
9 %0 = call i64 @llvm.bswap.i64(i64 %x)
10 ret i64 %0
11 }
12