llvm.org GIT mirror llvm / 3ab10c1
[PowerPC] Remove zexts after i32 ctlz The 64-bit semantics of cntlzw are not special: the 32-bit leading-zero count is stored as a 64-bit value in the range [0,32]. As a result, it is always zero extended, and it can be added to the PPCISelDAGToDAG peephole optimization as a frontier instruction for the removal of unnecessary zero extensions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225192 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 5 years ago
3 changed file(s) with 31 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
37353735 return true;
37363736 }
37373737
3738 // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
3739 if (Op32.getMachineOpcode() == PPC::CNTLZW) {
3740 ToPromote.insert(Op32.getNode());
3741 return true;
3742 }
3743
37383744 // Next, check for those instructions we can look through.
37393745
37403746 // Assuming the mask does not wrap around, then the higher-order bits are
39243930 case PPC::LIS: NewOpcode = PPC::LIS8; break;
39253931 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
39263932 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
3933 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
39273934 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
39283935 case PPC::OR: NewOpcode = PPC::OR8; break;
39293936 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
550550 "srad", "$rA, $rS, $rB", IIC_IntRotateD,
551551 [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
552552
553 let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
553 let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
554 defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$rA), (ins g8rc:$rS),
555 "cntlzw", "$rA, $rS", IIC_IntGeneral, []>;
556
554557 defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
555558 "extsb", "$rA, $rS", IIC_IntSimple,
556559 [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
3939 }
4040
4141 ; Function Attrs: nounwind readnone
42 declare i32 @llvm.bswap.i32(i32) #1
42 declare i32 @llvm.bswap.i32(i32) #0
4343
4444 ; Function Attrs: nounwind readonly
45 define zeroext i32 @bs32(i32* nocapture readonly %x) #0 {
45 define zeroext i32 @bs32(i32* nocapture readonly %x) #1 {
4646 entry:
4747 %0 = load i32* %x, align 4
4848 %1 = tail call i32 @llvm.bswap.i32(i32 %0)
5454 }
5555
5656 ; Function Attrs: nounwind readonly
57 define zeroext i16 @bs16(i16* nocapture readonly %x) #0 {
57 define zeroext i16 @bs16(i16* nocapture readonly %x) #1 {
5858 entry:
5959 %0 = load i16* %x, align 2
6060 %1 = tail call i16 @llvm.bswap.i16(i16 %0)
6666 }
6767
6868 ; Function Attrs: nounwind readnone
69 declare i16 @llvm.bswap.i16(i16) #1
69 declare i16 @llvm.bswap.i16(i16) #0
70
71 ; Function Attrs: nounwind readnone
72 define zeroext i32 @ctlz32(i32 zeroext %x) #0 {
73 entry:
74 %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
75 ret i32 %0
76
77 ; CHECK-LABEL: @ctlz32
78 ; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
79 ; CHECK: blr
80 }
81
82 ; Function Attrs: nounwind readnone
83 declare i32 @llvm.ctlz.i32(i32, i1) #0
84
7085
7186 attributes #0 = { nounwind readnone }
87 attributes #1 = { nounwind readonly }
7288