llvm.org GIT mirror llvm / a7d4828
[PowerPC] Eliminate sign- and zero-extensions if already sign- or zero-extended This patch enables redundant sign- and zero-extension elimination in PowerPC MI Peephole pass. If the input value of a sign- or zero-extension is known to be already sign- or zero-extended, the operation is redundant and can be eliminated. One common case is sign-extensions for a method parameter or for a method return value; they must be sign- or zero-extended as defined in PPC ELF ABI. For example of the following simple code, two extsw instructions are generated before the invocation of int_func and before the return. With this patch, both extsw are eliminated. void int_func(int); void ii_test(int a) { if (a & 1) return int_func(a); } Such redundant sign- or zero-extensions are quite common in many programs; e.g. I observed about 60,000 occurrences of the elimination while compiling the LLVM+CLANG. Differential Revision: https://reviews.llvm.org/D31319 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315888 91177308-0d34-0410-b5e6-96231b3b80d8 Hiroshi Inoue 2 years ago
10 changed file(s) with 534 addition(s) and 86 deletion(s). Raw diff Collapse all Expand all
36173617
36183618 if (GPR_idx != Num_GPR_Regs) {
36193619 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3620 FuncInfo->addLiveInAttr(VReg, Flags);
36203621 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
36213622 SDValue Store;
36223623
36513652 break;
36523653
36533654 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3655 FuncInfo->addLiveInAttr(VReg, Flags);
36543656 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
36553657 SDValue Addr = FIN;
36563658 if (j) {
36873689 // types to avoid forcing arguments to memory unnecessarily.
36883690 if (GPR_idx != Num_GPR_Regs) {
36893691 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3692 FuncInfo->addLiveInAttr(VReg, Flags);
36903693 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
36913694
36923695 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
37323735 // since otherwise we never run out of FPRs before running out
37333736 // of GPRs.
37343737 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3738 FuncInfo->addLiveInAttr(VReg, Flags);
37353739 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
37363740
37373741 if (ObjectVT == MVT::f32) {
259259 switch (MI.getOpcode()) {
260260 default: return false;
261261 case PPC::EXTSW:
262 case PPC::EXTSW_32:
262263 case PPC::EXTSW_32_64:
263264 SrcReg = MI.getOperand(1).getReg();
264265 DstReg = MI.getOperand(0).getReg();
21022103 int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
21032104 return PPC::getRecordFormOpcode(Opcode);
21042105 }
2106
2107 // This function returns true if the machine instruction
2108 // always outputs a value by sign-extending a 32 bit value,
2109 // i.e. 0 to 31-th bits are same as 32-th bit.
2110 static bool isSignExtendingOp(const MachineInstr &MI) {
2111 int Opcode = MI.getOpcode();
2112 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
2113 Opcode == PPC::LIS || Opcode == PPC::LIS8 ||
2114 Opcode == PPC::SRAW || Opcode == PPC::SRAWo ||
2115 Opcode == PPC::SRAWI || Opcode == PPC::SRAWIo ||
2116 Opcode == PPC::LWA || Opcode == PPC::LWAX ||
2117 Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 ||
2118 Opcode == PPC::LHA || Opcode == PPC::LHAX ||
2119 Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 ||
2120 Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
2121 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
2122 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
2123 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
2124 Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
2125 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
2126 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
2127 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 ||
2128 Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo ||
2129 Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
2130 Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
2131 Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
2132 Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
2133 Opcode == PPC::EXTSB8_32_64)
2134 return true;
2135
2136 if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33)
2137 return true;
2138
2139 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
2140 Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo) &&
2141 MI.getOperand(3).getImm() > 0 &&
2142 MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
2143 return true;
2144
2145 return false;
2146 }
2147
2148 // This function returns true if the machine instruction
2149 // always outputs zeros in higher 32 bits.
2150 static bool isZeroExtendingOp(const MachineInstr &MI) {
2151 int Opcode = MI.getOpcode();
2152 // The 16-bit immediate is sign-extended in li/lis.
2153 // If the most significant bit is zero, all higher bits are zero.
2154 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
2155 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
2156 int64_t Imm = MI.getOperand(1).getImm();
2157 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
2158 return true;
2159 }
2160
2161 // We have some variations of rotate-and-mask instructions
2162 // that clear higher 32-bits.
2163 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
2164 Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo ||
2165 Opcode == PPC::RLDICL_32_64) &&
2166 MI.getOperand(3).getImm() >= 32)
2167 return true;
2168
2169 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
2170 MI.getOperand(3).getImm() >= 32 &&
2171 MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm())
2172 return true;
2173
2174 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
2175 Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
2176 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
2177 MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
2178 return true;
2179
2180 // There are other instructions that clear higher 32-bits.
2181 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
2182 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
2183 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 ||
2184 Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
2185 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo ||
2186 Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW ||
2187 Opcode == PPC::SLW || Opcode == PPC::SLWo ||
2188 Opcode == PPC::SRW || Opcode == PPC::SRWo ||
2189 Opcode == PPC::SLW8 || Opcode == PPC::SRW8 ||
2190 Opcode == PPC::SLWI || Opcode == PPC::SLWIo ||
2191 Opcode == PPC::SRWI || Opcode == PPC::SRWIo ||
2192 Opcode == PPC::LWZ || Opcode == PPC::LWZX ||
2193 Opcode == PPC::LWZU || Opcode == PPC::LWZUX ||
2194 Opcode == PPC::LWBRX || Opcode == PPC::LHBRX ||
2195 Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
2196 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
2197 Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
2198 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
2199 Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 ||
2200 Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 ||
2201 Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 ||
2202 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
2203 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 ||
2204 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
2205 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
2206 Opcode == PPC::ANDIo || Opcode == PPC::ANDISo ||
2207 Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo ||
2208 Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo ||
2209 Opcode == PPC::MFVSRWZ)
2210 return true;
2211
2212 return false;
2213 }
2214
2215 // We limit the max depth to track incoming values of PHIs or binary ops
2216 // (e.g. AND) to avoid excessive cost.
2217 const unsigned MAX_DEPTH = 1;
2218
2219 bool
2220 PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
2221 const unsigned Depth) const {
2222 const MachineFunction *MF = MI.getParent()->getParent();
2223 const MachineRegisterInfo *MRI = &MF->getRegInfo();
2224
2225 switch (MI.getOpcode()) {
2226 case PPC::COPY: {
2227 unsigned SrcReg = MI.getOperand(1).getReg();
2228
2229 // In both ELFv1 and v2 ABI, method parameters and the return value
2230 // are sign- or zero-extended.
2231 if (MF->getSubtarget().isSVR4ABI()) {
2232 const PPCFunctionInfo *FuncInfo = MF->getInfo();
2233 // We check the ZExt/SExt flags for a method parameter.
2234 if (MI.getParent()->getBasicBlock() ==
2235 &MF->getFunction()->getEntryBlock()) {
2236 unsigned VReg = MI.getOperand(0).getReg();
2237 if (MF->getRegInfo().isLiveIn(VReg))
2238 return SignExt ? FuncInfo->isLiveInSExt(VReg) :
2239 FuncInfo->isLiveInZExt(VReg);
2240 }
2241
2242 // For a method return value, we check the ZExt/SExt flags in attribute.
2243 // We assume the following code sequence for method call.
2244 // ADJCALLSTACKDOWN 32, %R1, %R1
2245 // BL8_NOP ,...
2246 // ADJCALLSTACKUP 32, 0, %R1, %R1
2247 // %vreg5 = COPY %X3; G8RC:%vreg5
2248 if (SrcReg == PPC::X3) {
2249 const MachineBasicBlock *MBB = MI.getParent();
2250 MachineBasicBlock::const_instr_iterator II =
2251 MachineBasicBlock::const_instr_iterator(&MI);
2252 if (II != MBB->instr_begin() &&
2253 (--II)->getOpcode() == PPC::ADJCALLSTACKUP) {
2254 const MachineInstr &CallMI = *(--II);
2255 if (CallMI.isCall() && CallMI.getOperand(0).isGlobal()) {
2256 const Function *CalleeFn =
2257 dyn_cast(CallMI.getOperand(0).getGlobal());
2258 const IntegerType *IntTy =
2259 dyn_cast(CalleeFn->getReturnType());
2260 const AttributeSet &Attrs =
2261 CalleeFn->getAttributes().getRetAttributes();
2262 if (IntTy && IntTy->getBitWidth() <= 32)
2263 return Attrs.hasAttribute(SignExt ? Attribute::SExt :
2264 Attribute::ZExt);
2265 }
2266 }
2267 }
2268 }
2269
2270 // If this is a copy from another register, we recursively check source.
2271 if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
2272 return false;
2273 const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
2274 if (SrcMI != NULL)
2275 return isSignOrZeroExtended(*SrcMI, SignExt, Depth);
2276
2277 return false;
2278 }
2279
2280 case PPC::ANDIo:
2281 case PPC::ANDISo:
2282 case PPC::ORI:
2283 case PPC::ORIS:
2284 case PPC::XORI:
2285 case PPC::XORIS:
2286 case PPC::ANDIo8:
2287 case PPC::ANDISo8:
2288 case PPC::ORI8:
2289 case PPC::ORIS8:
2290 case PPC::XORI8:
2291 case PPC::XORIS8: {
2292 // logical operation with 16-bit immediate does not change the upper bits.
2293 // So, we track the operand register as we do for register copy.
2294 unsigned SrcReg = MI.getOperand(1).getReg();
2295 if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
2296 return false;
2297 const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
2298 if (SrcMI != NULL)
2299 return isSignOrZeroExtended(*SrcMI, SignExt, Depth);
2300
2301 return false;
2302 }
2303
2304 // If all incoming values are sign-/zero-extended,
2305 // the output of AND, OR, ISEL or PHI is also sign-/zero-extended.
2306 case PPC::AND:
2307 case PPC::AND8:
2308 case PPC::OR:
2309 case PPC::OR8:
2310 case PPC::ISEL:
2311 case PPC::PHI: {
2312 if (Depth >= MAX_DEPTH)
2313 return false;
2314
2315 // The input registers for PHI are operand 1, 3, ...
2316 // The input registers for others are operand 1 and 2.
2317 unsigned E = 3, D = 1;
2318 if (MI.getOpcode() == PPC::PHI) {
2319 E = MI.getNumOperands();
2320 D = 2;
2321 }
2322
2323 for (unsigned I = 1; I != E; I += D) {
2324 if (MI.getOperand(I).isReg()) {
2325 unsigned SrcReg = MI.getOperand(I).getReg();
2326 if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
2327 return false;
2328 const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
2329 if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1))
2330 return false;
2331 }
2332 else
2333 return false;
2334 }
2335 return true;
2336 }
2337
2338 default:
2339 return SignExt?isSignExtendingOp(MI):
2340 isZeroExtendingOp(MI);
2341 }
2342 return false;
2343 }
292292 }
293293 const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
294294 static int getRecordFormOpcode(unsigned Opcode);
295
296 bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
297 const unsigned PhiDepth) const;
298
299 /// Return true if the output of the instruction is always a sign-extended,
300 /// i.e. 0 to 31-th bits are same as 32-th bit.
301 bool isSignExtended(const MachineInstr &MI, const unsigned depth = 0) const {
302 return isSignOrZeroExtended(MI, true, depth);
303 }
304
305 /// Return true if the output of the instruction is always zero-extended,
306 /// i.e. 0 to 31-th bits are all zeros
307 bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const {
308 return isSignOrZeroExtended(MI, false, depth);
309 }
295310 };
296311
297312 }
2828 #include "llvm/CodeGen/MachineInstrBuilder.h"
2929 #include "llvm/CodeGen/MachineRegisterInfo.h"
3030 #include "llvm/Support/Debug.h"
31 #include "llvm/ADT/Statistic.h"
3132 #include "MCTargetDesc/PPCPredicates.h"
3233
3334 using namespace llvm;
3435
3536 #define DEBUG_TYPE "ppc-mi-peepholes"
3637
38 STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
39 STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
3740 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
41
42 static cl::opt
43 EnableSExtElimination("ppc-eliminate-signext",
44 cl::desc("enable elimination of sign-extensions"),
45 cl::init(true), cl::Hidden);
46
47 static cl::opt
48 EnableZExtElimination("ppc-eliminate-zeroext",
49 cl::desc("enable elimination of zero-extensions"),
50 cl::init(true), cl::Hidden);
3851
3952 namespace llvm {
4053 void initializePPCMIPeepholePass(PassRegistry&);
107120 return nullptr;
108121
109122 return MRI->getVRegDef(Reg);
123 }
124
125 // This function returns number of known zero bits in output of MI
126 // starting from the most significant bit.
127 static unsigned
128 getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
129 unsigned Opcode = MI->getOpcode();
130 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
131 Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo)
132 return MI->getOperand(3).getImm();
133
134 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
135 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
136 return MI->getOperand(3).getImm();
137
138 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
139 Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
140 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
141 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
142 return 32 + MI->getOperand(3).getImm();
143
144 if (Opcode == PPC::ANDIo) {
145 uint16_t Imm = MI->getOperand(2).getImm();
146 return 48 + countLeadingZeros(Imm);
147 }
148
149 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
150 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
151 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
152 // The result ranges from 0 to 32.
153 return 58;
154
155 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
156 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
157 // The result ranges from 0 to 64.
158 return 57;
159
160 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
161 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
162 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
163 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
164 return 48;
165
166 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
167 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
168 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
169 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
170 return 56;
171
172 if (TII->isZeroExtended(*MI))
173 return 32;
174
175 return 0;
110176 }
111177
112178 // Perform peephole optimizations.
366432 }
367433 break;
368434 }
435 case PPC::EXTSH:
436 case PPC::EXTSH8:
437 case PPC::EXTSH8_32_64: {
438 if (!EnableSExtElimination) break;
439 unsigned NarrowReg = MI.getOperand(1).getReg();
440 if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
441 break;
442
443 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
444 // If we've used a zero-extending load that we will sign-extend,
445 // just do a sign-extending load.
446 if (SrcMI->getOpcode() == PPC::LHZ ||
447 SrcMI->getOpcode() == PPC::LHZX) {
448 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
449 break;
450 auto is64Bit = [] (unsigned Opcode) {
451 return Opcode == PPC::EXTSH8;
452 };
453 auto isXForm = [] (unsigned Opcode) {
454 return Opcode == PPC::LHZX;
455 };
456 auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
457 if (is64Bit)
458 if (isXForm) return PPC::LHAX8;
459 else return PPC::LHA8;
460 else
461 if (isXForm) return PPC::LHAX;
462 else return PPC::LHA;
463 };
464 unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
465 isXForm(SrcMI->getOpcode()));
466 DEBUG(dbgs() << "Zero-extending load\n");
467 DEBUG(SrcMI->dump());
468 DEBUG(dbgs() << "and sign-extension\n");
469 DEBUG(MI.dump());
470 DEBUG(dbgs() << "are merged into sign-extending load\n");
471 SrcMI->setDesc(TII->get(Opc));
472 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
473 ToErase = &MI;
474 Simplified = true;
475 NumEliminatedSExt++;
476 }
477 break;
478 }
479 case PPC::EXTSW:
480 case PPC::EXTSW_32:
481 case PPC::EXTSW_32_64: {
482 if (!EnableSExtElimination) break;
483 unsigned NarrowReg = MI.getOperand(1).getReg();
484 if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
485 break;
486
487 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
488 // If we've used a zero-extending load that we will sign-extend,
489 // just do a sign-extending load.
490 if (SrcMI->getOpcode() == PPC::LWZ ||
491 SrcMI->getOpcode() == PPC::LWZX) {
492 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
493 break;
494 auto is64Bit = [] (unsigned Opcode) {
495 return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
496 };
497 auto isXForm = [] (unsigned Opcode) {
498 return Opcode == PPC::LWZX;
499 };
500 auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
501 if (is64Bit)
502 if (isXForm) return PPC::LWAX;
503 else return PPC::LWA;
504 else
505 if (isXForm) return PPC::LWAX_32;
506 else return PPC::LWA_32;
507 };
508 unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
509 isXForm(SrcMI->getOpcode()));
510 DEBUG(dbgs() << "Zero-extending load\n");
511 DEBUG(SrcMI->dump());
512 DEBUG(dbgs() << "and sign-extension\n");
513 DEBUG(MI.dump());
514 DEBUG(dbgs() << "are merged into sign-extending load\n");
515 SrcMI->setDesc(TII->get(Opc));
516 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
517 ToErase = &MI;
518 Simplified = true;
519 NumEliminatedSExt++;
520 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
521 TII->isSignExtended(*SrcMI)) {
522 // We can eliminate EXTSW if the input is known to be already
523 // sign-extended.
524 DEBUG(dbgs() << "Removing redundant sign-extension\n");
525 unsigned TmpReg =
526 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
527 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
528 TmpReg);
529 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
530 MI.getOperand(0).getReg())
531 .addReg(TmpReg)
532 .addReg(NarrowReg)
533 .addImm(PPC::sub_32);
534 ToErase = &MI;
535 Simplified = true;
536 NumEliminatedSExt++;
537 }
538 break;
539 }
540 case PPC::RLDICL: {
541 // We can eliminate RLDICL (e.g. for zero-extension)
542 // if all bits to clear are already zero in the input.
543 // This code assume following code sequence for zero-extension.
544 // %vreg6 = COPY %vreg5:sub_32; (optional)
545 // %vreg8 = IMPLICIT_DEF;
546 // %vreg7 = INSERT_SUBREG %vreg8, %vreg6, sub_32;
547 if (!EnableZExtElimination) break;
548
549 if (MI.getOperand(2).getImm() != 0)
550 break;
551
552 unsigned SrcReg = MI.getOperand(1).getReg();
553 if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
554 break;
555
556 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
557 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
558 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
559 break;
560
561 MachineInstr *ImpDefMI, *SubRegMI;
562 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
563 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
564 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
565
566 SrcMI = SubRegMI;
567 if (SubRegMI->getOpcode() == PPC::COPY) {
568 unsigned CopyReg = SubRegMI->getOperand(1).getReg();
569 if (TargetRegisterInfo::isVirtualRegister(CopyReg))
570 SrcMI = MRI->getVRegDef(CopyReg);
571 }
572
573 unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
574 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
575 DEBUG(dbgs() << "Removing redundant zero-extension\n");
576 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
577 MI.getOperand(0).getReg())
578 .addReg(SrcReg);
579 ToErase = &MI;
580 Simplified = true;
581 NumEliminatedZExt++;
582 }
583 break;
584 }
369585
370586 // TODO: Any instruction that has an immediate form fed only by a PHI
371587 // whose operands are all load immediate can be folded away. We currently
4242 "func_toc" +
4343 Twine(MF.getFunctionNumber()));
4444 }
45
46 bool PPCFunctionInfo::isLiveInSExt(unsigned VReg) const {
47 for (const std::pair &LiveIn : LiveInAttrs)
48 if (LiveIn.first == VReg)
49 return LiveIn.second.isSExt();
50 return false;
51 }
52
53 bool PPCFunctionInfo::isLiveInZExt(unsigned VReg) const {
54 for (const std::pair &LiveIn : LiveInAttrs)
55 if (LiveIn.first == VReg)
56 return LiveIn.second.isZExt();
57 return false;
58 }
1515
1616 #include "llvm/ADT/SmallVector.h"
1717 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/Target/TargetCallingConv.h"
1819
1920 namespace llvm {
2021
112113 /// copies
113114 bool IsSplitCSR = false;
114115
116 /// We keep track attributes for each live-in virtual registers
117 /// to use SExt/ZExt flags in later optimization.
118 std::vector> LiveInAttrs;
119
115120 public:
116121 explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {}
117122
174179 unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
175180 void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
176181
182 /// This function associates attributes for each live-in virtual register.
183 void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags) {
184 LiveInAttrs.push_back(std::make_pair(VReg, Flags));
185 }
186
187 /// This function returns true if the specified vreg is
188 /// a live-in register and sign-extended.
189 bool isLiveInSExt(unsigned VReg) const;
190
191 /// This function returns true if the specified vreg is
192 /// a live-in register and zero-extended.
193 bool isLiveInZExt(unsigned VReg) const;
194
177195 int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
178196 void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
179197
214214 ; CHECK-LABEL: @testComplexISEL
215215 ; CHECK-DAG: [[LI:r[0-9]+]], 1
216216 ; CHECK-DAG: cmplwi [[LD:r[0-9]+]], 0
217 ; CHECK: beq cr0, [[EQ:.LBB[0-9_]+]]
218 ; CHECK: blr
219 ; CHECK: [[EQ]]
217 ; CHECK: bnelr cr0
220218 ; CHECK: xor [[XOR:r[0-9]+]]
221219 ; CHECK: cntlzd [[CZ:r[0-9]+]], [[XOR]]
222220 ; CHECK: rldicl [[SH:r[0-9]+]], [[CZ]], 58, 63
4444 ; CHECK-NEXT: ld 4, 8(4)
4545 ; CHECK-NEXT: cmpld 3, 4
4646 ; CHECK-NEXT: li 3, 0
47 ; CHECK-NEXT: beq 0, .LBB1_3
47 ; CHECK-NEXT: beqlr 0
4848 ; CHECK-NEXT: .LBB1_2: # %res_block
4949 ; CHECK-NEXT: li 3, 1
50 ; CHECK-NEXT: clrldi 3, 3, 32
51 ; CHECK-NEXT: blr
52 ; CHECK-NEXT: .LBB1_3: # %endblock
53 ; CHECK-NEXT: clrldi 3, 3, 32
5450 ; CHECK-NEXT: blr
5551 %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16)
5652 %not.tobool = icmp ne i32 %call, 0
7672 ; CHECK-NEXT: lbz 4, 6(4)
7773 ; CHECK-NEXT: cmplw 3, 4
7874 ; CHECK-NEXT: li 3, 0
79 ; CHECK-NEXT: beq 0, .LBB2_4
75 ; CHECK-NEXT: beqlr 0
8076 ; CHECK-NEXT: .LBB2_3: # %res_block
8177 ; CHECK-NEXT: li 3, 1
82 ; CHECK-NEXT: clrldi 3, 3, 32
83 ; CHECK-NEXT: blr
84 ; CHECK-NEXT: .LBB2_4: # %endblock
85 ; CHECK-NEXT: clrldi 3, 3, 32
8678 ; CHECK-NEXT: blr
8779 %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7)
8880 %not.lnot = icmp ne i32 %call, 0
327327 ; CHECK-LABEL: @getuc0
328328 ; CHECK: mfvsrd 3, 34
329329 ; CHECK: rldicl 3, 3, 8, 56
330 ; CHECK: clrldi 3, 3, 56
331330 ; CHECK-LE-LABEL: @getuc0
332331 ; CHECK-LE: mfvsrd 3,
333332 ; CHECK-LE: clrldi 3, 3, 56
341340 ; CHECK-LABEL: @getuc1
342341 ; CHECK: mfvsrd 3, 34
343342 ; CHECK: rldicl 3, 3, 16, 56
344 ; CHECK: clrldi 3, 3, 56
345343 ; CHECK-LE-LABEL: @getuc1
346344 ; CHECK-LE: mfvsrd 3,
347345 ; CHECK-LE: rldicl 3, 3, 56, 56
348 ; CHECK-LE: clrldi 3, 3, 56
349346 }
350347
351348 ; Function Attrs: norecurse nounwind readnone
356353 ; CHECK-LABEL: @getuc2
357354 ; CHECK: mfvsrd 3, 34
358355 ; CHECK: rldicl 3, 3, 24, 56
359 ; CHECK: clrldi 3, 3, 56
360356 ; CHECK-LE-LABEL: @getuc2
361357 ; CHECK-LE: mfvsrd 3,
362358 ; CHECK-LE: rldicl 3, 3, 48, 56
363 ; CHECK-LE: clrldi 3, 3, 56
364359 }
365360
366361 ; Function Attrs: norecurse nounwind readnone
371366 ; CHECK-LABEL: @getuc3
372367 ; CHECK: mfvsrd 3, 34
373368 ; CHECK: rldicl 3, 3, 32, 56
374 ; CHECK: clrldi 3, 3, 56
375369 ; CHECK-LE-LABEL: @getuc3
376370 ; CHECK-LE: mfvsrd 3,
377371 ; CHECK-LE: rldicl 3, 3, 40, 56
378 ; CHECK-LE: clrldi 3, 3, 56
379372 }
380373
381374 ; Function Attrs: norecurse nounwind readnone
386379 ; CHECK-LABEL: @getuc4
387380 ; CHECK: mfvsrd 3, 34
388381 ; CHECK: rldicl 3, 3, 40, 56
389 ; CHECK: clrldi 3, 3, 56
390382 ; CHECK-LE-LABEL: @getuc4
391383 ; CHECK-LE: mfvsrd 3,
392384 ; CHECK-LE: rldicl 3, 3, 32, 56
393 ; CHECK-LE: clrldi 3, 3, 56
394385 }
395386
396387 ; Function Attrs: norecurse nounwind readnone
401392 ; CHECK-LABEL: @getuc5
402393 ; CHECK: mfvsrd 3, 34
403394 ; CHECK: rldicl 3, 3, 48, 56
404 ; CHECK: clrldi 3, 3, 56
405395 ; CHECK-LE-LABEL: @getuc5
406396 ; CHECK-LE: mfvsrd 3,
407397 ; CHECK-LE: rldicl 3, 3, 24, 56
408 ; CHECK-LE: clrldi 3, 3, 56
409398 }
410399
411400 ; Function Attrs: norecurse nounwind readnone
416405 ; CHECK-LABEL: @getuc6
417406 ; CHECK: mfvsrd 3, 34
418407 ; CHECK: rldicl 3, 3, 56, 56
419 ; CHECK: clrldi 3, 3, 56
420408 ; CHECK-LE-LABEL: @getuc6
421409 ; CHECK-LE: mfvsrd 3,
422410 ; CHECK-LE: rldicl 3, 3, 16, 56
423 ; CHECK-LE: clrldi 3, 3, 56
424411 }
425412
426413 ; Function Attrs: norecurse nounwind readnone
434421 ; CHECK-LE-LABEL: @getuc7
435422 ; CHECK-LE: mfvsrd 3,
436423 ; CHECK-LE: rldicl 3, 3, 8, 56
424 }
425
426 ; Function Attrs: norecurse nounwind readnone
427 define zeroext i8 @getuc8(<16 x i8> %vuc) {
428 entry:
429 %vecext = extractelement <16 x i8> %vuc, i32 8
430 ret i8 %vecext
431 ; CHECK-LABEL: @getuc8
432 ; CHECK: mfvsrd 3,
433 ; CHECK: rldicl 3, 3, 8, 56
434 ; CHECK-LE-LABEL: @getuc8
435 ; CHECK-LE: mfvsrd 3, 34
437436 ; CHECK-LE: clrldi 3, 3, 56
438437 }
439438
440439 ; Function Attrs: norecurse nounwind readnone
441 define zeroext i8 @getuc8(<16 x i8> %vuc) {
442 entry:
443 %vecext = extractelement <16 x i8> %vuc, i32 8
444 ret i8 %vecext
445 ; CHECK-LABEL: @getuc8
446 ; CHECK: mfvsrd 3,
447 ; CHECK: rldicl 3, 3, 8, 56
448 ; CHECK: clrldi 3, 3, 56
449 ; CHECK-LE-LABEL: @getuc8
450 ; CHECK-LE: mfvsrd 3, 34
451 ; CHECK-LE: clrldi 3, 3, 56
452 }
453
454 ; Function Attrs: norecurse nounwind readnone
455440 define zeroext i8 @getuc9(<16 x i8> %vuc) {
456441 entry:
457442 %vecext = extractelement <16 x i8> %vuc, i32 9
459444 ; CHECK-LABEL: @getuc9
460445 ; CHECK: mfvsrd 3,
461446 ; CHECK: rldicl 3, 3, 16, 56
462 ; CHECK: clrldi 3, 3, 56
463447 ; CHECK-LE-LABEL: @getuc9
464448 ; CHECK-LE: mfvsrd 3, 34
465449 ; CHECK-LE: rldicl 3, 3, 56, 56
466 ; CHECK-LE: clrldi 3, 3, 56
467450 }
468451
469452 ; Function Attrs: norecurse nounwind readnone
474457 ; CHECK-LABEL: @getuc10
475458 ; CHECK: mfvsrd 3,
476459 ; CHECK: rldicl 3, 3, 24, 56
477 ; CHECK: clrldi 3, 3, 56
478460 ; CHECK-LE-LABEL: @getuc10
479461 ; CHECK-LE: mfvsrd 3, 34
480462 ; CHECK-LE: rldicl 3, 3, 48, 56
481 ; CHECK-LE: clrldi 3, 3, 56
482463 }
483464
484465 ; Function Attrs: norecurse nounwind readnone
489470 ; CHECK-LABEL: @getuc11
490471 ; CHECK: mfvsrd 3,
491472 ; CHECK: rldicl 3, 3, 32, 56
492 ; CHECK: clrldi 3, 3, 56
493473 ; CHECK-LE-LABEL: @getuc11
494474 ; CHECK-LE: mfvsrd 3, 34
495475 ; CHECK-LE: rldicl 3, 3, 40, 56
496 ; CHECK-LE: clrldi 3, 3, 56
497476 }
498477
499478 ; Function Attrs: norecurse nounwind readnone
504483 ; CHECK-LABEL: @getuc12
505484 ; CHECK: mfvsrd 3,
506485 ; CHECK: rldicl 3, 3, 40, 56
507 ; CHECK: clrldi 3, 3, 56
508486 ; CHECK-LE-LABEL: @getuc12
509487 ; CHECK-LE: mfvsrd 3, 34
510488 ; CHECK-LE: rldicl 3, 3, 32, 56
511 ; CHECK-LE: clrldi 3, 3, 56
512489 }
513490
514491 ; Function Attrs: norecurse nounwind readnone
519496 ; CHECK-LABEL: @getuc13
520497 ; CHECK: mfvsrd 3,
521498 ; CHECK: rldicl 3, 3, 48, 56
522 ; CHECK: clrldi 3, 3, 56
523499 ; CHECK-LE-LABEL: @getuc13
524500 ; CHECK-LE: mfvsrd 3, 34
525501 ; CHECK-LE: rldicl 3, 3, 24, 56
526 ; CHECK-LE: clrldi 3, 3, 56
527502 }
528503
529504 ; Function Attrs: norecurse nounwind readnone
534509 ; CHECK-LABEL: @getuc14
535510 ; CHECK: mfvsrd 3,
536511 ; CHECK: rldicl 3, 3, 56, 56
537 ; CHECK: clrldi 3, 3, 56
538512 ; CHECK-LE-LABEL: @getuc14
539513 ; CHECK-LE: mfvsrd 3, 34
540514 ; CHECK-LE: rldicl 3, 3, 16, 56
541 ; CHECK-LE: clrldi 3, 3, 56
542515 }
543516
544517 ; Function Attrs: norecurse nounwind readnone
552525 ; CHECK-LE-LABEL: @getuc15
553526 ; CHECK-LE: mfvsrd 3, 34
554527 ; CHECK-LE: rldicl 3, 3, 8, 56
555 ; CHECK-LE: clrldi 3, 3, 56
556528 }
557529
558530 ; Function Attrs: norecurse nounwind readnone
738710 ; CHECK-LABEL: @getus0
739711 ; CHECK: mfvsrd 3, 34
740712 ; CHECK: rldicl 3, 3, 16, 48
741 ; CHECK: clrldi 3, 3, 48
742713 ; CHECK-LE-LABEL: @getus0
743714 ; CHECK-LE: mfvsrd 3,
744715 ; CHECK-LE: clrldi 3, 3, 48
752723 ; CHECK-LABEL: @getus1
753724 ; CHECK: mfvsrd 3, 34
754725 ; CHECK: rldicl 3, 3, 32, 48
755 ; CHECK: clrldi 3, 3, 48
756726 ; CHECK-LE-LABEL: @getus1
757727 ; CHECK-LE: mfvsrd 3,
758728 ; CHECK-LE: rldicl 3, 3, 48, 48
759 ; CHECK-LE: clrldi 3, 3, 48
760729 }
761730
762731 ; Function Attrs: norecurse nounwind readnone
767736 ; CHECK-LABEL: @getus2
768737 ; CHECK: mfvsrd 3, 34
769738 ; CHECK: rldicl 3, 3, 48, 48
770 ; CHECK: clrldi 3, 3, 48
771739 ; CHECK-LE-LABEL: @getus2
772740 ; CHECK-LE: mfvsrd 3,
773741 ; CHECK-LE: rldicl 3, 3, 32, 48
774 ; CHECK-LE: clrldi 3, 3, 48
775742 }
776743
777744 ; Function Attrs: norecurse nounwind readnone
785752 ; CHECK-LE-LABEL: @getus3
786753 ; CHECK-LE: mfvsrd 3,
787754 ; CHECK-LE: rldicl 3, 3, 16, 48
755 }
756
757 ; Function Attrs: norecurse nounwind readnone
758 define zeroext i16 @getus4(<8 x i16> %vus) {
759 entry:
760 %vecext = extractelement <8 x i16> %vus, i32 4
761 ret i16 %vecext
762 ; CHECK-LABEL: @getus4
763 ; CHECK: mfvsrd 3,
764 ; CHECK: rldicl 3, 3, 16, 48
765 ; CHECK-LE-LABEL: @getus4
766 ; CHECK-LE: mfvsrd 3, 34
788767 ; CHECK-LE: clrldi 3, 3, 48
789768 }
790769
791770 ; Function Attrs: norecurse nounwind readnone
792 define zeroext i16 @getus4(<8 x i16> %vus) {
793 entry:
794 %vecext = extractelement <8 x i16> %vus, i32 4
795 ret i16 %vecext
796 ; CHECK-LABEL: @getus4
797 ; CHECK: mfvsrd 3,
798 ; CHECK: rldicl 3, 3, 16, 48
799 ; CHECK: clrldi 3, 3, 48
800 ; CHECK-LE-LABEL: @getus4
801 ; CHECK-LE: mfvsrd 3, 34
802 ; CHECK-LE: clrldi 3, 3, 48
803 }
804
805 ; Function Attrs: norecurse nounwind readnone
806771 define zeroext i16 @getus5(<8 x i16> %vus) {
807772 entry:
808773 %vecext = extractelement <8 x i16> %vus, i32 5
810775 ; CHECK-LABEL: @getus5
811776 ; CHECK: mfvsrd 3,
812777 ; CHECK: rldicl 3, 3, 32, 48
813 ; CHECK: clrldi 3, 3, 48
814778 ; CHECK-LE-LABEL: @getus5
815779 ; CHECK-LE: mfvsrd 3, 34
816780 ; CHECK-LE: rldicl 3, 3, 48, 48
817 ; CHECK-LE: clrldi 3, 3, 48
818781 }
819782
820783 ; Function Attrs: norecurse nounwind readnone
825788 ; CHECK-LABEL: @getus6
826789 ; CHECK: mfvsrd 3,
827790 ; CHECK: rldicl 3, 3, 48, 48
828 ; CHECK: clrldi 3, 3, 48
829791 ; CHECK-LE-LABEL: @getus6
830792 ; CHECK-LE: mfvsrd 3, 34
831793 ; CHECK-LE: rldicl 3, 3, 32, 48
832 ; CHECK-LE: clrldi 3, 3, 48
833794 }
834795
835796 ; Function Attrs: norecurse nounwind readnone
843804 ; CHECK-LE-LABEL: @getus7
844805 ; CHECK-LE: mfvsrd 3, 34
845806 ; CHECK-LE: rldicl 3, 3, 16, 48
846 ; CHECK-LE: clrldi 3, 3, 48
847807 }
848808
849809 ; Function Attrs: norecurse nounwind readnone
972932 ; CHECK-LABEL: @getui0
973933 ; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
974934 ; CHECK: mfvsrwz 3, [[SHL]]
975 ; CHECK: clrldi 3, 3, 32
976935 ; CHECK-LE-LABEL: @getui0
977936 ; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
978937 ; CHECK-LE: mfvsrwz 3, [[SHL]]
979 ; CHECK-LE: clrldi 3, 3, 32
980938 }
981939
982940 ; Function Attrs: norecurse nounwind readnone
986944 ret i32 %vecext
987945 ; CHECK-LABEL: @getui1
988946 ; CHECK: mfvsrwz 3, 34
989 ; CHECK: clrldi 3, 3, 32
990947 ; CHECK-LE-LABEL: @getui1
991948 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
992949 ; CHECK-LE: mfvsrwz 3, [[SHL]]
993 ; CHECK-LE: clrldi 3, 3, 32
994950 }
995951
996952 ; Function Attrs: norecurse nounwind readnone
1001957 ; CHECK-LABEL: @getui2
1002958 ; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
1003959 ; CHECK: mfvsrwz 3, [[SHL]]
1004 ; CHECK: clrldi 3, 3, 32
1005960 ; CHECK-LE-LABEL: @getui2
1006961 ; CHECK-LE: mfvsrwz 3, 34
1007 ; CHECK-LE: clrldi 3, 3, 32
1008962 }
1009963
1010964 ; Function Attrs: norecurse nounwind readnone
1015969 ; CHECK-LABEL: @getui3
1016970 ; CHECK: xxswapd [[SHL:[0-9]+]], 34
1017971 ; CHECK: mfvsrwz 3, [[SHL]]
1018 ; CHECK: clrldi 3, 3, 32
1019972 ; CHECK-LE-LABEL: @getui3
1020973 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
1021974 ; CHECK-LE: mfvsrwz 3, [[SHL]]
1022 ; CHECK-LE: clrldi 3, 3, 32
1023975 }
1024976
1025977 ; Function Attrs: norecurse nounwind readnone
3030 ; CHECK-LABEL: limit_loop
3131 ; CHECK: mtctr
3232 ; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, 1
33 ; CHECK: bdnz
33 ; CHECK: bdzlr
3434 ; CHECK: blr
3535 }
3636