llvm.org GIT mirror llvm / 8c06aa1
Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors. We could implement a DAGCombine to turn x * 0x0101 back into logic operations on targets that don't support the multiply or where it is slow (p4) if someone cares enough. Example code: void test(char *s, int a) { __builtin_memset(s, a, 4); } before: _test: ## @test movzbl 8(%esp), %eax movl %eax, %ecx shll $8, %ecx orl %eax, %ecx movl %ecx, %eax shll $16, %eax orl %ecx, %eax movl 4(%esp), %ecx movl %eax, 4(%ecx) movl %eax, (%ecx) ret after: _test: ## @test movzbl 8(%esp), %eax imull $16843009, %eax, %eax ## imm = 0x1010101 movl 4(%esp), %ecx movl %eax, 4(%ecx) movl %eax, (%ecx) ret git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122707 91177308-0d34-0410-b5e6-96231b3b80d8 Benjamin Kramer 9 years ago
2 changed file(s) with 28 addition(s) and 15 deletion(s). Raw diff Collapse all Expand all
31313131 &ArgChains[0], ArgChains.size());
31323132 }
31333133
3134 /// SplatByte - Distribute ByteVal over NumBits bits.
3135 static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
3136 APInt Val = APInt(NumBits, ByteVal);
3137 unsigned Shift = 8;
3138 for (unsigned i = NumBits; i > 8; i >>= 1) {
3139 Val = (Val << Shift) | Val;
3140 Shift <<= 1;
3141 }
3142 return Val;
3143 }
3144
31343145 /// getMemsetValue - Vectorized representation of the memset value
31353146 /// operand.
31363147 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
31393150
31403151 unsigned NumBits = VT.getScalarType().getSizeInBits();
31413152 if (ConstantSDNode *C = dyn_cast(Value)) {
3142 APInt Val = APInt(NumBits, C->getZExtValue() & 255);
3143 unsigned Shift = 8;
3144 for (unsigned i = NumBits; i > 8; i >>= 1) {
3145 Val = (Val << Shift) | Val;
3146 Shift <<= 1;
3147 }
3153 APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
31483154 if (VT.isInteger())
31493155 return DAG.getConstant(Val, VT);
31503156 return DAG.getConstantFP(APFloat(Val), VT);
31513157 }
31523158
3153 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
31543159 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
3155 unsigned Shift = 8;
3156 for (unsigned i = NumBits; i > 8; i >>= 1) {
3157 Value = DAG.getNode(ISD::OR, dl, VT,
3158 DAG.getNode(ISD::SHL, dl, VT, Value,
3159 DAG.getConstant(Shift,
3160 TLI.getShiftAmountTy())),
3161 Value);
3162 Shift <<= 1;
3160 if (NumBits > 8) {
3161 // Use a multiplication with 0x010101... to extend the input to the
3162 // required length.
3163 APInt Magic = SplatByte(NumBits, 0x01);
3164 Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
31633165 }
31643166
31653167 return Value;
1616 call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
1717 unreachable
1818 }
19
20 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
21
22 define void @t3(i8* nocapture %s, i8 %a) nounwind {
23 entry:
24 tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
25 ret void
26 ; CHECK: t3:
27 ; CHECK: imull $16843009
28 }
29