llvm.org GIT mirror llvm / 8022036
Try to reuse the value when lowering memset. This allows us to compile: void test(char *s, int a) { __builtin_memset(s, a, 15); } into 1 mul + 3 stores instead of 3 muls + 3 stores. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122710 91177308-0d34-0410-b5e6-96231b3b80d8 Benjamin Kramer 9 years ago
3 changed file(s) with 30 addition(s) and 47 deletion(s). Raw diff Collapse all Expand all
35263526 SmallVector OutChains;
35273527 uint64_t DstOff = 0;
35283528 unsigned NumMemOps = MemOps.size();
3529
3530 // Find the largest store and generate the bit pattern for it.
3531 EVT LargestVT = MemOps[0];
3532 for (unsigned i = 1; i < NumMemOps; i++)
3533 if (MemOps[i].bitsGT(LargestVT))
3534 LargestVT = MemOps[i];
3535 SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
3536
35293537 for (unsigned i = 0; i < NumMemOps; i++) {
35303538 EVT VT = MemOps[i];
3531 unsigned VTSize = VT.getSizeInBits() / 8;
3532 SDValue Value = getMemsetValue(Src, VT, DAG, dl);
3539
3540 // If this store is smaller than the largest store see whether we can get
3541 // the smaller value for free with a truncate.
3542 SDValue Value = MemSetValue;
3543 if (VT.bitsLT(LargestVT)) {
3544 if (!LargestVT.isVector() && !VT.isVector() &&
3545 TLI.isTruncateFree(LargestVT, VT))
3546 Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
3547 else
3548 Value = getMemsetValue(Src, VT, DAG, dl);
3549 }
3550 assert(Value.getValueType() == VT && "Value with wrong type.");
35333551 SDValue Store = DAG.getStore(Chain, dl, Value,
35343552 getMemBasePlusOffset(Dst, DstOff, DAG),
35353553 DstPtrInfo.getWithOffset(DstOff),
35363554 isVol, false, Align);
35373555 OutChains.push_back(Store);
3538 DstOff += VTSize;
3556 DstOff += VT.getSizeInBits() / 8;
35393557 }
35403558
35413559 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3737
3838 Seems like the jb branch has a high likelihood of being taken. It would have
3939 saved a few instructions.
40
41 //===---------------------------------------------------------------------===//
42
43 Poor codegen:
44
45 int X[2];
46 int b;
47 void test(void) {
48 memset(X, b, 2*sizeof(X[0]));
49 }
50
51 llc:
52 movq _b@GOTPCREL(%rip), %rax
53 movzbq (%rax), %rax
54 movq %rax, %rcx
55 shlq $8, %rcx
56 orq %rax, %rcx
57 movq %rcx, %rax
58 shlq $16, %rax
59 orq %rcx, %rax
60 movq %rax, %rcx
61 shlq $32, %rcx
62 movq _X@GOTPCREL(%rip), %rdx
63 orq %rax, %rcx
64 movq %rcx, (%rdx)
65 ret
66
67 gcc:
68 movq _b@GOTPCREL(%rip), %rax
69 movabsq $72340172838076673, %rdx
70 movzbq (%rax), %rax
71 imulq %rdx, %rax
72 movq _X@GOTPCREL(%rip), %rdx
73 movq %rax, (%rdx)
74 ret
75
76 And the codegen is even worse for the following
77 (from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
78 void fill1(char *s, int a)
79 {
80 __builtin_memset(s, a, 15);
81 }
82
83 For this version, we duplicate the computation of the constant to store.
8440
8541 //===---------------------------------------------------------------------===//
8642
2727 ; CHECK: imull $16843009
2828 }
2929
30 define void @t4(i8* nocapture %s, i8 %a) nounwind {
31 entry:
32 tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
33 ret void
34 ; CHECK: t4:
35 ; CHECK: imull $16843009
36 ; CHECK-NOT: imul
37 ; CHECK: ret
38 }