llvm.org GIT mirror llvm / 61a9213
Implement an x86-64 ABI detail of passing structs by hidden first argument. The x86-64 ABI requires the incoming value of %rdi to be copied to %rax on exit from a function that is returning a large C struct. Also, add a README-X86-64 entry detailing the missed optimization opportunity and proposing an alternative approach. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50075 91177308-0d34-0410-b5e6-96231b3b80d8 Dan Gohman 12 years ago
4 changed file(s) with 121 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
235235
236236 //===---------------------------------------------------------------------===//
237237
238 The x86-64 ABI for hidden-argument struct returns requires that the
239 incoming value of %rdi be copied into %rax by the callee upon return.
240
241 The idea is that it saves callers from having to remember this value,
242 which would often require a callee-saved register. Callees usually
243 need to keep this value live for most of their body anyway, so it
244 doesn't add a significant burden on them.
245
246 We currently implement this in codegen; however, this is suboptimal
247 because it means that it would be quite awkward to implement the
248 optimization for callers.
249
250 A better implementation would be to relax the LLVM IR rules for sret
251 arguments to allow a function with an sret argument to have a non-void
252 return type, and to have the front-end set up the sret argument value
253 as the return value of the function. The front-end could more easily
254 emit uses of the returned struct value to be in terms of the function's
255 lowered return value, and it would free non-C frontends from a
256 complication only required by a C-based ABI.
257
258 //===---------------------------------------------------------------------===//
874874 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
875875 Flag = Chain.getValue(1);
876876 }
877
878 // The x86-64 ABI for returning structs by value requires that we copy
879 // the sret argument into %rax for the return. We saved the argument into
880 // a virtual register in the entry block, so now we copy the value out
881 // and into %rax.
882 if (Subtarget->is64Bit() &&
883 DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
884 MachineFunction &MF = DAG.getMachineFunction();
885 X86MachineFunctionInfo *FuncInfo = MF.getInfo();
886 unsigned Reg = FuncInfo->getSRetReturnReg();
// NOTE(review): the virtual register is normally created and filled during
// argument lowering; create it lazily here in case return lowering runs
// first — TODO confirm the register is always written before this read.
887 if (!Reg) {
888 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
889 FuncInfo->setSRetReturnReg(Reg);
890 }
891 SDOperand Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy());
892
// Copy the saved sret pointer into %rax and glue the copy to the return
// sequence via Flag so it cannot be scheduled away from the RET.
893 Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag);
894 Flag = Chain.getValue(1);
895 }
877896
878897 RetOps[0] = Chain; // Update chain.
879898
12221241 assert(VA.isMemLoc());
12231242 ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
12241243 }
1244 }
1245
1246 // The x86-64 ABI for returning structs by value requires that we copy
1247 // the sret argument into %rax for the return. Save the argument into
1248 // a virtual register so that we can access it from the return points.
1249 if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
1250 MachineFunction &MF = DAG.getMachineFunction();
1251 X86MachineFunctionInfo *FuncInfo = MF.getInfo();
1252 unsigned Reg = FuncInfo->getSRetReturnReg();
// Create the virtual register on first use; return lowering looks it up
// through X86MachineFunctionInfo at each return point.
1253 if (!Reg) {
1254 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
1255 FuncInfo->setSRetReturnReg(Reg);
1256 }
// ArgValues[0] is the incoming sret pointer (the hidden first argument).
// Anchor the copy on the entry node so it lands in the entry block, then
// merge it into Root with a TokenFactor so the copy stays on the chain.
1257 SDOperand Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]);
1258 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root);
12251259 }
12261260
12271261 unsigned StackSize = CCInfo.getNextStackOffset();
5252 /// the returnaddr can be safely moved to this area
5353 int TailCallReturnAddrDelta;
5454
55 /// SRetReturnReg - Some subtargets require that sret lowering includes
56 /// returning the value of the returned struct in a register. This field
57 /// holds the virtual register into which the sret argument is passed.
58 unsigned SRetReturnReg;
59
5560 public:
5661 X86MachineFunctionInfo() : ForceFramePointer(false),
5762 CalleeSavedFrameSize(0),
5863 BytesToPopOnReturn(0),
5964 DecorationStyle(None),
6065 ReturnAddrIndex(0),
61 TailCallReturnAddrDelta(0) {}
66 TailCallReturnAddrDelta(0),
67 SRetReturnReg(0) {}
6268
6369 X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
6470 CalleeSavedFrameSize(0),
6571 BytesToPopOnReturn(0),
6672 DecorationStyle(None),
6773 ReturnAddrIndex(0),
68 TailCallReturnAddrDelta(0) {}
74 TailCallReturnAddrDelta(0),
75 SRetReturnReg(0) {}
6976
7077 bool getForceFramePointer() const { return ForceFramePointer;}
7178 void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
8491
8592 int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
8693 void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
94
/// Accessors for SRetReturnReg. A value of 0 (the constructor default)
/// means no virtual register has been allocated for the sret return yet.
95 unsigned getSRetReturnReg() const { return SRetReturnReg; }
96 void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
8797 };
8898 } // End llvm namespace
8999
0 ; RUN: llvm-as < %s | llc | grep {movq %rdi, %rax}
; Verify that on x86-64 a function with a hidden sret argument copies the
; incoming struct-return pointer (%rdi) into %rax on return, as the ABI
; requires. The grep above checks for the copy in llc's assembly output.
1
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
3 target triple = "x86_64-apple-darwin8"
4 %struct.foo = type { [4 x i64] }
5
; @bar copies *%d into the sret result field-by-field (and into a dead
; %memtmp temporary), so the sret pointer stays live through the body.
6 define void @bar(%struct.foo* noalias sret %agg.result, %struct.foo* %d) nounwind {
7 entry:
8 %d_addr = alloca %struct.foo* ; <%struct.foo**> [#uses=2]
9 %memtmp = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=1]
10 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
11 store %struct.foo* %d, %struct.foo** %d_addr
12 %tmp = load %struct.foo** %d_addr, align 8 ; <%struct.foo*> [#uses=1]
13 %tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
14 %tmp2 = getelementptr %struct.foo* %tmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
15 %tmp3 = getelementptr [4 x i64]* %tmp1, i32 0, i32 0 ; [#uses=1]
16 %tmp4 = getelementptr [4 x i64]* %tmp2, i32 0, i32 0 ; [#uses=1]
17 %tmp5 = load i64* %tmp4, align 8 ; [#uses=1]
18 store i64 %tmp5, i64* %tmp3, align 8
19 %tmp6 = getelementptr [4 x i64]* %tmp1, i32 0, i32 1 ; [#uses=1]
20 %tmp7 = getelementptr [4 x i64]* %tmp2, i32 0, i32 1 ; [#uses=1]
21 %tmp8 = load i64* %tmp7, align 8 ; [#uses=1]
22 store i64 %tmp8, i64* %tmp6, align 8
23 %tmp9 = getelementptr [4 x i64]* %tmp1, i32 0, i32 2 ; [#uses=1]
24 %tmp10 = getelementptr [4 x i64]* %tmp2, i32 0, i32 2 ; [#uses=1]
25 %tmp11 = load i64* %tmp10, align 8 ; [#uses=1]
26 store i64 %tmp11, i64* %tmp9, align 8
27 %tmp12 = getelementptr [4 x i64]* %tmp1, i32 0, i32 3 ; [#uses=1]
28 %tmp13 = getelementptr [4 x i64]* %tmp2, i32 0, i32 3 ; [#uses=1]
29 %tmp14 = load i64* %tmp13, align 8 ; [#uses=1]
30 store i64 %tmp14, i64* %tmp12, align 8
; Second copy loop reads back from %agg.result into %memtmp; the stores to
; %memtmp are never read, but they keep the sret pointer in use late in the
; function body.
31 %tmp15 = getelementptr %struct.foo* %memtmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
32 %tmp16 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
33 %tmp17 = getelementptr [4 x i64]* %tmp15, i32 0, i32 0 ; [#uses=1]
34 %tmp18 = getelementptr [4 x i64]* %tmp16, i32 0, i32 0 ; [#uses=1]
35 %tmp19 = load i64* %tmp18, align 8 ; [#uses=1]
36 store i64 %tmp19, i64* %tmp17, align 8
37 %tmp20 = getelementptr [4 x i64]* %tmp15, i32 0, i32 1 ; [#uses=1]
38 %tmp21 = getelementptr [4 x i64]* %tmp16, i32 0, i32 1 ; [#uses=1]
39 %tmp22 = load i64* %tmp21, align 8 ; [#uses=1]
40 store i64 %tmp22, i64* %tmp20, align 8
41 %tmp23 = getelementptr [4 x i64]* %tmp15, i32 0, i32 2 ; [#uses=1]
42 %tmp24 = getelementptr [4 x i64]* %tmp16, i32 0, i32 2 ; [#uses=1]
43 %tmp25 = load i64* %tmp24, align 8 ; [#uses=1]
44 store i64 %tmp25, i64* %tmp23, align 8
45 %tmp26 = getelementptr [4 x i64]* %tmp15, i32 0, i32 3 ; [#uses=1]
46 %tmp27 = getelementptr [4 x i64]* %tmp16, i32 0, i32 3 ; [#uses=1]
47 %tmp28 = load i64* %tmp27, align 8 ; [#uses=1]
48 store i64 %tmp28, i64* %tmp26, align 8
49 br label %return
50
51 return: ; preds = %entry
52 ret void
53 }