llvm.org GIT mirror llvm / 661afe7
Issue description: ScheduleDAGInstrs::buildSchedGraph ignores dependencies between FixedStack objects and byval parameters. As a result, a load of a byval parameter from the stack may be scheduled *before* the store that writes it, since these operations are treated as independent. Fix: Currently ARMTargetLowering::LowerFormalArguments saves byval registers with FixedStack MachinePointerInfo. To fix the problem, we need to store byval registers with a MachinePointerInfo that references the first "byval" parameter. The commit also adds two new fields to the InputArg structure: the index of the Function's argument, and the InputArg's part offset in bytes relative to the start of the Function's argument. E.g., if a function's argument is 128 bits wide and it was split into 32-bit registers, then we get 4 InputArg structs with the same argument index but different offset values. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165616 91177308-0d34-0410-b5e6-96231b3b80d8 Stepan Dyatkovskiy 7 years ago
5 changed file(s) with 50 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
112112 MVT VT;
113113 bool Used;
114114
115 /// Index of the original Function's argument.
116 unsigned OrigArgIndex;
117
118 /// Offset in bytes of the current input value relative to the beginning of
119 /// the original argument. E.g. if the argument was split into four 32-bit
120 /// registers, we get 4 InputArgs with PartOffsets 0, 4, 8 and 12.
121 unsigned PartOffset;
122
115123 InputArg() : VT(MVT::Other), Used(false) {}
116 InputArg(ArgFlagsTy flags, EVT vt, bool used)
117 : Flags(flags), Used(used) {
124 InputArg(ArgFlagsTy flags, EVT vt, bool used,
125 unsigned origIdx, unsigned partOffs)
126 : Flags(flags), Used(used), OrigArgIndex(origIdx), PartOffset(partOffs) {
118127 VT = vt.getSimpleVT();
119128 }
120129 };
66806680 ISD::ArgFlagsTy Flags;
66816681 Flags.setSRet();
66826682 EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
6683 ISD::InputArg RetArg(Flags, RegisterVT, true);
6683 ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
66846684 Ins.push_back(RetArg);
66856685 }
66866686
67286728 EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
67296729 unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
67306730 for (unsigned i = 0; i != NumRegs; ++i) {
6731 ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
6731 ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
6732 Idx-1, i*RegisterVT.getStoreSize());
67326733 if (NumRegs > 1 && i == 0)
67336734 MyFlags.Flags.setSplit();
67346735 // if it isn't first piece, alignment must be 1
25332533 void
25342534 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
25352535 DebugLoc dl, SDValue &Chain,
2536 const Value *OrigArg,
2537 unsigned OffsetFromOrigArg,
25362538 unsigned ArgOffset) const {
25372539 MachineFunction &MF = DAG.getMachineFunction();
25382540 MachineFrameInfo *MFI = MF.getFrameInfo();
25602562 getPointerTy());
25612563
25622564 SmallVector MemOps;
2563 for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
2565 for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
25642566 const TargetRegisterClass *RC;
25652567 if (AFI->isThumb1OnlyFunction())
25662568 RC = &ARM::tGPRRegClass;
25712573 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
25722574 SDValue Store =
25732575 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2574 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
2576 MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
25752577 false, false, 0);
25762578 MemOps.push_back(Store);
25772579 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
26052607 CCInfo.AnalyzeFormalArguments(Ins,
26062608 CCAssignFnForNode(CallConv, /* Return*/ false,
26072609 isVarArg));
2608
2610
26092611 SmallVector ArgValues;
26102612 int lastInsIndex = -1;
2611
26122613 SDValue ArgValue;
2614 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2615 unsigned CurArgIdx = 0;
26132616 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
26142617 CCValAssign &VA = ArgLocs[i];
2615
2618 std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
2619 CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
26162620 // Arguments stored in registers.
26172621 if (VA.isRegLoc()) {
26182622 EVT RegVT = VA.getLocVT();
27082712 if (Flags.isByVal()) {
27092713 unsigned VARegSize, VARegSaveSize;
27102714 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
2711 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
2715 VarArgStyleRegisters(CCInfo, DAG,
2716 dl, Chain, CurOrigArg, Ins[VA.getValNo()].PartOffset, 0);
27122717 unsigned Bytes = Flags.getByValSize() - VARegSize;
27132718 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
27142719 int FI = MFI->CreateFixedObject(Bytes,
27312736
27322737 // varargs
27332738 if (isVarArg)
2734 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
2739 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
2740 CCInfo.getNextStackOffset());
27352741
27362742 return Chain;
27372743 }
465465 SmallVectorImpl &InVals) const;
466466
467467 void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
468 DebugLoc dl, SDValue &Chain, unsigned ArgOffset)
468 DebugLoc dl, SDValue &Chain,
469 const Value *OrigArg,
470 unsigned OffsetFromOrigArg,
471 unsigned ArgOffset)
469472 const;
470473
471474 void computeRegArea(CCState &CCInfo, MachineFunction &MF,
0 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
1
2 @.str = private unnamed_addr constant [12 x i8] c"val.a = %f\0A\00"
3 %struct_t = type { double, double, double }
4 @static_val = constant %struct_t { double 1.0, double 2.0, double 3.0 }
5
6 declare i32 @printf(i8*, ...)
7
8 ; CHECK: test_byval_usage_scheduling:
9 ; CHECK: str r3, [sp, #12]
10 ; CHECK: str r2, [sp, #8]
11 ; CHECK: vldr d16, [sp, #8]
12 define void @test_byval_usage_scheduling(i32 %n1, i32 %n2, %struct_t* byval %val) nounwind {
13 entry:
14 %a = getelementptr inbounds %struct_t* %val, i32 0, i32 0
15 %0 = load double* %a
16 %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %0)
17 ret void
18 }