llvm.org GIT mirror llvm / bf7bfe3
[PowerPC] Fix "byval align" arguments Arguments passed as "byval align" should get the specified alignment in the parameter save area. There was some code in PPCISelLowering.cpp that attempted to implement this, but this didn't work correctly: while code did update the ArgOffset value, it neglected to update the PtrOff value (which was already computed from the old ArgOffset), and it also neglected to update GPR_idx -- fields skipped due to alignment in the save area must likewise be skipped in GPRs. This patch fixes and simplifies this logic by: - handling argument offset alignment right at the beginning of argument processing, using a new helper routine CalculateStackSlotAlignment (this avoids having to update PtrOff and other derived values later on) - not tracking GPR_idx separately, but always computing the correct GPR_idx for each argument *from* its ArgOffset - removing some redundant computation in LowerFormalArguments: MinReservedArea must equal ArgOffset after argument processing, so there's no use in computing it twice. [This doesn't change the behavior of the current clang front-end, since that never creates "byval align" arguments at the moment. This will change with a follow-on patch, however.] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212476 91177308-0d34-0410-b5e6-96231b3b80d8 Ulrich Weigand 5 years ago
2 changed file(s) with 118 addition(s) and 67 deletion(s). Raw diff Collapse all Expand all
21302130
21312131 return ArgSize;
21322132 }
2133
2134 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
2135 /// on the stack.
2136 static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
2137 unsigned PtrByteSize) {
2138 unsigned Align = PtrByteSize;
2139
2140 // Altivec parameters are padded to a 16 byte boundary.
2141 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2142 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2143 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
2144 Align = 16;
2145
2146 // ByVal parameters are aligned as requested.
2147 if (Flags.isByVal()) {
2148 unsigned BVAlign = Flags.getByValAlign();
2149 if (BVAlign > PtrByteSize) {
2150 if (BVAlign % PtrByteSize != 0)
2151 llvm_unreachable(
2152 "ByVal alignment is not a multiple of the pointer size");
2153
2154 Align = BVAlign;
2155 }
2156 }
2157
2158 return Align;
2159 }
2160
21332161 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
21342162 /// ensure minimum alignment required for target.
21352163 static unsigned EnsureStackAlignment(const TargetMachine &Target,
24212449
24222450 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
24232451 unsigned ArgOffset = LinkageSize;
2424 // Area that is at least reserved in caller of this function.
2425 unsigned MinReservedArea = ArgOffset;
24262452
24272453 static const MCPhysReg GPR[] = {
24282454 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
24442470 const unsigned Num_FPR_Regs = 13;
24452471 const unsigned Num_VR_Regs = array_lengthof(VR);
24462472
2447 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2473 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
24482474
24492475 // Add DAG nodes to load the arguments or copy them out of registers. On
24502476 // entry to a function on PPC, the arguments start after the linkage area,
24632489 std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
24642490 CurArgIdx = Ins[ArgNo].OrigArgIndex;
24652491
2492 /* Respect alignment of argument on the stack. */
2493 unsigned Align =
2494 CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
2495 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
24662496 unsigned CurArgOffset = ArgOffset;
24672497
2468 // Altivec parameters are padded to a 16 byte boundary.
2469 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
2470 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
2471 ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64)
2472 MinReservedArea = ((MinReservedArea+15)/16)*16;
2473
2474 // Calculate min reserved area.
2475 MinReservedArea += CalculateStackSlotSize(ObjectVT, Flags, PtrByteSize);
2498 /* Compute GPR index associated with argument offset. */
2499 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
2500 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
24762501
24772502 // FIXME the codegen can be much improved in some cases.
24782503 // We do not have to keep everything in memory.
24942519 continue;
24952520 }
24962521
2497 unsigned BVAlign = Flags.getByValAlign();
2498 if (BVAlign > 8) {
2499 ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
2500 CurArgOffset = ArgOffset;
2501 }
2502
25032522 // All aggregates smaller than 8 bytes must be passed right-justified.
25042523 if (ObjSize < PtrByteSize && !isLittleEndian)
25052524 CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
25352554 }
25362555
25372556 MemOps.push_back(Store);
2538 ++GPR_idx;
25392557 }
25402558 // Whether we copied from a register or not, advance the offset
25412559 // into the parameter save area by a full doubleword.
25802598 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
25812599 // value to MVT::i64 and then truncate to the correct register size.
25822600 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
2583
2584 ++GPR_idx;
25852601 } else {
25862602 needsLoad = true;
25872603 ArgSize = PtrByteSize;
25912607
25922608 case MVT::f32:
25932609 case MVT::f64:
2594 // Every 8 bytes of argument space consumes one of the GPRs available for
2595 // argument passing.
2596 if (GPR_idx != Num_GPR_Regs) {
2597 ++GPR_idx;
2598 }
25992610 if (FPR_idx != Num_FPR_Regs) {
26002611 unsigned VReg;
26012612
26212632 case MVT::v16i8:
26222633 case MVT::v2f64:
26232634 case MVT::v2i64:
2624 // Vectors are aligned to a 16-byte boundary in the argument save area.
2625 while ((ArgOffset % 16) != 0) {
2626 ArgOffset += PtrByteSize;
2627 if (GPR_idx != Num_GPR_Regs)
2628 GPR_idx++;
2629 }
26302635 if (VR_idx != Num_VR_Regs) {
26312636 unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
26322637 MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
26342639 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
26352640 ++VR_idx;
26362641 } else {
2637 CurArgOffset = ArgOffset;
26382642 needsLoad = true;
26392643 }
26402644 ArgOffset += 16;
2641 GPR_idx = std::min(GPR_idx + 2, Num_GPR_Regs);
26422645 break;
26432646 }
26442647
26572660 }
26582661
26592662 // Area that is at least reserved in the caller of this function.
2660 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
2663 unsigned MinReservedArea;
2664 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
26612665
26622666 // Set the size that is at least reserved in caller of this function. Tail
26632667 // call optimized functions' reserved stack space needs to be aligned so that
26782682 // If this function is vararg, store any remaining integer argument regs
26792683 // to their spots on the stack so that they may be loaded by dereferencing the
26802684 // result of va_next.
2681 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
2685 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
2686 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
26822687 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
26832688 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
26842689 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
39703975 ISD::ArgFlagsTy Flags = Outs[i].Flags;
39713976 EVT ArgVT = Outs[i].VT;
39723977
3973 // Altivec parameters are padded to a 16 byte boundary.
3974 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3975 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3976 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
3977 NumBytes = ((NumBytes+15)/16)*16;
3978 /* Respect alignment of argument on the stack. */
3979 unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
3980 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
39783981
39793982 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
39803983 }
3984
3985 unsigned NumBytesActuallyUsed = NumBytes;
39813986
39823987 // The prolog code of the callee may store up to 8 GPR argument registers to
39833988 // the stack, allowing va_start to index over them in memory if it's varargs.
40224027 // must be stored to our stack, and loaded into integer regs as well, if
40234028 // any integer regs are available for argument passing.
40244029 unsigned ArgOffset = LinkageSize;
4025 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4030 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
40264031
40274032 static const MCPhysReg GPR[] = {
40284033 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
40504055 for (unsigned i = 0; i != NumOps; ++i) {
40514056 SDValue Arg = OutVals[i];
40524057 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4058
4059 /* Respect alignment of argument on the stack. */
4060 unsigned Align =
4061 CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
4062 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
4063
4064 /* Compute GPR index associated with argument offset. */
4065 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4066 GPR_idx = std::min(GPR_idx, NumGPRs);
40534067
40544068 // PtrOff will be used to store the current argument to the stack if a
40554069 // register cannot be found for it.
40824096 if (Size == 0)
40834097 continue;
40844098
4085 unsigned BVAlign = Flags.getByValAlign();
4086 if (BVAlign > 8) {
4087 if (BVAlign % PtrByteSize != 0)
4088 llvm_unreachable(
4089 "ByVal alignment is not a multiple of the pointer size");
4090
4091 ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
4092 }
4093
40944099 // All aggregates smaller than 8 bytes must be passed right-justified.
40954100 if (Size==1 || Size==2 || Size==4) {
40964101 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
40994104 MachinePointerInfo(), VT,
41004105 false, false, 0);
41014106 MemOpChains.push_back(Load.getValue(1));
4102 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4107 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
41034108
41044109 ArgOffset += PtrByteSize;
41054110 continue;
41614166 MachinePointerInfo(),
41624167 false, false, false, 0);
41634168 MemOpChains.push_back(Load.getValue(1));
4164 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4169 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
41654170
41664171 // Done with this argument.
41674172 ArgOffset += PtrByteSize;
41944199 case MVT::i32:
41954200 case MVT::i64:
41964201 if (GPR_idx != NumGPRs) {
4197 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
4202 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
41984203 } else {
41994204 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
42004205 true, isTailCall, false, MemOpChains,
42294234 MachinePointerInfo(), false, false,
42304235 false, 0);
42314236 MemOpChains.push_back(Load.getValue(1));
4232 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4237 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
42334238 }
4234 } else if (GPR_idx != NumGPRs)
4235 // If we have any FPRs remaining, we may also have GPRs remaining.
4236 ++GPR_idx;
4239 }
42374240 } else {
42384241 // Single-precision floating-point values are mapped to the
42394242 // second (rightmost) word of the stack doubleword.
42544257 case MVT::v16i8:
42554258 case MVT::v2f64:
42564259 case MVT::v2i64:
4257 // Vectors are aligned to a 16-byte boundary in the argument save area.
4258 while (ArgOffset % 16 !=0) {
4259 ArgOffset += PtrByteSize;
4260 if (GPR_idx != NumGPRs)
4261 GPR_idx++;
4262 }
4263
42644260 // For a varargs call, named arguments go into VRs or on the stack as
42654261 // usual; unnamed arguments always go to the stack or the corresponding
42664262 // GPRs when within range. For now, we always put the value in both
42684264 if (isVarArg) {
42694265 // We could elide this store in the case where the object fits
42704266 // entirely in R registers. Maybe later.
4271 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4272 DAG.getConstant(ArgOffset, PtrVT));
42734267 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
42744268 MachinePointerInfo(), false, false, 0);
42754269 MemOpChains.push_back(Store);
43144308 TailCallArguments, dl);
43154309 }
43164310 ArgOffset += 16;
4317 GPR_idx = std::min(GPR_idx + 2, NumGPRs);
43184311 break;
43194312 }
43204313 }
4314
4315 assert(NumBytesActuallyUsed == ArgOffset);
43214316
43224317 if (!MemOpChains.empty())
43234318 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
0 ; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s
1
2 target datalayout = "E-m:e-i64:64-n32:64"
3 target triple = "powerpc64-unknown-linux-gnu"
4
5 %struct.test = type { i64, [8 x i8] }
6 %struct.pad = type { [8 x i64] }
7
8 @gt = common global %struct.test zeroinitializer, align 16
9 @gp = common global %struct.pad zeroinitializer, align 8
10
; callee1: returns its third argument %z. %y is a 16-byte %struct.test passed
; "byval align 16" between two i32 arguments; the CHECK lines that follow
; expect %z to arrive in r7 ("mr 3, 7").
11 define signext i32 @callee1(i32 signext %x, %struct.test* byval align 16 nocapture readnone %y, i32 signext %z) {
12 entry:
13 ret i32 %z
14 }
15 ; CHECK-LABEL: @callee1
16 ; CHECK: mr 3, 7
17 ; CHECK: blr
18
19 declare signext i32 @test1(i32 signext, %struct.test* byval align 16, i32 signext)
; caller1: tail-calls @test1 with the constant 0, the global @gt passed
; "byval align 16", and the incoming %z; the call result is discarded. The
; CHECK lines that follow expect %z to be copied from r3 into r7 (through a
; scratch register) before "bl test1".
20 define void @caller1(i32 signext %z) {
21 entry:
22 %call = tail call signext i32 @test1(i32 signext 0, %struct.test* byval align 16 @gt, i32 signext %z)
23 ret void
24 }
25 ; CHECK-LABEL: @caller1
26 ; CHECK: mr [[REG:[0-9]+]], 3
27 ; CHECK: mr 7, [[REG]]
28 ; CHECK: bl test1
29
; callee2: loads and returns the first i64 field of %z, a %struct.test passed
; "byval align 16" after a byval %struct.pad (eight i64 values) and an i32.
; The CHECK lines that follow expect that field to be loaded from offset 128
; off r1.
30 define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) {
31 entry:
32 %x1 = getelementptr inbounds %struct.test* %z, i64 0, i32 0
33 %0 = load i64* %x1, align 16
34 ret i64 %0
35 }
36 ; CHECK-LABEL: @callee2
37 ; CHECK: ld [[REG:[0-9]+]], 128(1)
38 ; CHECK: mr 3, [[REG]]
39 ; CHECK: blr
40
41 declare i64 @test2(%struct.pad* byval, i32 signext, %struct.test* byval align 16)
; caller2: stores %z into the first field of a 16-byte-aligned stack temporary
; and passes that temporary "byval align 16" as the third argument of @test2,
; after @gp (byval) and the constant 0; the call result is discarded. The
; CHECK lines that follow expect the stored value to be reloaded and written
; to offset 128 off r1 before "bl test2".
42 define void @caller2(i64 %z) {
43 entry:
44 %tmp = alloca %struct.test, align 16
45 %.compoundliteral.sroa.0.0..sroa_idx = getelementptr inbounds %struct.test* %tmp, i64 0, i32 0
46 store i64 %z, i64* %.compoundliteral.sroa.0.0..sroa_idx, align 16
47 %call = call i64 @test2(%struct.pad* byval @gp, i32 signext 0, %struct.test* byval align 16 %tmp)
48 ret void
49 }
50 ; CHECK-LABEL: @caller2
51 ; CHECK: std 3, [[OFF:[0-9]+]](1)
52 ; CHECK: ld [[REG:[0-9]+]], [[OFF]](1)
53 ; CHECK: std [[REG]], 128(1)
54 ; CHECK: bl test2
55