llvm.org GIT mirror llvm / 67e73ac
[NVPTX] Force minimum alignment of 4 for byval arguments of device-side functions. Taking address of a byval variable in PTX is legal, but currently runs into miscompilation by ptxas on sm_50+ (NVIDIA issue 1789042). Work around the issue by enforcing minimum alignment on byval arguments of device functions. The change is a no-op on SASS level for sm_3x where ptxas already aligns local copy by at least 4. Differential Revision: https://reviews.llvm.org/D22428 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275893 91177308-0d34-0410-b5e6-96231b3b80d8 Artem Belevich 3 years ago
3 changed file(s) with 31 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
15881588 unsigned align = PAL.getParamAlignment(paramIndex + 1);
15891589 if (align == 0)
15901590 align = DL.getABITypeAlignment(ETy);
1591
1591 // Work around a bug in ptxas. When PTX code takes address of
1592 // byval parameter with alignment < 4, ptxas generates code to
1593 // spill argument into memory. Alas on sm_50+ ptxas generates
1594 // SASS code that fails with misaligned access. To work around
1595 // the problem, make sure that we align byval parameters by at
1596 // least 4. Matching change must be made in LowerCall() where we
1597 // prepare parameters for the call.
1598 //
1599 // TODO: this will need to be undone when we get to support multi-TU
1600 // device-side compilation as it breaks ABI compatibility with nvcc.
1601 // Hopefully ptxas bug is fixed by then.
1602 if (!isKernelFunc && align < 4)
1603 align = 4;
15921604 unsigned sz = DL.getTypeAllocSize(ETy);
15931605 O << "\t.param .align " << align << " .b8 ";
15941606 printParamName(I, paramIndex, O);
10711071 MachineFunction &MF = DAG.getMachineFunction();
10721072 const Function *F = MF.getFunction();
10731073 auto &DL = MF.getDataLayout();
1074 bool isKernel = llvm::isKernelFunction(*F);
10741075
10751076 SDValue tempChain = Chain;
10761077 Chain = DAG.getCALLSEQ_START(Chain,
13361337 // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
13371338 // so we don't need to worry about natural alignment or not.
13381339 // See TargetLowering::LowerCallTo().
1339 SDValue DeclareParamOps[] = {
1340 Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
1341 DAG.getConstant(paramCount, dl, MVT::i32),
1342 DAG.getConstant(sz, dl, MVT::i32), InFlag
1343 };
1340
1341 // Enforce minimum alignment of 4 to work around ptxas miscompile
1342 // for sm_50+. See corresponding alignment adjustment in
1343 // emitFunctionParamList() for details.
1344 if (!isKernel && ArgAlign < 4)
1345 ArgAlign = 4;
1346 SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
1347 DAG.getConstant(paramCount, dl, MVT::i32),
1348 DAG.getConstant(sz, dl, MVT::i32), InFlag};
13441349 Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
13451350 DeclareParamOps);
13461351 InFlag = Chain.getValue(1);
2222 ; CHECK: .param .align 4 .b8 t3_param_0[8]
2323 ret void
2424 }
25
26 ;;; Device (non-kernel) functions need at least 4-byte alignment on byval
27 ;;; parameters to avoid miscompilation by ptxas for sm_50+.
28 define ptx_device void @t4(i8* byval %x) {
29 ; CHECK: .func t4
30 ; CHECK: .param .align 4 .b8 t4_param_0[1]
31 ret void
32 }