llvm.org GIT mirror llvm / 655a52d
[NVPTX] Allow libcalls that are defined in the current module. The patch adds the possibility to make library calls on NVPTX. An important thing about library functions: they must be defined within the current module. This should guarantee that we produce valid PTX assembly (without calls to undefined functions). Anyone who wants to use the libcalls will probably have to link against compiler-rt or another implementation. Currently, it is completely impossible to make library calls because of the error LLVM ERROR: Cannot select: i32 = ExternalSymbol '...'. But we can lower ExternalSymbol to TargetExternalSymbol and verify that the function definition is available. Also, there was an issue with the DAG during legalisation. When we expand an instruction into a libcall, the inner call chain isn't "integrated" into the outer chain. Since the last "data-flow" node (the call retval load) is located earlier in the call chain than the CALLSEQ_END node, the latter becomes a leaf and therefore a dead node (and is removed quite quickly). The solution proposed here relies on additional data-flow pseudo nodes (ProxyReg) whose only purpose is to keep CALLSEQ_END alive during the legalisation and instruction selection phases - we remove the pseudo instructions before the register scheduling phase. Patch by Denys Zariaiev! Differential Revision: https://reviews.llvm.org/D34708 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350069 91177308-0d34-0410-b5e6-96231b3b80d8 Justin Lebar 1 year, 10 months ago
17 changed file(s) with 508 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
11271127 /// Expand the specified \c ISD::VACOPY node as the Legalize pass would.
11281128 SDValue expandVACopy(SDNode *Node);
11291129
1130 /// Returns a GlobalAddress of the function from the current module with
1131 /// a name matching the given ExternalSymbol. Additionally can provide the
1132 /// matched function.
1133 /// Reports a fatal error if the function doesn't exist.
1134 SDValue getSymbolFunctionGlobalAddress(SDValue Op,
1135 Function **TargetFunction = nullptr);
1136
11301137 /// *Mutate* the specified node in-place to have the
11311138 /// specified operands. If the resultant node already exists in the DAG,
11321139 /// this does not modify the specified node, instead it returns the node that
84638463 return TokenFactor;
84648464 }
84658465
8466 SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
8467 Function **OutFunction) {
8468 assert(isa(Op) && "Node should be an ExternalSymbol");
8469
8470 auto *Symbol = cast(Op)->getSymbol();
8471 auto *Module = MF->getFunction().getParent();
8472 auto *Function = Module->getFunction(Symbol);
8473
8474 if (OutFunction != nullptr)
8475 *OutFunction = Function;
8476
8477 if (Function != nullptr) {
8478 auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace());
8479 return getGlobalAddress(Function, SDLoc(Op), PtrTy);
8480 }
8481
8482 std::string ErrorStr;
8483 raw_string_ostream ErrorFormatter(ErrorStr);
8484
8485 ErrorFormatter << "Undefined external symbol ";
8486 ErrorFormatter << '"' << Symbol << '"';
8487 ErrorFormatter.flush();
8488
8489 report_fatal_error(ErrorStr);
8490 }
8491
84668492 //===----------------------------------------------------------------------===//
84678493 // SDNode Class
84688494 //===----------------------------------------------------------------------===//
3131 NVPTXUtilities.cpp
3232 NVVMIntrRange.cpp
3333 NVVMReflect.cpp
34 NVPTXProxyRegErasure.cpp
3435 )
3536
3637 add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
5252 FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
5353 BasicBlockPass *createNVPTXLowerAllocaPass();
5454 MachineFunctionPass *createNVPTXPeephole();
55 MachineFunctionPass *createNVPTXProxyRegErasurePass();
5556
5657 Target &getTheNVPTXTarget32();
5758 Target &getTheNVPTXTarget64();
729729 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
730730 const Function *F = &*FI;
731731
732 if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
733 emitDeclaration(F, O);
734 continue;
735 }
736
732737 if (F->isDeclaration()) {
733738 if (F->use_empty())
734739 continue;
662662 return "NVPTXISD::CallSeqEnd";
663663 case NVPTXISD::CallPrototype:
664664 return "NVPTXISD::CallPrototype";
665 case NVPTXISD::ProxyReg:
666 return "NVPTXISD::ProxyReg";
665667 case NVPTXISD::LoadV2:
666668 return "NVPTXISD::LoadV2";
667669 case NVPTXISD::LoadV4:
16651667 // indirect calls but is always null for libcalls.
16661668 bool isIndirectCall = !Func && CS;
16671669
1670 if (isa(Callee)) {
1671 Function* CalleeFunc = nullptr;
1672
1673 // Try to find the callee in the current module.
1674 Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
1675 assert(CalleeFunc != nullptr && "Libcall callee must be set.");
1676
1677 // Set the "libcall callee" attribute to indicate that the function
1678 // must always have a declaration.
1679 CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
1680 }
1681
16681682 if (isIndirectCall) {
16691683 // This is indirect function call case : PTX requires a prototype of the
16701684 // form
17361750 Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
17371751 InFlag = Chain.getValue(1);
17381752 }
1753
1754 SmallVector ProxyRegOps;
1755 SmallVector, 16> ProxyRegTruncates;
17391756
17401757 // Generate loads from param memory/moves from registers for result
17411758 if (Ins.size() > 0) {
18071824 MachineMemOperand::MOLoad);
18081825
18091826 for (unsigned j = 0; j < NumElts; ++j) {
1810 SDValue Ret = RetVal.getValue(j);
1827 ProxyRegOps.push_back(RetVal.getValue(j));
1828
18111829 if (needTruncate)
1812 Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
1813 InVals.push_back(Ret);
1830 ProxyRegTruncates.push_back(Optional(Ins[VecIdx + j].VT));
1831 else
1832 ProxyRegTruncates.push_back(Optional());
18141833 }
1834
18151835 Chain = RetVal.getValue(NumElts);
18161836 InFlag = RetVal.getValue(NumElts + 1);
18171837
18271847 DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
18281848 true),
18291849 InFlag, dl);
1850 InFlag = Chain.getValue(1);
18301851 uniqueCallSite++;
1852
1853 // Append ProxyReg instructions to the chain to make sure that `callseq_end`
1854 // will not get lost. Otherwise, during libcalls expansion, the nodes can become
1855 // dangling.
1856 for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
1857 SDValue Ret = DAG.getNode(
1858 NVPTXISD::ProxyReg, dl,
1859 DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
1860 { Chain, ProxyRegOps[i], InFlag }
1861 );
1862
1863 Chain = Ret.getValue(1);
1864 InFlag = Ret.getValue(2);
1865
1866 if (ProxyRegTruncates[i].hasValue()) {
1867 Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
1868 }
1869
1870 InVals.push_back(Ret);
1871 }
18311872
18321873 // set isTailCall to false for now, until we figure out how to express
18331874 // tail call optimization in PTX
5050 CallSeqBegin,
5151 CallSeqEnd,
5252 CallPrototype,
53 ProxyReg,
5354 FUN_SHFL_CLAMP,
5455 FUN_SHFR_CLAMP,
5556 MUL_WIDE_SIGNED,
18841884 def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
18851885 def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
18861886 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
1887 def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
18871888
18881889 def DeclareParam :
18891890 SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
19711972 def RETURNNode :
19721973 SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
19731974 [SDNPHasChain, SDNPSideEffect]>;
1975 def ProxyReg :
1976 SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
1977 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
19741978
19751979 let mayLoad = 1 in {
19761980 class LoadParamMemInst :
22482252 def PseudoUseParamF64 : PseudoUseParamInst;
22492253 def PseudoUseParamF32 : PseudoUseParamInst;
22502254
// Pseudo "mov" that forwards $src to $dst and matches the ProxyReg node.
//   SzStr    - PTX type suffix appended to "mov." (e.g. "b32", "f64", "pred").
//   regclass - register class used for both the source and the destination.
class ProxyRegInst<string SzStr, NVPTXRegClass regclass> :
  NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
            !strconcat("mov.", SzStr, " \t$dst, $src;"),
            [(set regclass:$dst, (ProxyReg regclass:$src))]>;

// One ProxyReg pseudo per register class.
let isCodeGenOnly=1, isPseudo=1 in {
  def ProxyRegI1    : ProxyRegInst<"pred", Int1Regs>;
  def ProxyRegI16   : ProxyRegInst<"b16",  Int16Regs>;
  def ProxyRegI32   : ProxyRegInst<"b32",  Int32Regs>;
  def ProxyRegI64   : ProxyRegInst<"b64",  Int64Regs>;
  def ProxyRegF16   : ProxyRegInst<"b16",  Float16Regs>;
  def ProxyRegF32   : ProxyRegInst<"f32",  Float32Regs>;
  def ProxyRegF64   : ProxyRegInst<"f64",  Float64Regs>;
  def ProxyRegF16x2 : ProxyRegInst<"b32",  Float16x2Regs>;
}
22512270
22522271 //
22532272 // Load / Store Handling
25402559 class F_BITCONVERT
25412560 NVPTXRegClass regclassOut> :
25422561 NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2543 !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")),
2562 !strconcat("mov.b", SzStr, " \t$d, $a;"),
25442563 [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
25452564
25462565 def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>;
0 //===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The pass is needed to remove ProxyReg instructions and restore related
10 // registers. The instructions were needed at instruction selection stage to
11 // make sure that callseq_end nodes won't be removed as "dead nodes". This can
12 // happen when we expand instructions into libcalls and the call site doesn't
13 // care about the libcall chain. Call site cares about data flow only, and the
14 // latest data flow node happens to be before callseq_end. Therefore the node
15 // becomes dangling and "dead". The ProxyReg acts like an additional data flow
16 // node *after* the callseq_end in the chain and ensures that everything will be
17 // preserved.
18 //
19 //===----------------------------------------------------------------------===//
20
21 #include "NVPTX.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetRegisterInfo.h"
27
28 using namespace llvm;
29
30 namespace llvm {
31 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
32 }
33
34 namespace {
35
36 struct NVPTXProxyRegErasure : public MachineFunctionPass {
37 public:
38 static char ID;
39 NVPTXProxyRegErasure() : MachineFunctionPass(ID) {
40 initializeNVPTXProxyRegErasurePass(*PassRegistry::getPassRegistry());
41 }
42
43 bool runOnMachineFunction(MachineFunction &MF) override;
44
45 StringRef getPassName() const override {
46 return "NVPTX Proxy Register Instruction Erasure";
47 }
48
49 void getAnalysisUsage(AnalysisUsage &AU) const override {
50 MachineFunctionPass::getAnalysisUsage(AU);
51 }
52
53 private:
54 void replaceMachineInstructionUsage(MachineFunction &MF, MachineInstr &MI);
55
56 void replaceRegisterUsage(MachineInstr &Instr, MachineOperand &From,
57 MachineOperand &To);
58 };
59
60 } // namespace
61
62 char NVPTXProxyRegErasure::ID = 0;
63
64 INITIALIZE_PASS(NVPTXProxyRegErasure, "nvptx-proxyreg-erasure", "NVPTX ProxyReg Erasure", false, false)
65
66 bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
67 SmallVector RemoveList;
68
69 for (auto &BB : MF) {
70 for (auto &MI : BB) {
71 switch (MI.getOpcode()) {
72 case NVPTX::ProxyRegI1:
73 case NVPTX::ProxyRegI16:
74 case NVPTX::ProxyRegI32:
75 case NVPTX::ProxyRegI64:
76 case NVPTX::ProxyRegF16:
77 case NVPTX::ProxyRegF16x2:
78 case NVPTX::ProxyRegF32:
79 case NVPTX::ProxyRegF64:
80 replaceMachineInstructionUsage(MF, MI);
81 RemoveList.push_back(&MI);
82 break;
83 }
84 }
85 }
86
87 for (auto *MI : RemoveList) {
88 MI->eraseFromParent();
89 }
90
91 return !RemoveList.empty();
92 }
93
94 void NVPTXProxyRegErasure::replaceMachineInstructionUsage(MachineFunction &MF,
95 MachineInstr &MI) {
96 auto &InOp = *MI.uses().begin();
97 auto &OutOp = *MI.defs().begin();
98
99 assert(InOp.isReg() && "ProxyReg input operand should be a register.");
100 assert(OutOp.isReg() && "ProxyReg output operand should be a register.");
101
102 for (auto &BB : MF) {
103 for (auto &I : BB) {
104 replaceRegisterUsage(I, OutOp, InOp);
105 }
106 }
107 }
108
109 void NVPTXProxyRegErasure::replaceRegisterUsage(MachineInstr &Instr,
110 MachineOperand &From,
111 MachineOperand &To) {
112 for (auto &Op : Instr.uses()) {
113 if (Op.isReg() && Op.getReg() == From.getReg()) {
114 Op.setReg(To.getReg());
115 }
116 }
117 }
118
119 MachineFunctionPass *llvm::createNVPTXProxyRegErasurePass() {
120 return new NVPTXProxyRegErasure();
121 }
6767 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
6868 void initializeNVPTXLowerArgsPass(PassRegistry &);
6969 void initializeNVPTXLowerAllocaPass(PassRegistry &);
70 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
7071
7172 } // end namespace llvm
7273
8687 initializeNVPTXLowerArgsPass(PR);
8788 initializeNVPTXLowerAllocaPass(PR);
8889 initializeNVPTXLowerAggrCopiesPass(PR);
90 initializeNVPTXProxyRegErasurePass(PR);
8991 }
9092
9193 static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
159161
160162 void addIRPasses() override;
161163 bool addInstSelector() override;
164 void addPreRegAlloc() override;
162165 void addPostRegAlloc() override;
163166 void addMachineSSAOptimization() override;
164167
300303 return false;
301304 }
302305
306 void NVPTXPassConfig::addPreRegAlloc() {
307 // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
308 addPass(createNVPTXProxyRegErasurePass());
309 }
310
303311 void NVPTXPassConfig::addPostRegAlloc() {
304312 addPass(createNVPTXPrologEpilogPass(), false);
305313 if (getOptLevel() != CodeGenOpt::None) {
0 ; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
1 ; Make sure the example doesn't crash with segfault
2
3 ; CHECK: .visible .func ({{.*}}) loop
4 define i32 @loop(i32, i32) {
5 entry:
6 br label %loop
7
8 loop:
9 %i = phi i32 [ %0, %entry ], [ %res, %loop ]
10 %res = call i32 @div(i32 %i, i32 %1)
11
12 %exitcond = icmp eq i32 %res, %0
13 br i1 %exitcond, label %exit, label %loop
14
15 exit:
16 ret i32 %res
17 }
18
19 define i32 @div(i32, i32) {
20 ret i32 0
21 }
0 ; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
1 ; Allow to make libcalls that are defined in the current module
2
3 ; Underlying libcall declaration
4 ; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3
5
6 define i128 @remainder(i128, i128) {
7 bb0:
8 ; CHECK: { // callseq 0, 0
9 ; CHECK: call.uni (retval0),
10 ; CHECK-NEXT: __umodti3,
11 ; CHECK-NEXT: (
12 ; CHECK-NEXT: param0,
13 ; CHECK-NEXT: param1
14 ; CHECK-NEXT: );
15 ; CHECK-NEXT: ld.param.v2.b64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [retval0+0];
16 ; CHECK-NEXT: } // callseq 0
17 %a = urem i128 %0, %1
18 br label %bb1
19
20 bb1:
21 ; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]};
22 ; CHECK-NEXT: ret;
23 ret i128 %a
24 }
25
26 ; Underlying libcall definition
27 ; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3(
28 define i128 @__umodti3(i128, i128) {
29 ret i128 0
30 }
0 ; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
1 ; used to panic on failed assetion and now fails with a "Cannot select"
1 ; used to panic on failed assertion and now fails with an "Undefined external symbol"
22
3 ; CHECK: LLVM ERROR: Cannot select: {{t28|0x[0-9a-f]+}}: i32 = ExternalSymbol'__umodti3'
3 ; CHECK: LLVM ERROR: Undefined external symbol "__umodti3"
44 define hidden i128 @remainder(i128, i128) {
55 %3 = urem i128 %0, %1
66 ret i128 %3
0 ; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
1 ; used to segfault and now fails with an "Undefined external symbol"
2
3 ; CHECK: LLVM ERROR: Undefined external symbol "__powidf2"
4 define double @powi(double, i32) {
5 %a = call double @llvm.powi.f64(double %0, i32 %1)
6 ret double %a
7 }
8
9 declare double @llvm.powi.f64(double, i32) nounwind readnone
0 ; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
1 ; RUN: | FileCheck %s --check-prefix=MIR --check-prefix=MIR-BEFORE
2
3 ; RUN: llc -march=nvptx64 -stop-after=nvptx-proxyreg-erasure < %s 2>&1 \
4 ; RUN: | FileCheck %s --check-prefix=MIR --check-prefix=MIR-AFTER
5
6 ; Check ProxyRegErasure pass MIR manipulation.
7
8 declare <4 x i32> @callee_vec_i32()
9 define <4 x i32> @check_vec_i32() {
10 ; MIR: body:
11 ; MIR-DAG: Callseq_Start {{[0-9]+}}, {{[0-9]+}}
12 ; MIR-DAG: %0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0
13 ; MIR-DAG: Callseq_End {{[0-9]+}}
14
15 ; MIR-BEFORE-DAG: %4:int32regs = ProxyRegI32 killed %0
16 ; MIR-BEFORE-DAG: %5:int32regs = ProxyRegI32 killed %1
17 ; MIR-BEFORE-DAG: %6:int32regs = ProxyRegI32 killed %2
18 ; MIR-BEFORE-DAG: %7:int32regs = ProxyRegI32 killed %3
19 ; MIR-BEFORE-DAG: StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0
20 ; MIR-AFTER-DAG: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3, 0
21
22 %ret = call <4 x i32> @callee_vec_i32()
23 ret <4 x i32> %ret
24 }
0 ; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
1 ; RUN: | llc -x mir -march=nvptx64 -start-before=nvptx-proxyreg-erasure 2>&1 \
2 ; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITH
3
4 ; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
5 ; RUN: | llc -x mir -march=nvptx64 -start-after=nvptx-proxyreg-erasure 2>&1 \
6 ; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITHOUT
7
8 ; Thorough testing of ProxyRegErasure: PTX assembly with and without the pass.
9
10 declare i1 @callee_i1()
11 define i1 @check_i1() {
12 ; PTX-LABEL: check_i1
13 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
14 ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
15 ; PTX-DAG: } // callseq {{[0-9]+}}
16
17 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
18 ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 1;
19 ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 1;
20
21 ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]];
22
23 %ret = call i1 @callee_i1()
24 ret i1 %ret
25 }
26
27 declare i16 @callee_i16()
28 define i16 @check_i16() {
29 ; PTX-LABEL: check_i16
30 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
31 ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
32 ; PTX-DAG: } // callseq {{[0-9]+}}
33
34 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
35 ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 65535;
36 ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 65535;
37
38 ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]];
39
40 %ret = call i16 @callee_i16()
41 ret i16 %ret
42 }
43
44 declare i32 @callee_i32()
45 define i32 @check_i32() {
46 ; PTX-LABEL: check_i32
47 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
48 ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
49 ; PTX-DAG: } // callseq {{[0-9]+}}
50
51 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
52 ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
53 ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];
54
55 %ret = call i32 @callee_i32()
56 ret i32 %ret
57 }
58
59 declare i64 @callee_i64()
60 define i64 @check_i64() {
61 ; PTX-LABEL: check_i64
62 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
63 ; PTX-DAG: ld.param.b64 [[LD:%rd[0-9]+]], [retval0+0];
64 ; PTX-DAG: } // callseq {{[0-9]+}}
65
66 ; PTX-WITHOUT-DAG: mov.b64 [[PROXY:%rd[0-9]+]], [[LD]];
67 ; PTX-WITHOUT-DAG: st.param.b64 [func_retval0+0], [[PROXY]];
68 ; PTX-WITH-DAG: st.param.b64 [func_retval0+0], [[LD]];
69
70 %ret = call i64 @callee_i64()
71 ret i64 %ret
72 }
73
74 declare i128 @callee_i128()
75 define i128 @check_i128() {
76 ; PTX-LABEL: check_i128
77 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
78 ; PTX-DAG: ld.param.v2.b64 {[[LD0:%rd[0-9]+]], [[LD1:%rd[0-9]+]]}, [retval0+0];
79 ; PTX-DAG: } // callseq {{[0-9]+}}
80
81 ; PTX-WITHOUT-DAG: mov.b64 [[PROXY0:%rd[0-9]+]], [[LD0]];
82 ; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%rd[0-9]+]], [[LD1]];
83 ; PTX-WITHOUT-DAG: st.param.v2.b64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]};
84 ; PTX-WITH-DAG: st.param.v2.b64 [func_retval0+0], {[[LD0]], [[LD1]]};
85
86 %ret = call i128 @callee_i128()
87 ret i128 %ret
88 }
89
90 declare half @callee_f16()
91 define half @check_f16() {
92 ; PTX-LABEL: check_f16
93 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
94 ; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0];
95 ; PTX-DAG: } // callseq {{[0-9]+}}
96
97 ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]];
98 ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]];
99 ; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]];
100
101 %ret = call half @callee_f16()
102 ret half %ret
103 }
104
105 declare float @callee_f32()
106 define float @check_f32() {
107 ; PTX-LABEL: check_f32
108 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
109 ; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0+0];
110 ; PTX-DAG: } // callseq {{[0-9]+}}
111
112 ; PTX-WITHOUT-DAG: mov.f32 [[PROXY:%f[0-9]+]], [[LD]];
113 ; PTX-WITHOUT-DAG: st.param.f32 [func_retval0+0], [[PROXY]];
114 ; PTX-WITH-DAG: st.param.f32 [func_retval0+0], [[LD]];
115
116 %ret = call float @callee_f32()
117 ret float %ret
118 }
119
120 declare double @callee_f64()
121 define double @check_f64() {
122 ; PTX-LABEL: check_f64
123 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
124 ; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0+0];
125 ; PTX-DAG: } // callseq {{[0-9]+}}
126
127 ; PTX-WITHOUT-DAG: mov.f64 [[PROXY:%fd[0-9]+]], [[LD]];
128 ; PTX-WITHOUT-DAG: st.param.f64 [func_retval0+0], [[PROXY]];
129 ; PTX-WITH-DAG: st.param.f64 [func_retval0+0], [[LD]];
130
131 %ret = call double @callee_f64()
132 ret double %ret
133 }
134
135 declare <4 x i32> @callee_vec_i32()
136 define <4 x i32> @check_vec_i32() {
137 ; PTX-LABEL: check_vec_i32
138 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
139 ; PTX-DAG: ld.param.v4.b32 {[[LD0:%r[0-9]+]], [[LD1:%r[0-9]+]], [[LD2:%r[0-9]+]], [[LD3:%r[0-9]+]]}, [retval0+0];
140 ; PTX-DAG: } // callseq {{[0-9]+}}
141
142 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY0:%r[0-9]+]], [[LD0]];
143 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY1:%r[0-9]+]], [[LD1]];
144 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY2:%r[0-9]+]], [[LD2]];
145 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY3:%r[0-9]+]], [[LD3]];
146 ; PTX-WITHOUT-DAG: st.param.v4.b32 [func_retval0+0], {[[PROXY0]], [[PROXY1]], [[PROXY2]], [[PROXY3]]};
147 ; PTX-WITH-DAG: st.param.v4.b32 [func_retval0+0], {[[LD0]], [[LD1]], [[LD2]], [[LD3]]};
148
149 %ret = call <4 x i32> @callee_vec_i32()
150 ret <4 x i32> %ret
151 }
152
153 declare <2 x half> @callee_vec_f16()
154 define <2 x half> @check_vec_f16() {
155 ; PTX-LABEL: check_vec_f16
156 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
157 ; PTX-DAG: ld.param.b32 [[LD:%hh[0-9]+]], [retval0+0];
158 ; PTX-DAG: } // callseq {{[0-9]+}}
159
160 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%hh[0-9]+]], [[LD]];
161 ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
162 ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];
163
164 %ret = call <2 x half> @callee_vec_f16()
165 ret <2 x half> %ret
166 }
167
168 declare <2 x double> @callee_vec_f64()
169 define <2 x double> @check_vec_f64() {
170 ; PTX-LABEL: check_vec_f64
171 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
172 ; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0+0];
173 ; PTX-DAG: } // callseq {{[0-9]+}}
174
175 ; PTX-WITHOUT-DAG: mov.f64 [[PROXY0:%fd[0-9]+]], [[LD0]];
176 ; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]];
177 ; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]};
178 ; PTX-WITH-DAG: st.param.v2.f64 [func_retval0+0], {[[LD0]], [[LD1]]};
179
180 %ret = call <2 x double> @callee_vec_f64()
181 ret <2 x double> %ret
182 }
+0
-10
test/CodeGen/NVPTX/zero-cs.ll less more
None ; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
1 ; used to seqfault and now fails with a "Cannot select"
2
3 ; CHECK: LLVM ERROR: Cannot select: {{t7|0x[0-9a-f]+}}: i32 = ExternalSymbol'__powidf2'
4 define double @powi() {
5 %1 = call double @llvm.powi.f64(double 1.000000e+00, i32 undef)
6 ret double %1
7 }
8
9 declare double @llvm.powi.f64(double, i32) nounwind readnone