59#define DEBUG_TYPE "openmp-ir-builder"
66 cl::desc(
"Use optimistic attributes describing "
67 "'as-if' properties of runtime calls."),
71 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
72 cl::desc(
"Factor for the unroll threshold to account for code "
73 "simplifications still taking place"),
84 if (!IP1.isSet() || !IP2.isSet())
86 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
91 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
92 case OMPScheduleType::UnorderedStaticChunked:
93 case OMPScheduleType::UnorderedStatic:
94 case OMPScheduleType::UnorderedDynamicChunked:
95 case OMPScheduleType::UnorderedGuidedChunked:
96 case OMPScheduleType::UnorderedRuntime:
97 case OMPScheduleType::UnorderedAuto:
98 case OMPScheduleType::UnorderedTrapezoidal:
99 case OMPScheduleType::UnorderedGreedy:
100 case OMPScheduleType::UnorderedBalanced:
101 case OMPScheduleType::UnorderedGuidedIterativeChunked:
102 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
103 case OMPScheduleType::UnorderedSteal:
104 case OMPScheduleType::UnorderedStaticBalancedChunked:
105 case OMPScheduleType::UnorderedGuidedSimd:
106 case OMPScheduleType::UnorderedRuntimeSimd:
107 case OMPScheduleType::OrderedStaticChunked:
108 case OMPScheduleType::OrderedStatic:
109 case OMPScheduleType::OrderedDynamicChunked:
110 case OMPScheduleType::OrderedGuidedChunked:
111 case OMPScheduleType::OrderedRuntime:
112 case OMPScheduleType::OrderedAuto:
113 case OMPScheduleType::OrderdTrapezoidal:
114 case OMPScheduleType::NomergeUnorderedStaticChunked:
115 case OMPScheduleType::NomergeUnorderedStatic:
116 case OMPScheduleType::NomergeUnorderedDynamicChunked:
117 case OMPScheduleType::NomergeUnorderedGuidedChunked:
118 case OMPScheduleType::NomergeUnorderedRuntime:
119 case OMPScheduleType::NomergeUnorderedAuto:
120 case OMPScheduleType::NomergeUnorderedTrapezoidal:
121 case OMPScheduleType::NomergeUnorderedGreedy:
122 case OMPScheduleType::NomergeUnorderedBalanced:
123 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
124 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
125 case OMPScheduleType::NomergeUnorderedSteal:
126 case OMPScheduleType::NomergeOrderedStaticChunked:
127 case OMPScheduleType::NomergeOrderedStatic:
128 case OMPScheduleType::NomergeOrderedDynamicChunked:
129 case OMPScheduleType::NomergeOrderedGuidedChunked:
130 case OMPScheduleType::NomergeOrderedRuntime:
131 case OMPScheduleType::NomergeOrderedAuto:
132 case OMPScheduleType::NomergeOrderedTrapezoidal:
140 SchedType & OMPScheduleType::MonotonicityMask;
141 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
152 if (Features.
count(
"+wavefrontsize64"))
153 return omp::getAMDGPUGridValues<64>();
154 return omp::getAMDGPUGridValues<32>();
165 bool HasSimdModifier) {
167 switch (ClauseKind) {
168 case OMP_SCHEDULE_Default:
169 case OMP_SCHEDULE_Static:
170 return HasChunks ? OMPScheduleType::BaseStaticChunked
171 : OMPScheduleType::BaseStatic;
172 case OMP_SCHEDULE_Dynamic:
173 return OMPScheduleType::BaseDynamicChunked;
174 case OMP_SCHEDULE_Guided:
175 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
176 : OMPScheduleType::BaseGuidedChunked;
177 case OMP_SCHEDULE_Auto:
179 case OMP_SCHEDULE_Runtime:
180 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
181 : OMPScheduleType::BaseRuntime;
189 bool HasOrderedClause) {
190 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
191 OMPScheduleType::None &&
192 "Must not have ordering nor monotonicity flags already set");
195 ? OMPScheduleType::ModifierOrdered
196 : OMPScheduleType::ModifierUnordered;
197 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
200 if (OrderingScheduleType ==
201 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
202 return OMPScheduleType::OrderedGuidedChunked;
203 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
204 OMPScheduleType::ModifierOrdered))
205 return OMPScheduleType::OrderedRuntime;
207 return OrderingScheduleType;
213 bool HasSimdModifier,
bool HasMonotonic,
214 bool HasNonmonotonic,
bool HasOrderedClause) {
215 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
216 OMPScheduleType::None &&
217 "Must not have monotonicity flags already set");
218 assert((!HasMonotonic || !HasNonmonotonic) &&
219 "Monotonic and Nonmonotonic are contradicting each other");
222 return ScheduleType | OMPScheduleType::ModifierMonotonic;
223 }
else if (HasNonmonotonic) {
224 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
234 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
235 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
241 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
249 bool HasSimdModifier,
bool HasMonotonicModifier,
250 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
256 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
257 HasNonmonotonicModifier, HasOrderedClause);
271 auto *Br = cast<BranchInst>(Term);
272 assert(!Br->isConditional() &&
273 "BB's terminator must be an unconditional branch (or degenerate)");
276 Br->setSuccessor(0,
Target);
281 NewBr->setDebugLoc(
DL);
286 assert(New->getFirstInsertionPt() == New->begin() &&
287 "Target BB must not have PHI nodes");
291 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
319 New->replaceSuccessorsPhiUsesWith(Old, New);
362 std::stack<Instruction *> &ToBeDeleted,
364 const Twine &
Name =
"",
bool AsPtr =
true) {
369 ToBeDeleted.push(FakeValAddr);
372 FakeVal = FakeValAddr;
376 ToBeDeleted.push(FakeVal);
389 ToBeDeleted.push(UseFakeVal);
400enum OpenMPOffloadingRequiresDirFlags {
402 OMP_REQ_UNDEFINED = 0x000,
404 OMP_REQ_NONE = 0x001,
406 OMP_REQ_REVERSE_OFFLOAD = 0x002,
408 OMP_REQ_UNIFIED_ADDRESS = 0x004,
410 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
412 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
419 : RequiresFlags(OMP_REQ_UNDEFINED) {}
422 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
423 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
424 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
425 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
426 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
427 RequiresFlags(OMP_REQ_UNDEFINED) {
428 if (HasRequiresReverseOffload)
429 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
430 if (HasRequiresUnifiedAddress)
431 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
432 if (HasRequiresUnifiedSharedMemory)
433 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
434 if (HasRequiresDynamicAllocators)
435 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
439 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
443 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
447 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
451 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
456 :
static_cast<int64_t
>(OMP_REQ_NONE);
461 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
463 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
468 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
470 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
475 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
477 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
482 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
484 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
502 Value *NumThreads3D =
525 auto FnAttrs = Attrs.getFnAttrs();
526 auto RetAttrs = Attrs.getRetAttrs();
528 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
533 bool Param =
true) ->
void {
534 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
535 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
536 if (HasSignExt || HasZeroExt) {
537 assert(AS.getNumAttributes() == 1 &&
538 "Currently not handling extension attr combined with others.");
540 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
543 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
550#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
551#include "llvm/Frontend/OpenMP/OMPKinds.def"
555#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
557 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
558 addAttrSet(RetAttrs, RetAttrSet, false); \
559 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
560 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
561 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
563#include "llvm/Frontend/OpenMP/OMPKinds.def"
577#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
579 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
581 Fn = M.getFunction(Str); \
583#include "llvm/Frontend/OpenMP/OMPKinds.def"
589#define OMP_RTL(Enum, Str, ...) \
591 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
593#include "llvm/Frontend/OpenMP/OMPKinds.def"
597 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
607 LLVMContext::MD_callback,
609 2, {-1, -1},
true)}));
622 assert(Fn &&
"Failed to create OpenMP runtime function");
629 auto *Fn = dyn_cast<llvm::Function>(RTLFn.
getCallee());
630 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
645 for (
auto Inst =
Block->getReverseIterator()->begin();
646 Inst !=
Block->getReverseIterator()->end();) {
647 if (
auto *
AllocaInst = dyn_cast_if_present<llvm::AllocaInst>(Inst)) {
671 ParallelRegionBlockSet.
clear();
673 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
692 ".omp_par", ArgsInZeroAddressSpace);
696 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
698 "Expected OpenMP outlining to be possible!");
700 for (
auto *V : OI.ExcludeArgsFromAggregate)
707 if (TargetCpuAttr.isStringAttribute())
710 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
711 if (TargetFeaturesAttr.isStringAttribute())
712 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
715 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
717 "OpenMP outlined functions should not return a value!");
729 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
736 "Expected instructions to add in the outlined region entry");
743 if (
I.isTerminator())
746 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
749 OI.EntryBB->moveBefore(&ArtificialEntry);
756 if (OI.PostOutlineCB)
757 OI.PostOutlineCB(*OutlinedFn);
788 errs() <<
"Error of kind: " << Kind
789 <<
" when emitting offload entries and metadata during "
790 "OMPIRBuilder finalization \n";
815 unsigned Reserve2Flags) {
817 LocFlags |= OMP_IDENT_FLAG_KMPC;
825 ConstantInt::get(
Int32, Reserve2Flags),
826 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
833 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
834 if (
GV.getInitializer() == Initializer)
839 M, OpenMPIRBuilder::Ident,
854 SrcLocStrSize = LocStr.
size();
863 if (
GV.isConstant() &&
GV.hasInitializer() &&
864 GV.getInitializer() == Initializer)
875 unsigned Line,
unsigned Column,
881 Buffer.
append(FunctionName);
883 Buffer.
append(std::to_string(Line));
885 Buffer.
append(std::to_string(Column));
893 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
904 if (
DIFile *DIF = DIL->getFile())
905 if (std::optional<StringRef> Source = DIF->getSource())
911 DIL->getColumn(), SrcLocStrSize);
923 "omp_global_thread_num");
928 bool ForceSimpleCall,
bool CheckCancelFlag) {
936 bool ForceSimpleCall,
bool CheckCancelFlag) {
943 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
946 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
949 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
952 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
955 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
968 bool UseCancelBarrier =
973 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
974 : OMPRTL___kmpc_barrier),
977 if (UseCancelBarrier && CheckCancelFlag)
986 omp::Directive CanceledDirective) {
998 Value *CancelKind =
nullptr;
999 switch (CanceledDirective) {
1000#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1001 case DirectiveEnum: \
1002 CancelKind = Builder.getInt32(Value); \
1004#include "llvm/Frontend/OpenMP/OMPKinds.def"
1015 auto ExitCB = [
this, CanceledDirective, Loc](
InsertPointTy IP) {
1016 if (CanceledDirective == OMPD_parallel) {
1020 omp::Directive::OMPD_unknown,
false,
1030 UI->eraseFromParent();
1043 auto *KernelArgsPtr =
1056 NumThreads, HostPtr, KernelArgsPtr};
1084 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1088 Value *Return =
nullptr;
1108 Args.NumTeams, Args.NumThreads,
1109 OutlinedFnID, ArgsVector));
1122 emitBlock(OffloadContBlock, CurFn,
true);
1127 omp::Directive CanceledDirective,
1130 "Unexpected cancellation!");
1180 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1183 "Expected at least tid and bounded tid as arguments");
1184 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1187 assert(CI &&
"Expected call instruction to outlined function");
1191 Type *PtrTy = OMPIRBuilder->VoidPtr;
1195 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.
saveIP();
1199 Value *Args = ArgsAlloca;
1207 for (
unsigned Idx = 0;
Idx < NumCapturedVars;
Idx++) {
1219 Value *Parallel51CallArgs[] = {
1223 NumThreads ? NumThreads : Builder.
getInt32(-1),
1226 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
1229 Builder.
getInt64(NumCapturedVars)};
1234 Builder.
CreateCall(RTLFn, Parallel51CallArgs);
1249 I->eraseFromParent();
1271 if (
auto *
F = dyn_cast<Function>(RTLFn.
getCallee())) {
1272 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1280 F->addMetadata(LLVMContext::MD_callback,
1289 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1292 "Expected at least tid and bounded tid as arguments");
1293 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1300 Value *ForkCallArgs[] = {
1301 Ident, Builder.
getInt32(NumCapturedVars),
1302 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};
1305 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1314 auto PtrTy = OMPIRBuilder->VoidPtr;
1315 if (IfCondition && NumCapturedVars == 0) {
1319 if (IfCondition && RealArgs.
back()->getType() != PtrTy)
1337 I->eraseFromParent();
1345 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1372 if (ProcBind != OMP_PROC_BIND_default) {
1376 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1403 TIDAddrAlloca, PointerType ::get(
M.
getContext(), 0),
"tid.addr.ascast");
1408 "zero.addr.ascast");
1432 if (IP.getBlock()->end() == IP.getPoint()) {
1438 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1439 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1440 "Unexpected insertion point for finalization call!");
1476 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1479 assert(BodyGenCB &&
"Expected body generation callback!");
1481 BodyGenCB(InnerAllocaIP, CodeGenIP);
1483 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1489 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1491 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1492 ThreadID, ToBeDeletedVec);
1497 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1499 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1516 PRegOutlinedExitBB->
setName(
"omp.par.outlined.exit");
1517 Blocks.push_back(PRegOutlinedExitBB);
1528 ".omp_par", ArgsInZeroAddressSpace);
1533 Extractor.
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1536 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1541 auto PrivHelper = [&](
Value &V) {
1542 if (&V == TIDAddr || &V == ZeroAddr) {
1548 for (
Use &U : V.uses())
1549 if (
auto *UserI = dyn_cast<Instruction>(U.getUser()))
1550 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1560 if (!V.getType()->isPointerTy()) {
1579 Value *ReplacementValue =
nullptr;
1580 CallInst *CI = dyn_cast<CallInst>(&V);
1582 ReplacementValue = PrivTID;
1585 PrivCB(InnerAllocaIP,
Builder.
saveIP(), V, *Inner, ReplacementValue));
1586 assert(ReplacementValue &&
1587 "Expected copy/create callback to set replacement value!");
1588 if (ReplacementValue == &V)
1593 UPtr->set(ReplacementValue);
1610 for (
Value *Input : Inputs) {
1615 for (
Value *Output : Outputs)
1619 "OpenMP outlining should not produce live-out values!");
1621 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1624 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1632 assert(FiniInfo.DK == OMPD_parallel &&
1633 "Unexpected finalization stack state!");
1643 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1644 UI->eraseFromParent();
1737 BodyGenCB(TaskAllocaIP, TaskBodyIP);
1745 std::stack<Instruction *> ToBeDeleted;
1747 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1749 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1750 TaskAllocaBB, ToBeDeleted](
Function &OutlinedFn)
mutable {
1752 assert(OutlinedFn.getNumUses() == 1 &&
1753 "there must be a single user for the outlined function");
1754 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
1758 bool HasShareds = StaleCI->
arg_size() > 1;
1797 assert(ArgStructAlloca &&
1798 "Unable to find the alloca instruction corresponding to arguments "
1799 "for extracted function");
1802 assert(ArgStructType &&
"Unable to find struct type corresponding to "
1803 "arguments for extracted function");
1811 TaskAllocFn, {Ident, ThreadID,
Flags,
1812 TaskSize, SharedsSize,
1824 Value *DepArray =
nullptr;
1825 if (Dependencies.
size()) {
1840 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1847 static_cast<unsigned int>(RTLDependInfoFields::Len));
1854 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1857 static_cast<unsigned int>(Dep.DepKind)),
1888 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
1894 if (Dependencies.
size()) {
1918 if (Dependencies.
size()) {
1939 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
1942 while (!ToBeDeleted.empty()) {
1943 ToBeDeleted.top()->eraseFromParent();
1993 if (IP.getBlock()->end() != IP.getPoint())
2004 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
2005 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2006 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2037 unsigned CaseNumber = 0;
2038 for (
auto SectionCB : SectionCBs) {
2054 Value *LB = ConstantInt::get(I32Ty, 0);
2055 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2056 Value *ST = ConstantInt::get(I32Ty, 1);
2058 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2060 applyStaticWorkshareLoop(Loc.
DL,
LoopInfo, AllocaIP, !IsNowait);
2064 assert(FiniInfo.DK == OMPD_sections &&
2065 "Unexpected finalization stack state!");
2071 AfterIP = {FiniBB, FiniBB->
begin()};
2085 if (IP.getBlock()->end() != IP.getPoint())
2104 Directive OMPD = Directive::OMPD_sections;
2107 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2120 M.getDataLayout().getDefaultGlobalsAddressSpace(),
2121 ".omp.reduction.func", &M);
2132 assert(RI.Variable &&
"expected non-null variable");
2133 assert(RI.PrivateVariable &&
"expected non-null private variable");
2134 assert(RI.ReductionGen &&
"expected non-null reduction generator callback");
2135 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
2136 "expected variables and their private equivalents to have the same "
2138 assert(RI.Variable->getType()->isPointerTy() &&
2139 "expected variables to be pointers");
2152 unsigned NumReductions = ReductionInfos.
size();
2159 for (
auto En :
enumerate(ReductionInfos)) {
2160 unsigned Index = En.index();
2173 bool CanGenerateAtomic =
2179 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
2184 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
2187 Value *Lock = getOMPCriticalRegionLock(
".reduction");
2189 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
2190 : RuntimeFunction::OMPRTL___kmpc_reduce);
2193 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
2194 ReductionFunc, Lock},
2213 for (
auto En :
enumerate(ReductionInfos)) {
2218 Value *RedValue =
nullptr;
2219 if (!IsByRef[En.index()]) {
2221 "red.value." +
Twine(En.index()));
2223 Value *PrivateRedValue =
2225 "red.private.value." +
Twine(En.index()));
2227 if (IsByRef[En.index()]) {
2229 PrivateRedValue, Reduced));
2232 PrivateRedValue, Reduced));
2237 if (!IsByRef[En.index()])
2241 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
2242 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
2250 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
2271 for (
auto En :
enumerate(ReductionInfos)) {
2274 RedArrayTy, LHSArrayPtr, 0, En.index());
2279 RedArrayTy, RHSArrayPtr, 0, En.index());
2289 if (!IsByRef[En.index()])
2306 Directive OMPD = Directive::OMPD_master;
2311 Value *Args[] = {Ident, ThreadId};
2319 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2330 Directive OMPD = Directive::OMPD_masked;
2336 Value *ArgsEnd[] = {Ident, ThreadId};
2344 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2379 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
2392 "omp_" +
Name +
".next",
true);
2403 CL->Header = Header;
2422 NextBB, NextBB,
Name);
2446 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
2456 auto *IndVarTy = cast<IntegerType>(Start->getType());
2457 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
2458 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
2464 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
2492 Value *CountIfLooping;
2493 if (InclusiveStop) {
2503 "omp_" +
Name +
".tripcount");
2524 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
2527 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
2533 InsertPointTy AllocaIP,
2534 bool NeedsBarrier) {
2535 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
2537 "Require dedicated allocate IP");
2549 Type *IVTy =
IV->getType();
2568 Constant *One = ConstantInt::get(IVTy, 1);
2576 Constant *SchedulingType = ConstantInt::get(
2577 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStatic));
2582 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
2583 PUpperBound, PStride, One,
Zero});
2588 CLI->setTripCount(TripCount);
2609 omp::Directive::OMPD_for,
false,
2620 bool NeedsBarrier,
Value *ChunkSize) {
2621 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
2622 assert(ChunkSize &&
"Chunk size is required");
2627 Type *IVTy =
IV->getType();
2629 "Max supported tripcount bitwidth is 64 bits");
2631 :
Type::getInt64Ty(Ctx);
2634 Constant *One = ConstantInt::get(InternalIVTy, 1);
2646 Value *PLowerBound =
2648 Value *PUpperBound =
2657 Value *CastedChunkSize =
2659 Value *CastedTripCount =
2662 Constant *SchedulingType = ConstantInt::get(
2663 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
2677 SchedulingType, PLastIter,
2678 PLowerBound, PUpperBound,
2683 Value *FirstChunkStart =
2685 Value *FirstChunkStop =
2690 Value *NextChunkStride =
2695 Value *DispatchCounter;
2699 FirstChunkStart, CastedTripCount, NextChunkStride,
2723 Value *IsLastChunk =
2725 Value *CountUntilOrigTripCount =
2728 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
2729 Value *BackcastedChunkTC =
2731 CLI->setTripCount(BackcastedChunkTC);
2736 Value *BackcastedDispatchCounter =
2770 case WorksharingLoopType::ForStaticLoop:
2773 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
2776 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
2778 case WorksharingLoopType::DistributeStaticLoop:
2781 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
2784 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
2786 case WorksharingLoopType::DistributeForStaticLoop:
2789 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
2792 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
2795 if (Bitwidth != 32 && Bitwidth != 64) {
2817 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
2818 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
2823 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
2824 Builder.
restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
2829 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
2830 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
2831 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
2867 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
2875 "Expected unique undroppable user of outlined function");
2876 CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
2877 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
2879 "Expected outlined function call to be located in loop preheader");
2881 if (OutlinedFnCallInstruction->
arg_size() > 1)
2888 LoopBodyArg, ParallelTaskPtr, TripCount,
2891 for (
auto &ToBeDeletedItem : ToBeDeleted)
2892 ToBeDeletedItem->eraseFromParent();
2898 InsertPointTy AllocaIP,
2911 OI.OuterAllocaBB = AllocaIP.getBlock();
2916 "omp.prelatch",
true);
2936 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
2938 ParallelRegionBlockSet.
end());
2958 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
2967 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
2968 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
2974 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
2981 OI.PostOutlineCB = [=, ToBeDeletedVec =
2982 std::move(ToBeDeleted)](
Function &OutlinedFn) {
2984 ToBeDeletedVec, LoopType);
2992 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
2993 bool HasSimdModifier,
bool HasMonotonicModifier,
2994 bool HasNonmonotonicModifier,
bool HasOrderedClause,
2997 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType);
2999 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
3000 HasNonmonotonicModifier, HasOrderedClause);
3002 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
3003 OMPScheduleType::ModifierOrdered;
3004 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
3005 case OMPScheduleType::BaseStatic:
3006 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
3008 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
3009 NeedsBarrier, ChunkSize);
3011 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier);
3013 case OMPScheduleType::BaseStaticChunked:
3015 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
3016 NeedsBarrier, ChunkSize);
3018 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
3021 case OMPScheduleType::BaseRuntime:
3022 case OMPScheduleType::BaseAuto:
3023 case OMPScheduleType::BaseGreedy:
3024 case OMPScheduleType::BaseBalanced:
3025 case OMPScheduleType::BaseSteal:
3026 case OMPScheduleType::BaseGuidedSimd:
3027 case OMPScheduleType::BaseRuntimeSimd:
3029 "schedule type does not support user-defined chunk sizes");
3031 case OMPScheduleType::BaseDynamicChunked:
3032 case OMPScheduleType::BaseGuidedChunked:
3033 case OMPScheduleType::BaseGuidedIterativeChunked:
3034 case OMPScheduleType::BaseGuidedAnalyticalChunked:
3035 case OMPScheduleType::BaseStaticBalancedChunked:
3036 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
3037 NeedsBarrier, ChunkSize);
3053 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
3056 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
3069 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
3072 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
3084 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
3087 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
3094 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
3096 "Require dedicated allocate IP");
3098 "Require valid schedule type");
3100 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
3101 OMPScheduleType::ModifierOrdered;
3112 Type *IVTy =
IV->getType();
3130 Constant *One = ConstantInt::get(IVTy, 1);
3151 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
3155 {SrcLoc, ThreadNum, SchedulingType, One,
3156 UpperBound, One, Chunk});
3166 PLowerBound, PUpperBound, PStride});
3167 Constant *Zero32 = ConstantInt::get(I32Type, 0);
3176 auto *PI = cast<PHINode>(Phi);
3177 PI->setIncomingBlock(0, OuterCond);
3178 PI->setIncomingValue(0, LowerBound);
3182 auto *Br = cast<BranchInst>(Term);
3183 Br->setSuccessor(0, OuterCond);
3191 auto *CI = cast<CmpInst>(Comp);
3192 CI->setOperand(1, UpperBound);
3195 auto *BI = cast<BranchInst>(Branch);
3196 assert(BI->getSuccessor(1) == Exit);
3197 BI->setSuccessor(1, OuterCond);
3210 omp::Directive::OMPD_for,
false,
3230 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
3231 for (
Use &U : BB->uses()) {
3232 auto *UseInst = dyn_cast<Instruction>(U.getUser());
3235 if (BBsToErase.count(UseInst->getParent()))
3243 bool Changed =
false;
3245 if (HasRemainingUses(BB)) {
3246 BBsToErase.erase(BB);
3261 assert(
Loops.size() >= 1 &&
"At least one loop required");
3262 size_t NumLoops =
Loops.size();
3266 return Loops.front();
3278 Loop->collectControlBlocks(OldControlBBs);
3282 if (ComputeIP.
isSet())
3289 Value *CollapsedTripCount =
nullptr;
3292 "All loops to collapse must be valid canonical loops");
3293 Value *OrigTripCount = L->getTripCount();
3294 if (!CollapsedTripCount) {
3295 CollapsedTripCount = OrigTripCount;
3307 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
3315 Value *Leftover = Result->getIndVar();
3317 NewIndVars.
resize(NumLoops);
3318 for (
int i = NumLoops - 1; i >= 1; --i) {
3319 Value *OrigTripCount =
Loops[i]->getTripCount();
3322 NewIndVars[i] = NewIndVar;
3327 NewIndVars[0] = Leftover;
3336 BasicBlock *ContinueBlock = Result->getBody();
3338 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
3345 ContinueBlock =
nullptr;
3346 ContinuePred = NextSrc;
3353 for (
size_t i = 0; i < NumLoops - 1; ++i)
3354 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
3360 for (
size_t i = NumLoops - 1; i > 0; --i)
3361 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
3364 ContinueWith(Result->getLatch(),
nullptr);
3371 for (
size_t i = 0; i < NumLoops; ++i)
3372 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
3386std::vector<CanonicalLoopInfo *>
3390 "Must pass as many tile sizes as there are loops");
3391 int NumLoops =
Loops.size();
3392 assert(NumLoops >= 1 &&
"At least one loop to tile required");
3404 Loop->collectControlBlocks(OldControlBBs);
3412 assert(L->isValid() &&
"All input loops must be valid canonical loops");
3413 OrigTripCounts.
push_back(L->getTripCount());
3424 for (
int i = 0; i < NumLoops - 1; ++i) {
3437 for (
int i = 0; i < NumLoops; ++i) {
3439 Value *OrigTripCount = OrigTripCounts[i];
3452 Value *FloorTripOverflow =
3458 "omp_floor" +
Twine(i) +
".tripcount",
true);
3466 std::vector<CanonicalLoopInfo *> Result;
3467 Result.reserve(NumLoops * 2);
3480 auto EmbeddNewLoop =
3481 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
3484 DL, TripCount,
F, InnerEnter, OutroInsertBefore,
Name);
3489 Enter = EmbeddedLoop->
getBody();
3491 OutroInsertBefore = EmbeddedLoop->
getLatch();
3492 return EmbeddedLoop;
3496 const Twine &NameBase) {
3499 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
3500 Result.push_back(EmbeddedLoop);
3504 EmbeddNewLoops(FloorCount,
"floor");
3510 for (
int i = 0; i < NumLoops; ++i) {
3514 Value *FloorIsEpilogue =
3516 Value *TileTripCount =
3523 EmbeddNewLoops(TileCounts,
"tile");
3528 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
3537 BodyEnter =
nullptr;
3538 BodyEntered = ExitBB;
3551 for (
int i = 0; i < NumLoops; ++i) {
3554 Value *OrigIndVar = OrigIndVars[i];
3582 if (Properties.
empty())
3605 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
3609 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
3617 if (
I.mayReadOrWriteMemory()) {
3621 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
3643 const Twine &NamePrefix) {
3649 SplitBefore = dyn_cast<Instruction>(IfCond);
3695 VMap[
Block] = NewBB;
3705 if (TargetTriple.
isX86()) {
3706 if (Features.
lookup(
"avx512f"))
3708 else if (Features.
lookup(
"avx"))
3712 if (TargetTriple.
isPPC())
3714 if (TargetTriple.
isWasm())
3721 Value *IfCond, OrderKind Order,
3740 if (AlignedVars.
size()) {
3743 for (
auto &AlignedItem : AlignedVars) {
3744 Value *AlignedPtr = AlignedItem.first;
3745 Value *Alignment = AlignedItem.second;
3747 AlignedPtr, Alignment);
3754 createIfVersion(CanonicalLoop, IfCond, VMap,
"simd");
3758 "Cannot find value which corresponds to original loop latch");
3759 assert(isa<BasicBlock>(MappedLatch) &&
3760 "Cannot cast mapped latch block value to BasicBlock");
3761 BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch);
3790 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
3798 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
3806 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
3808 if (Simdlen || Safelen) {
3812 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
3838static std::unique_ptr<TargetMachine>
3842 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
3843 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
3844 const std::string &
Triple = M->getTargetTriple();
3854 std::nullopt, OptLevel));
3878 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
3893 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
3898 nullptr, ORE,
static_cast<int>(OptLevel),
3919 <<
" Threshold=" << UP.
Threshold <<
"\n"
3922 <<
" PartialOptSizeThreshold="
3941 if (
auto *Load = dyn_cast<LoadInst>(&
I)) {
3942 Ptr = Load->getPointerOperand();
3943 }
else if (
auto *Store = dyn_cast<StoreInst>(&
I)) {
3944 Ptr = Store->getPointerOperand();
3948 Ptr =
Ptr->stripPointerCasts();
3950 if (
auto *Alloca = dyn_cast<AllocaInst>(
Ptr)) {
3951 if (Alloca->getParent() == &
F->getEntryBlock())
3971 int MaxTripCount = 0;
3972 bool MaxOrZero =
false;
3973 unsigned TripMultiple = 0;
3975 bool UseUpperBound =
false;
3977 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
3979 unsigned Factor = UP.
Count;
3980 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
3991 assert(Factor >= 0 &&
"Unroll factor must not be negative");
4007 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
4020 *UnrolledCLI =
Loop;
4025 "unrolling only makes sense with a factor of 2 or larger");
4027 Type *IndVarTy =
Loop->getIndVarType();
4034 std::vector<CanonicalLoopInfo *>
LoopNest =
4049 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
4052 (*UnrolledCLI)->assertOK();
4070 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
4089 if (!CPVars.
empty()) {
4094 Directive OMPD = Directive::OMPD_single;
4099 Value *Args[] = {Ident, ThreadId};
4125 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
4130 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
4133 ConstantInt::get(
Int64, 0), CPVars[
I],
4136 }
else if (!IsNowait)
4138 omp::Directive::OMPD_unknown,
false,
4150 Directive OMPD = Directive::OMPD_critical;
4155 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
4156 Value *Args[] = {Ident, ThreadId, LockVar};
4173 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4181 const Twine &
Name,
bool IsDependSource) {
4184 [](
Value *SV) {
return SV->
getType()->isIntegerTy(64); }) &&
4185 "OpenMP runtime requires depend vec with i64 type");
4198 for (
unsigned I = 0;
I < NumLoops; ++
I) {
4212 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
4230 Directive OMPD = Directive::OMPD_ordered;
4239 Value *Args[] = {Ident, ThreadId};
4249 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4255 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
4256 bool HasFinalize,
bool IsCancellable) {
4265 if (!isa_and_nonnull<BranchInst>(SplitPos))
4272 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
4282 "Unexpected control flow graph state!!");
4283 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
4285 "Unexpected Control Flow State!");
4291 "Unexpected Insertion point location!");
4294 auto InsertBB = merged ? ExitPredBB : ExitBB;
4295 if (!isa_and_nonnull<BranchInst>(SplitPos))
4305 if (!Conditional || !EntryCall)
4325 UI->eraseFromParent();
4333 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
4341 "Unexpected finalization stack state!");
4344 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
4394 if (isa_and_nonnull<BranchInst>(OMP_Entry->
getTerminator())) {
4396 "copyin.not.master.end");
4451 Value *DependenceAddress,
bool HaveNowaitClause) {
4459 if (Device ==
nullptr)
4460 Device = ConstantInt::get(
Int32, -1);
4461 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
4462 if (NumDependences ==
nullptr) {
4463 NumDependences = ConstantInt::get(
Int32, 0);
4467 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
4469 Ident, ThreadId, InteropVar, InteropTypeVal,
4470 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
4479 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
4487 if (Device ==
nullptr)
4488 Device = ConstantInt::get(
Int32, -1);
4489 if (NumDependences ==
nullptr) {
4490 NumDependences = ConstantInt::get(
Int32, 0);
4494 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
4496 Ident, ThreadId, InteropVar, Device,
4497 NumDependences, DependenceAddress, HaveNowaitClauseVal};
4506 Value *NumDependences,
4507 Value *DependenceAddress,
4508 bool HaveNowaitClause) {
4515 if (Device ==
nullptr)
4516 Device = ConstantInt::get(
Int32, -1);
4517 if (NumDependences ==
nullptr) {
4518 NumDependences = ConstantInt::get(
Int32, 0);
4522 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
4524 Ident, ThreadId, InteropVar, Device,
4525 NumDependences, DependenceAddress, HaveNowaitClauseVal};
4554 int32_t MinThreadsVal, int32_t MaxThreadsVal,
4555 int32_t MinTeamsVal, int32_t MaxTeamsVal) {
4572 if (MinTeamsVal > 1 || MaxTeamsVal > 0)
4576 if (MaxThreadsVal < 0)
4577 MaxThreadsVal = std::max(
4580 if (MaxThreadsVal > 0)
4592 const std::string DebugPrefix =
"_debug__";
4594 KernelName = KernelName.
drop_back(DebugPrefix.length());
4597 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
4600 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
4601 Constant *DynamicEnvironmentInitializer =
4605 DynamicEnvironmentInitializer, DynamicEnvironmentName,
4607 DL.getDefaultGlobalsAddressSpace());
4611 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
4612 ? DynamicEnvironmentGV
4614 DynamicEnvironmentPtr);
4617 ConfigurationEnvironment, {
4618 UseGenericStateMachineVal,
4619 MayUseNestedParallelismVal,
4626 ReductionBufferLength,
4629 KernelEnvironment, {
4630 ConfigurationEnvironmentInitializer,
4634 Twine KernelEnvironmentName = KernelName +
"_kernel_environment";
4637 KernelEnvironmentInitializer, KernelEnvironmentName,
4639 DL.getDefaultGlobalsAddressSpace());
4643 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
4644 ? KernelEnvironmentGV
4646 KernelEnvironmentPtr);
4652 ThreadKind, ConstantInt::get(ThreadKind->
getType(), -1),
4675 UI->eraseFromParent();
4683 int32_t TeamsReductionDataSize,
4684 int32_t TeamsReductionBufferLength) {
4689 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
4693 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
4699 const std::string DebugPrefix =
"_debug__";
4701 KernelName = KernelName.
drop_back(DebugPrefix.length());
4702 auto *KernelEnvironmentGV =
4704 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
4705 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
4707 KernelEnvironmentInitializer,
4708 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
4710 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
4712 KernelEnvironmentGV->setInitializer(NewInitializer);
4717 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
4721 auto *KernelOp = dyn_cast<ConstantAsMetadata>(
Op->getOperand(0));
4722 if (!KernelOp || KernelOp->getValue() != &
Kernel)
4724 auto *Prop = dyn_cast<MDString>(
Op->getOperand(1));
4725 if (!Prop || Prop->getString() !=
Name)
4737 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->
getOperand(2));
4738 int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
4741 OldVal->getValue()->getType(),
4742 Min ? std::min(OldLimit,
Value) : std::max(OldLimit,
Value))));
4751 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
4756std::pair<int32_t, int32_t>
4758 int32_t ThreadLimit =
4763 if (!Attr.isValid() || !Attr.isStringAttribute())
4764 return {0, ThreadLimit};
4767 if (!llvm::to_integer(UBStr, UB, 10))
4768 return {0, ThreadLimit};
4769 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
4770 if (!llvm::to_integer(LBStr, LB, 10))
4776 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
4777 int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
4778 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
4780 return {0, ThreadLimit};
4790 llvm::utostr(LB) +
"," + llvm::utostr(UB));
4797std::pair<int32_t, int32_t>
4804 int32_t LB, int32_t UB) {
4809 Kernel.
addFnAttr(
"amdgpu-max-num-workgroups", llvm::utostr(LB) +
",1,1");
4814void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
4829 assert(OutlinedFn &&
"The outlined function must exist if embedded");
4838Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
4844 "Named kernel already exists?");
4859 ? GenerateFunctionCallback(EntryFnName)
4865 if (!IsOffloadEntry)
4868 std::string EntryFnIDName =
4870 ? std::string(EntryFnName)
4874 EntryFnName, EntryFnIDName);
4881 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
4882 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
4883 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
4885 EntryInfo, EntryAddr, OutlinedFnID,
4887 return OutlinedFnID;
4910 bool IsStandAlone = !BodyGenCB;
4935 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
4942 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
4947 omp::OMPRTL___tgt_target_data_begin_mapper);
4951 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
4952 if (isa<AllocaInst>(DeviceMap.second.second)) {
4989 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
5006 emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
5016 emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
5022 emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
5033 bool IsGPUDistribute) {
5034 assert((IVSize == 32 || IVSize == 64) &&
5035 "IV size is not compatible with the omp runtime");
5037 if (IsGPUDistribute)
5039 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
5040 : omp::OMPRTL___kmpc_distribute_static_init_4u)
5041 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
5042 : omp::OMPRTL___kmpc_distribute_static_init_8u);
5044 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
5045 : omp::OMPRTL___kmpc_for_static_init_4u)
5046 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
5047 : omp::OMPRTL___kmpc_for_static_init_8u);
5054 assert((IVSize == 32 || IVSize == 64) &&
5055 "IV size is not compatible with the omp runtime");
5057 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
5058 : omp::OMPRTL___kmpc_dispatch_init_4u)
5059 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
5060 : omp::OMPRTL___kmpc_dispatch_init_8u);
5067 assert((IVSize == 32 || IVSize == 64) &&
5068 "IV size is not compatible with the omp runtime");
5070 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
5071 : omp::OMPRTL___kmpc_dispatch_next_4u)
5072 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
5073 : omp::OMPRTL___kmpc_dispatch_next_8u);
5080 assert((IVSize == 32 || IVSize == 64) &&
5081 "IV size is not compatible with the omp runtime");
5083 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
5084 : omp::OMPRTL___kmpc_dispatch_fini_4u)
5085 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
5086 : omp::OMPRTL___kmpc_dispatch_fini_8u);
5094 if (
auto *Instr = dyn_cast<Instruction>(
User)) {
5095 if (Instr->getFunction() == Func) {
5097 ConstInst->
insertBefore(*Instr->getParent(), Instr->getIterator());
5098 Instr->replaceUsesOfWith(ConstExpr, ConstInst);
5107 if (
auto *Const = dyn_cast<Constant>(
User))
5108 if (
auto *ConstExpr = dyn_cast<ConstantExpr>(Const))
5126 for (
auto &Arg : Inputs)
5127 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
5131 for (
auto &Arg : Inputs)
5132 ParameterTypes.
push_back(Arg->getType());
5141 auto OldInsertPoint = Builder.
saveIP();
5169 auto AllocaIP = Builder.
saveIP();
5174 const auto &ArgRange =
5176 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
5180 for (
auto InArg :
zip(Inputs, ArgRange)) {
5181 Value *Input = std::get<0>(InArg);
5182 Argument &Arg = std::get<1>(InArg);
5183 Value *InputCopy =
nullptr;
5186 ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.
saveIP()));
5204 if (
auto *Instr = dyn_cast<Instruction>(
User))
5205 if (Instr->getFunction() == Func)
5206 Instr->replaceUsesOfWith(Input, InputCopy);
5223 [&OMPBuilder, &Builder, &Inputs, &CBFunc,
5224 &ArgAccessorFuncCB](
StringRef EntryFnName) {
5226 CBFunc, ArgAccessorFuncCB);
5230 OutlinedFn, OutlinedFnID);
5236 int32_t NumTeams, int32_t NumThreads,
5253 auto &&EmitTargetCallFallbackCB =
5274 bool HasNoWait =
false;
5277 NumTeamsVal, NumThreadsVal,
5278 DynCGGroupMem, HasNoWait);
5281 Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
5282 DeviceID, RTLoc, AllocaIP));
5300 OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB);
5303 NumThreads, Args, GenMapInfoCB);
5318 return OS.str().str();
5332 assert(Elem.second->getValueType() == Ty &&
5333 "OMP internal variable has different type than requested");
5349 GV->setAlignment(std::max(TypeAlign, PtrAlign));
5356Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
5357 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
5358 std::string
Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
5369 return SizePtrToInt;
5374 std::string VarName) {
5382 return MaptypesArrayGlobal;
5387 unsigned NumOperands,
5396 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
5400 ArrI64Ty,
nullptr,
".offload_sizes");
5411 int64_t DeviceID,
unsigned NumOperands) {
5417 Value *ArgsBaseGEP =
5419 {Builder.getInt32(0), Builder.getInt32(0)});
5422 {Builder.getInt32(0), Builder.getInt32(0)});
5423 Value *ArgSizesGEP =
5425 {Builder.getInt32(0), Builder.getInt32(0)});
5431 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
5439 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
5440 "expected region end call to runtime only when end call is separate");
5442 auto VoidPtrTy = UnqualPtrTy;
5443 auto VoidPtrPtrTy = UnqualPtrTy;
5445 auto Int64PtrTy = UnqualPtrTy;
5447 if (!
Info.NumberOfPtrs) {
5459 Info.RTArgs.BasePointersArray,
5470 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
5471 :
Info.RTArgs.MapTypesArray,
5486 if (!
Info.HasMapper)
5511 "struct.descriptor_dim");
5513 enum { OffsetFD = 0, CountFD, StrideFD };
5517 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
5520 if (NonContigInfo.
Dims[
I] == 1)
5527 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I]; II < EE; ++II) {
5528 unsigned RevIdx = EE - II - 1;
5531 {Builder.getInt64(0), Builder.getInt64(II)});
5535 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
5540 NonContigInfo.
Counts[L][RevIdx], CountLVal,
5545 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
5554 Info.RTArgs.PointersArray, 0,
I);
5568 Info.clearArrayInfo();
5571 if (
Info.NumberOfPtrs == 0)
5581 PointerArrayType,
nullptr,
".offload_baseptrs");
5584 PointerArrayType,
nullptr,
".offload_ptrs");
5586 PointerArrayType,
nullptr,
".offload_mappers");
5587 Info.RTArgs.MappersArray = MappersArray;
5594 ConstantInt::get(Int64Ty, 0));
5596 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
5597 if (
auto *CI = dyn_cast<Constant>(CombinedInfo.
Sizes[
I])) {
5598 if (!isa<ConstantExpr>(CI) && !isa<GlobalValue>(CI)) {
5599 if (IsNonContiguous &&
5600 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
5602 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
5610 RuntimeSizes.
set(
I);
5613 if (RuntimeSizes.
all()) {
5616 SizeArrayType,
nullptr,
".offload_sizes");
5622 auto *SizesArrayGbl =
5627 if (!RuntimeSizes.
any()) {
5628 Info.RTArgs.SizesArray = SizesArrayGbl;
5634 SizeArrayType,
nullptr,
".offload_sizes");
5639 SizesArrayGbl, OffloadSizeAlign,
5644 Info.RTArgs.SizesArray = Buffer;
5652 for (
auto mapFlag : CombinedInfo.
Types)
5654 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
5658 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
5663 auto *MapNamesArrayGbl =
5665 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
5667 Info.RTArgs.MapNamesArray =
5673 if (
Info.separateBeginEndCalls()) {
5674 bool EndMapTypesDiffer =
false;
5676 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
5677 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
5678 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
5679 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
5680 EndMapTypesDiffer =
true;
5683 if (EndMapTypesDiffer) {
5685 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
5690 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
5698 if (
Info.requiresDevicePointerInfo()) {
5705 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
5707 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
5709 DeviceAddrCB(
I, BP);
5721 if (RuntimeSizes.
test(
I)) {
5735 if (
Value *CustomMFunc = CustomMapperCB(
I))
5739 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
5745 Info.NumberOfPtrs == 0)
5790 if (
auto *CI = dyn_cast<ConstantInt>(
Cond)) {
5791 auto CondConstant = CI->getSExtValue();
5821bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
5825 "Unexpected Atomic Ordering.");
5889 assert(
X.Var->getType()->isPointerTy() &&
5890 "OMP Atomic expects a pointer to target memory");
5891 Type *XElemTy =
X.ElemTy;
5894 "OMP atomic read expected a scalar type");
5896 Value *XRead =
nullptr;
5902 XRead = cast<Value>(XLD);
5916 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
5928 assert(
X.Var->getType()->isPointerTy() &&
5929 "OMP Atomic expects a pointer to target memory");
5930 Type *XElemTy =
X.ElemTy;
5933 "OMP atomic write expected a scalar type");
5948 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
5961 Type *XTy =
X.Var->getType();
5963 "OMP Atomic expects a pointer to target memory");
5964 Type *XElemTy =
X.ElemTy;
5967 "OMP atomic update expected a scalar type");
5970 "OpenMP atomic does not support LT or GT operations");
5973 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
5974 X.IsVolatile, IsXBinopExpr);
5975 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
5980Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
6012std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
6015 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr) {
6018 bool emitRMWOp =
false;
6026 emitRMWOp = XElemTy;
6029 emitRMWOp = (IsXBinopExpr && XElemTy);
6036 std::pair<Value *, Value *> Res;
6043 Res.second = Res.first;
6045 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
6063 X->getName() +
".atomic.cont");
6067 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
6070 PHI->addIncoming(OldVal, CurBB);
6076 X->getName() +
".atomic.fltCast");
6079 X->getName() +
".atomic.ptrCast");
6090 Result->setVolatile(VolatileX);
6096 Res.first = OldExprVal;
6116 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr) {
6121 Type *XTy =
X.Var->getType();
6123 "OMP Atomic expects a pointer to target memory");
6124 Type *XElemTy =
X.ElemTy;
6127 "OMP atomic capture expected a scalar type");
6129 "OpenMP atomic does not support LT or GT operations");
6135 std::pair<Value *, Value *> Result =
6136 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
6137 X.IsVolatile, IsXBinopExpr);
6139 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
6142 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
6154 IsPostfixUpdate, IsFailOnly, Failure);
6166 assert(
X.Var->getType()->isPointerTy() &&
6167 "OMP atomic expects a pointer to target memory");
6170 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
6171 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
6176 if (
Op == OMPAtomicCompareOp::EQ) {
6195 "OldValue and V must be of same type");
6196 if (IsPostfixUpdate) {
6214 CurBBTI,
X.Var->getName() +
".atomic.exit");
6234 Value *CapturedValue =
6242 assert(R.Var->getType()->isPointerTy() &&
6243 "r.var must be of pointer type");
6244 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
6247 Value *ResultCast = R.IsSigned
6253 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
6254 "Op should be either max or min at this point");
6255 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
6295 Value *CapturedValue =
nullptr;
6296 if (IsPostfixUpdate) {
6297 CapturedValue = OldValue;
6329 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
6376 bool SubClausesPresent =
6377 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
6380 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
6381 "if lowerbound is non-null, then upperbound must also be non-null "
6382 "for bounds on num_teams");
6384 if (NumTeamsUpper ==
nullptr)
6387 if (NumTeamsLower ==
nullptr)
6388 NumTeamsLower = NumTeamsUpper;
6392 "argument to if clause must be an integer value");
6397 ConstantInt::get(IfExpr->
getType(), 0));
6406 if (ThreadLimit ==
nullptr)
6412 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
6417 BodyGenCB(AllocaIP, CodeGenIP);
6425 std::stack<Instruction *> ToBeDeleted;
6428 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
6430 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
6432 auto HostPostOutlineCB = [
this, Ident,
6433 ToBeDeleted](
Function &OutlinedFn)
mutable {
6438 "there must be a single user for the outlined function");
6440 ToBeDeleted.push(StaleCI);
6443 "Outlined function must have two or three arguments only");
6445 bool HasShared = OutlinedFn.
arg_size() == 3;
6453 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
6454 "outlined function.");
6461 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
6464 while (!ToBeDeleted.empty()) {
6482 std::string VarName) {
6491 return MapNamesArrayGlobal;
6496void OpenMPIRBuilder::initializeTypes(
Module &M) {
6499#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
6500#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
6501 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
6502 VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
6503#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
6504 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
6505 VarName##Ptr = PointerType::getUnqual(VarName);
6506#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
6507 T = StructType::getTypeByName(Ctx, StructName); \
6509 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
6511 VarName##Ptr = PointerType::getUnqual(T);
6512#include "llvm/Frontend/OpenMP/OMPKinds.def"
6523 while (!Worklist.
empty()) {
6527 if (BlockSet.
insert(SuccBB).second)
6539 "omp_offloading_entries");
6563 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
6582 auto &&GetMDInt = [
this](
unsigned V) {
6590 auto &&TargetRegionMetadataEmitter =
6591 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
6606 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
6607 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
6608 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
6609 GetMDInt(E.getOrder())};
6612 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
6621 auto &&DeviceGlobalVarMetadataEmitter =
6622 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
6632 Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
6633 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
6637 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
6644 DeviceGlobalVarMetadataEmitter);
6646 for (
const auto &E : OrderedEntries) {
6647 assert(E.first &&
"All ordered entries must exist!");
6648 if (
const auto *CE =
6649 dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>(
6651 if (!CE->getID() || !CE->getAddress()) {
6663 }
else if (
const auto *CE =
dyn_cast<
6674 if (!CE->getAddress()) {
6679 if (CE->getVarSize() == 0)
6685 "Declaret target link address is set.");
6688 if (!CE->getAddress()) {
6700 if (
auto *
GV = dyn_cast<GlobalValue>(CE->getAddress()))
6701 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
6709 Flags, CE->getLinkage(), CE->getVarName());
6712 Flags, CE->getLinkage());
6733 unsigned FileID,
unsigned Line,
unsigned Count) {
6736 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
6743 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
6746 EntryInfo.
Line, NewCount);
6753 auto FileIDInfo = CallBack();
6756 "getTargetEntryUniqueInfo, error message: " +
6762 std::get<1>(FileIDInfo));
6768 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
6770 !(Remain & 1); Remain = Remain >> 1)
6788 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
6790 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
6797 Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
6798 Flags |= MemberOfFlag;
6804 bool IsDeclaration,
bool IsExternallyVisible,
6806 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
6807 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
6808 std::function<
Constant *()> GlobalInitializer,
6824 if (!IsExternallyVisible)
6826 OS <<
"_decl_tgt_ref_ptr";
6835 auto *
GV = cast<GlobalVariable>(
Ptr);
6839 if (GlobalInitializer)
6840 GV->setInitializer(GlobalInitializer());
6846 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
6847 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
6848 GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(
Ptr));
6851 return cast<Constant>(
Ptr);
6860 bool IsDeclaration,
bool IsExternallyVisible,
6862 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
6863 std::vector<Triple> TargetTriple,
6864 std::function<
Constant *()> GlobalInitializer,
6881 VarName = MangledName;
6889 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
6905 auto *GvAddrRef = cast<GlobalVariable>(AddrRef);
6906 GvAddrRef->setConstant(
true);
6908 GvAddrRef->setInitializer(
Addr);
6909 GeneratedRefs.push_back(GvAddrRef);
6919 VarName = (
Addr) ?
Addr->getName() :
"";
6923 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
6924 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
6925 LlvmPtrTy, GlobalInitializer, VariableLinkage);
6926 VarName = (
Addr) ?
Addr->getName() :
"";
6947 auto &&GetMDInt = [MN](
unsigned Idx) {
6948 auto *V = cast<ConstantAsMetadata>(MN->getOperand(
Idx));
6949 return cast<ConstantInt>(V->getValue())->getZExtValue();
6952 auto &&GetMDString = [MN](
unsigned Idx) {
6953 auto *V = cast<MDString>(MN->getOperand(
Idx));
6954 return V->getString();
6957 switch (GetMDInt(0)) {
6985 if (HostFilePath.
empty())
6989 if (std::error_code Err = Buf.getError()) {
6991 "OpenMPIRBuilder: " +
6999 if (std::error_code Err =
M.getError()) {
7001 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
7013 return OffloadEntriesTargetRegion.empty() &&
7014 OffloadEntriesDeviceGlobalVar.empty();
7017unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
7019 auto It = OffloadEntriesTargetRegionCount.find(
7020 getTargetRegionEntryCountKey(EntryInfo));
7021 if (It == OffloadEntriesTargetRegionCount.end())
7026void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
7028 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
7029 EntryInfo.
Count + 1;
7035 OffloadEntriesTargetRegion[EntryInfo] =
7037 OMPTargetRegionEntryTargetRegion);
7038 ++OffloadingEntriesNum;
7044 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
7047 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
7051 if (OMPBuilder->Config.isTargetDevice()) {
7053 if (!hasTargetRegionEntryInfo(EntryInfo)) {
7056 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
7057 Entry.setAddress(
Addr);
7059 Entry.setFlags(
Flags);
7062 hasTargetRegionEntryInfo(EntryInfo,
true))
7064 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
7065 "Target region entry already registered!");
7067 OffloadEntriesTargetRegion[EntryInfo] = Entry;
7068 ++OffloadingEntriesNum;
7070 incrementTargetRegionEntryInfoCount(EntryInfo);
7077 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
7079 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
7080 if (It == OffloadEntriesTargetRegion.end()) {
7084 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
7092 for (
const auto &It : OffloadEntriesTargetRegion) {
7093 Action(It.first, It.second);
7099 OffloadEntriesDeviceGlobalVar.try_emplace(
Name, Order,
Flags);
7100 ++OffloadingEntriesNum;
7106 if (OMPBuilder->Config.isTargetDevice()) {
7108 if (!hasDeviceGlobalVarEntryInfo(VarName))
7110 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
7111 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
7112 if (Entry.getVarSize() == 0) {
7113 Entry.setVarSize(VarSize);
7114 Entry.setLinkage(Linkage);
7118 Entry.setVarSize(VarSize);
7119 Entry.setLinkage(Linkage);
7120 Entry.setAddress(
Addr);
7122 if (hasDeviceGlobalVarEntryInfo(VarName)) {
7123 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
7124 assert(Entry.isValid() && Entry.getFlags() ==
Flags &&
7125 "Entry not initialized!");
7126 if (Entry.getVarSize() == 0) {
7127 Entry.setVarSize(VarSize);
7128 Entry.setLinkage(Linkage);
7133 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
7137 OffloadEntriesDeviceGlobalVar.try_emplace(
7138 VarName, OffloadingEntriesNum,
Addr, VarSize,
Flags, Linkage,
"");
7139 ++OffloadingEntriesNum;
7146 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
7147 Action(E.getKey(), E.getValue());
7154void CanonicalLoopInfo::collectControlBlocks(
7161 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
7173void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
7177 assert(isa<CmpInst>(CmpI) &&
"First inst must compare IV with TripCount");
7185void CanonicalLoopInfo::mapIndVar(
7195 for (
Use &U : OldIV->
uses()) {
7196 auto *
User = dyn_cast<Instruction>(U.getUser());
7199 if (
User->getParent() == getCond())
7201 if (
User->getParent() == getLatch())
7207 Value *NewIV = Updater(OldIV);
7210 for (
Use *U : ReplacableUses)
7231 "Preheader must terminate with unconditional branch");
7233 "Preheader must jump to header");
7236 assert(isa<BranchInst>(Header->getTerminator()) &&
7237 "Header must terminate with unconditional branch");
7238 assert(Header->getSingleSuccessor() ==
Cond &&
7239 "Header must jump to exiting block");
7242 assert(
Cond->getSinglePredecessor() == Header &&
7243 "Exiting block only reachable from header");
7245 assert(isa<BranchInst>(
Cond->getTerminator()) &&
7246 "Exiting block must terminate with conditional branch");
7248 "Exiting block must have two successors");
7249 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(0) == Body &&
7250 "Exiting block's first successor jump to the body");
7251 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(1) == Exit &&
7252 "Exiting block's second successor must exit the loop");
7256 "Body only reachable from exiting block");
7261 "Latch must terminate with unconditional branch");
7269 assert(isa<BranchInst>(Exit->getTerminator()) &&
7270 "Exit block must terminate with unconditional branch");
7272 "Exit block must jump to after block");
7276 "After block only reachable from exit block");
7280 assert(IndVar &&
"Canonical induction variable not found?");
7282 "Induction variable must be an integer");
7284 "Induction variable must be a PHI in the loop header");
7285 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
7287 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->
isZero());
7288 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
7290 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
7292 assert(cast<BinaryOperator>(NextIndVar)->
getOpcode() == BinaryOperator::Add);
7293 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
7294 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
7297 Value *TripCount = getTripCount();
7298 assert(TripCount &&
"Loop trip count not found?");
7300 "Trip count and induction variable must have the same type");
7302 auto *CmpI = cast<CmpInst>(&
Cond->front());
7304 "Exit condition must be a signed less-than comparison");
7306 "Exit condition must compare the induction variable");
7308 "Exit condition must compare with the trip count");
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions and their calls.
static const Function * getParent(const Value *V)
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_ENUM into a namespace.
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operations on it without putting static_cast everywhere.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static void emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static Function * createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, Constant *OutlinedFnID, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop, depending on type.
static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static MDNode * getNVPTXMDNode(Function &Kernel, StringRef Name)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling, depending on type.
static void replaceConstantValueUsesInFuncWithInstr(llvm::Value *Input, Function *Func)
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e. that inserting instructions at either insert point leads to the same result.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling, depending on type.
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void replaceConstatExprUsesInFuncWithInstr(ConstantExpr *ConstExpr, Function *Func)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, Type *ParallelTaskPtr, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
Value * createFakeIntVal(IRBuilder<> &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, std::stack< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
Function * getFreshReductionFunc(Module &M)
Create a function with a unique name and a "void (i8*, i8*)" signature in the given module and return...
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
A container for analyses that lazily runs them and caches their results.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
reverse_iterator rbegin()
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::reverse_iterator reverse_iterator
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
const Instruction & back() const
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
A constant value that is initialized with an expression using other constant values.
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Instruction * getAsInstruction() const
Returns an Instruction which implements the same operation as this ConstantExpr.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static ConstantInt * getFalse(LLVMContext &Context)
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
unsigned getDefaultGlobalsAddressSpace() const
Align getABIIntegerTypeAlignment(unsigned BitWidth) const
Returns the minimum ABI-required alignment for an integer type of the specified bitwidth.
unsigned getAllocaAddrSpace() const
unsigned getPointerSize(unsigned AS=0) const
Layout pointer size in bytes, rounded up to a whole number of bytes.
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Analysis pass which computes a DominatorTree.
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ ExternalLinkage
Externally visible function.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Constant * CreateGlobalStringPtr(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr)
Same as CreateGlobalString, but return a pointer with "i8*" type instead of a pointer to array of i8.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
UnreachableInst * CreateUnreachable()
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, Value *OffsetValue=nullptr)
Create an assume intrinsic call that represents an alignment assumption on the provided pointer.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
InsertPoint saveIP() const
Returns the current insert point.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
DebugLoc getCurrentDebugLocation() const
Get location information used by debugging information.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void ClearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Value * CreateIsNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg == 0.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveBeforePreserving(Instruction *MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static bool classof(const Value *V)
Methods for support type inquiry through isa, cast, and dyn_cast:
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Analysis pass that exposes the LoopInfo for a function.
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
StringRef getName() const
Get a short "name" for the module.
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
iterator_range< global_iterator > globals()
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a 'declare target link' entry.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a 'declare target to' entry.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
StringRef separator() const
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
StringRef firstSeparator() const
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
void setHasRequiresDynamicAllocators(bool Value)
bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, int32_t MinThreadsVal=0, int32_t MaxThreadsVal=0, int32_t MinTeamsVal=0, int32_t MaxTeamsVal=0)
The omp target interface.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
void emitBranch(BasicBlock *Target)
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function Returns the FunctionID.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy emitKernelLaunch(const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool EmitDebug=false, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for #omp task
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
InsertPointTy createTarget(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB)
Generator for '#omp target'.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false)
Generator for '#omp reduction'.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isVoidTy() const
Return true if this is 'void'.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
unsigned getNumUses() const
This method computes the number of uses of this Value.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
iterator insertAfter(iterator where, pointer New)
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
void emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, int32_t Data, StringRef SectionName)
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp.h).
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
@ OMP_TGT_EXEC_MODE_GENERIC
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
std::error_code getUniqueID(const Twine &Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
CodeGenOptLevel
Code generation optimization level.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
DWARFExpression::Operation Op
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * NumTeams
The number of teams.
Value * DynCGGroupMem
The size of the dynamic shared memory.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
Value * NumThreads
The number of threads.
Data structure to contain the information needed to uniquely identify a target entry.
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static const Target * lookupTarget(StringRef Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...