llvm.org GIT mirror llvm / 688fc6b
Merging r351351: ------------------------------------------------------------------------ r351351 | mareko | 2019-01-16 16:43:53 +0100 (Wed, 16 Jan 2019) | 7 lines AMDGPU: Add llvm.amdgcn.ds.ordered.add & swap Reviewers: arsenm, nhaehnle Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D52944 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@351443 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 1 year, 10 months ago
15 changed file(s) with 278 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
390390 llvm_i1_ty], // isVolatile
391391 [IntrArgMemOnly, NoCapture<0>]
392392 >;
393
394 class AMDGPUDSOrderedIntrinsic : Intrinsic<
395 [llvm_i32_ty],
396 // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
397 // the bit packing can be optimized at the IR level.
398 [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
399 llvm_i32_ty, // value to add or swap
400 llvm_i32_ty, // ordering
401 llvm_i32_ty, // scope
402 llvm_i1_ty, // isVolatile
403 llvm_i32_ty, // ordered count index (OA index), also added to the address
404 llvm_i1_ty, // wave release, usually set to 1
405 llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
406 [NoCapture<0>]
407 >;
408
409 def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
410 def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
393411
394412 def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
395413 def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
253253
254254 FLAT_ADDRESS = 0, ///< Address space for flat memory.
255255 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
256 REGION_ADDRESS = 2, ///< Address space for region memory.
256 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
257257
258258 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
259259 LOCAL_ADDRESS = 3, ///< Address space for local memory.
41914191 NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
41924192 NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
41934193 NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
4194 NODE_NAME_CASE(DS_ORDERED_COUNT)
41944195 NODE_NAME_CASE(ATOMIC_CMP_SWAP)
41954196 NODE_NAME_CASE(ATOMIC_INC)
41964197 NODE_NAME_CASE(ATOMIC_DEC)
473473 TBUFFER_STORE_FORMAT_D16,
474474 TBUFFER_LOAD_FORMAT,
475475 TBUFFER_LOAD_FORMAT_D16,
476 DS_ORDERED_COUNT,
476477 ATOMIC_CMP_SWAP,
477478 ATOMIC_INC,
478479 ATOMIC_DEC,
7171 def : SourceOfDivergence;
7272 def : SourceOfDivergence;
7373 def : SourceOfDivergence;
74 def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
75 def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
7476
7577 foreach intr = AMDGPUImageDimAtomicIntrinsics in
7678 def : SourceOfDivergence<intr>;
307307 switch (Inst->getIntrinsicID()) {
308308 case Intrinsic::amdgcn_atomic_inc:
309309 case Intrinsic::amdgcn_atomic_dec:
310 case Intrinsic::amdgcn_ds_ordered_add:
311 case Intrinsic::amdgcn_ds_ordered_swap:
310312 case Intrinsic::amdgcn_ds_fadd:
311313 case Intrinsic::amdgcn_ds_fmin:
312314 case Intrinsic::amdgcn_ds_fmax: {
815815 defm : DSAtomicRetPat_mc;
816816
817817 defm : DSAtomicCmpXChg_mc;
818
819 def : Pat <
820 (SIds_ordered_count i32:$value, i16:$offset),
821 (DS_ORDERED_COUNT $value, (as_i16imm $offset))
822 >;
818823
819824 //===----------------------------------------------------------------------===//
820825 // Real instructions
8787 }
8888 }
8989
90 static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
90 static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
91 const MachineInstr &MI) {
92 if (TII.isAlwaysGDS(MI.getOpcode()))
93 return true;
94
9195 switch (MI.getOpcode()) {
9296 case AMDGPU::S_SENDMSG:
9397 case AMDGPU::S_SENDMSGHALT:
9498 case AMDGPU::S_TTRACEDATA:
9599 return true;
100 // These DS opcodes don't support GDS.
101 case AMDGPU::DS_NOP:
102 case AMDGPU::DS_PERMUTE_B32:
103 case AMDGPU::DS_BPERMUTE_B32:
104 return false;
96105 default:
97 // TODO: GDS
106 if (TII.isDS(MI.getOpcode())) {
107 int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
108 AMDGPU::OpName::gds);
109 if (MI.getOperand(GDS).getImm())
110 return true;
111 }
98112 return false;
99113 }
100114 }
144158 checkReadM0Hazards(MI) > 0)
145159 return NoopHazard;
146160
147 if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
161 if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
148162 checkReadM0Hazards(MI) > 0)
149163 return NoopHazard;
150164
198212 isSMovRel(MI->getOpcode())))
199213 return std::max(WaitStates, checkReadM0Hazards(MI));
200214
201 if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
215 if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
202216 return std::max(WaitStates, checkReadM0Hazards(MI));
203217
204218 return WaitStates;
909909 switch (IntrID) {
910910 case Intrinsic::amdgcn_atomic_inc:
911911 case Intrinsic::amdgcn_atomic_dec:
912 case Intrinsic::amdgcn_ds_ordered_add:
913 case Intrinsic::amdgcn_ds_ordered_swap:
912914 case Intrinsic::amdgcn_ds_fadd:
913915 case Intrinsic::amdgcn_ds_fmin:
914916 case Intrinsic::amdgcn_ds_fmax: {
936938 switch (II->getIntrinsicID()) {
937939 case Intrinsic::amdgcn_atomic_inc:
938940 case Intrinsic::amdgcn_atomic_dec:
941 case Intrinsic::amdgcn_ds_ordered_add:
942 case Intrinsic::amdgcn_ds_ordered_swap:
939943 case Intrinsic::amdgcn_ds_fadd:
940944 case Intrinsic::amdgcn_ds_fmin:
941945 case Intrinsic::amdgcn_ds_fmax: {
54375441 SDLoc DL(Op);
54385442
54395443 switch (IntrID) {
5444 case Intrinsic::amdgcn_ds_ordered_add:
5445 case Intrinsic::amdgcn_ds_ordered_swap: {
5446 MemSDNode *M = cast<MemSDNode>(Op);
5447 SDValue Chain = M->getOperand(0);
5448 SDValue M0 = M->getOperand(2);
5449 SDValue Value = M->getOperand(3);
5450 unsigned OrderedCountIndex = M->getConstantOperandVal(7);
5451 unsigned WaveRelease = M->getConstantOperandVal(8);
5452 unsigned WaveDone = M->getConstantOperandVal(9);
5453 unsigned ShaderType;
5454 unsigned Instruction;
5455
5456 switch (IntrID) {
5457 case Intrinsic::amdgcn_ds_ordered_add:
5458 Instruction = 0;
5459 break;
5460 case Intrinsic::amdgcn_ds_ordered_swap:
5461 Instruction = 1;
5462 break;
5463 }
5464
5465 if (WaveDone && !WaveRelease)
5466 report_fatal_error("ds_ordered_count: wave_done requires wave_release");
5467
5468 switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
5469 case CallingConv::AMDGPU_CS:
5470 case CallingConv::AMDGPU_KERNEL:
5471 ShaderType = 0;
5472 break;
5473 case CallingConv::AMDGPU_PS:
5474 ShaderType = 1;
5475 break;
5476 case CallingConv::AMDGPU_VS:
5477 ShaderType = 2;
5478 break;
5479 case CallingConv::AMDGPU_GS:
5480 ShaderType = 3;
5481 break;
5482 default:
5483 report_fatal_error("ds_ordered_count unsupported for this calling conv");
5484 }
5485
5486 unsigned Offset0 = OrderedCountIndex << 2;
5487 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
5488 (Instruction << 4);
5489 unsigned Offset = Offset0 | (Offset1 << 8);
5490
5491 SDValue Ops[] = {
5492 Chain,
5493 Value,
5494 DAG.getTargetConstant(Offset, DL, MVT::i16),
5495 copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
5496 };
5497 return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
5498 M->getVTList(), Ops, M->getMemoryVT(),
5499 M->getMemOperand());
5500 }
54405501 case Intrinsic::amdgcn_atomic_inc:
54415502 case Intrinsic::amdgcn_atomic_dec:
54425503 case Intrinsic::amdgcn_ds_fadd:
535535 CurrScore);
536536 }
537537 if (Inst.mayStore()) {
538 setExpScore(
539 &Inst, TII, TRI, MRI,
540 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
541 CurrScore);
538 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
539 AMDGPU::OpName::data0) != -1) {
540 setExpScore(
541 &Inst, TII, TRI, MRI,
542 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
543 CurrScore);
544 }
542545 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
543546 AMDGPU::OpName::data1) != -1) {
544547 setExpScore(&Inst, TII, TRI, MRI,
10921095 // bracket and the destination operand scores.
10931096 // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
10941097 if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
1095 if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
1098 if (TII->isAlwaysGDS(Inst.getOpcode()) ||
1099 TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
10961100 ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
10971101 ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
10981102 } else {
23892389 changesVGPRIndexingMode(MI);
23902390 }
23912391
2392 bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
2393 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
2394 Opcode == AMDGPU::DS_GWS_INIT ||
2395 Opcode == AMDGPU::DS_GWS_SEMA_V ||
2396 Opcode == AMDGPU::DS_GWS_SEMA_BR ||
2397 Opcode == AMDGPU::DS_GWS_SEMA_P ||
2398 Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
2399 Opcode == AMDGPU::DS_GWS_BARRIER;
2400 }
2401
23922402 bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
23932403 unsigned Opcode = MI.getOpcode();
23942404
24022412 // EXEC = 0, but checking for that case here seems not worth it
24032413 // given the typical code patterns.
24042414 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
2405 Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
2415 Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
2416 Opcode == AMDGPU::DS_ORDERED_COUNT)
24062417 return true;
24072418
24082419 if (MI.isInlineAsm())
449449 return get(Opcode).TSFlags & SIInstrFlags::DS;
450450 }
451451
452 bool isAlwaysGDS(uint16_t Opcode) const;
453
452454 static bool isMIMG(const MachineInstr &MI) {
453455 return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
454456 }
4242 def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
4343 SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>]>,
4444 [SDNPMayLoad, SDNPMemOperand]
45 >;
46
47 def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
48 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
49 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
4550 >;
4651
4752 def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
4
5 ; FUNC-LABEL: {{^}}ds_ordered_add:
6 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
7 ; GCN-DAG: s_mov_b32 m0,
8 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
9 define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
10 %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
11 store i32 %val, i32 addrspace(1)* %out
12 ret void
13 }
14
15 ; Below are various modifications of input operands and shader types.
16
17 ; FUNC-LABEL: {{^}}ds_ordered_add_counter2:
18 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
19 ; GCN-DAG: s_mov_b32 m0,
20 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:776 gds
21 define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
22 %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
23 store i32 %val, i32 addrspace(1)* %out
24 ret void
25 }
26
27 ; FUNC-LABEL: {{^}}ds_ordered_add_nodone:
28 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
29 ; GCN-DAG: s_mov_b32 m0,
30 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:260 gds
31 define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
32 %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
33 store i32 %val, i32 addrspace(1)* %out
34 ret void
35 }
36
37 ; FUNC-LABEL: {{^}}ds_ordered_add_norelease:
38 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
39 ; GCN-DAG: s_mov_b32 m0,
40 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:4 gds
41 define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
42 %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
43 store i32 %val, i32 addrspace(1)* %out
44 ret void
45 }
46
47 ; FUNC-LABEL: {{^}}ds_ordered_add_cs:
48 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
49 ; GCN: s_mov_b32 m0, s0
50 ; VIGFX9-NEXT: s_nop 0
51 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
52 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
53 define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
54 %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
55 %r = bitcast i32 %val to float
56 ret float %r
57 }
58
59 ; FUNC-LABEL: {{^}}ds_ordered_add_ps:
60 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
61 ; GCN: s_mov_b32 m0, s0
62 ; VIGFX9-NEXT: s_nop 0
63 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
64 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
65 define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
66 %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
67 %r = bitcast i32 %val to float
68 ret float %r
69 }
70
71 ; FUNC-LABEL: {{^}}ds_ordered_add_vs:
72 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
73 ; GCN: s_mov_b32 m0, s0
74 ; VIGFX9-NEXT: s_nop 0
75 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
76 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
77 define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
78 %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
79 %r = bitcast i32 %val to float
80 ret float %r
81 }
82
83 ; FUNC-LABEL: {{^}}ds_ordered_add_gs:
84 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
85 ; GCN: s_mov_b32 m0, s0
86 ; VIGFX9-NEXT: s_nop 0
87 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
88 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
89 define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
90 %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
91 %r = bitcast i32 %val to float
92 ret float %r
93 }
94
95 declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
4
5 ; FUNC-LABEL: {{^}}ds_ordered_swap:
6 ; GCN: s_mov_b32 m0, s0
7 ; VIGFX9-NEXT: s_nop 0
8 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
9 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
10 define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value) {
11 %val = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
12 %r = bitcast i32 %val to float
13 ret float %r
14 }
15
16 ; FUNC-LABEL: {{^}}ds_ordered_swap_conditional:
17 ; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
18 ; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc
19 ; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0
20 ; GCN: s_cbranch_execz [[BB:BB._.]]
21 ; GCN: s_mov_b32 m0, s0
22 ; VIGFX9-NEXT: s_nop 0
23 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
24 ; GCN-NEXT: [[BB]]:
25 ; // Wait for expcnt(0) before modifying EXEC
26 ; GCN-NEXT: s_waitcnt expcnt(0)
27 ; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]]
28 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
29 define amdgpu_cs float @ds_ordered_swap_conditional(i32 addrspace(2)* inreg %gds, i32 %value) {
30 entry:
31 %c = icmp ne i32 %value, 0
32 br i1 %c, label %if-true, label %endif
33
34 if-true:
35 %val = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
36 br label %endif
37
38 endif:
39 %v = phi i32 [ %val, %if-true ], [ undef, %entry ]
40 %r = bitcast i32 %v to float
41 ret float %r
42 }
43
44 declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)