llvm.org GIT mirror llvm / b3dd66a
Merging r237152: ------------------------------------------------------------------------ r237152 | thomas.stellard | 2015-05-12 13:13:02 -0400 (Tue, 12 May 2015) | 20 lines R600/SI: add pass to mark CF live ranges as non-spillable Spilling can insert instructions almost anywhere, and this can mess up control flow lowering in a multitude of ways, due to instruction reordering. Let's sort this out the easy way: never spill registers involved with control flow, i.e. saved EXEC masks. Unfortunately, this does not work at all with optimizations disabled, as the register allocator ignores spill weights. This should be addressed in a future commit. The test was reduced from the "stacks" shader of [1]. Some issues trigger the machine verifier while another one is checked manually. [1] http://madebyevan.com/webgl-path-tracing/ v2: only insert pass with optimizations enabled, merge test runs. Patch by: Grigori Goronzy git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@240282 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 4 years ago
5 changed file(s) with 610 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
4242 FunctionPass *createSIShrinkInstructionsPass();
4343 FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
4444 FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
45 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
4546 FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
4647 FunctionPass *createSIFixSGPRLiveRangesPass();
4748 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
6667 /// \brief Creates an AMDGPU-specific Target Transformation Info pass.
6768 ImmutablePass *
6869 createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM);
70
71 void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
72 extern char &SIFixControlFlowLiveIntervalsID;
6973
7074 void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
7175 extern char &SIFixSGPRLiveRangesID;
169169 if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
170170 addPass(createR600VectorRegMerger(*TM));
171171 } else {
172 if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
172 // This needs to be run directly before register allocation because
173 // earlier passes might recompute live intervals.
174 // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
175 if (getOptLevel() > CodeGenOpt::None) {
176 initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
177 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
178 }
179 if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
173180 // Don't do this with no optimizations since it throws away debug info by
174181 // merging nonadjacent loads.
175182
4040 R600RegisterInfo.cpp
4141 R600TextureIntrinsicsReplacer.cpp
4242 SIAnnotateControlFlow.cpp
43 SIFixControlFlowLiveIntervals.cpp
4344 SIFixSGPRCopies.cpp
4445 SIFixSGPRLiveRanges.cpp
4546 SIFoldOperands.cpp
0 //===-- SIFixControlFlowLiveIntervals.cpp - Fix CF live intervals ---------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Spilling of EXEC masks used for control flow messes up control flow
11 /// lowering, so mark all live intervals associated with CF instructions as
12 /// non-spillable.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "AMDGPU.h"
17 #include "SIInstrInfo.h"
18 #include "SIRegisterInfo.h"
19 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachinePostDominators.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "llvm/Target/TargetMachine.h"
27
28 using namespace llvm;
29
30 #define DEBUG_TYPE "si-fix-cf-live-intervals"
31
32 namespace {
33
34 class SIFixControlFlowLiveIntervals : public MachineFunctionPass {
35 public:
36 static char ID;
37
38 public:
39 SIFixControlFlowLiveIntervals() : MachineFunctionPass(ID) {
40 initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
41 }
42
43 bool runOnMachineFunction(MachineFunction &MF) override;
44
45 const char *getPassName() const override {
46 return "SI Fix CF Live Intervals";
47 }
48
49 void getAnalysisUsage(AnalysisUsage &AU) const override {
50 AU.addRequired();
51 AU.setPreservesAll();
52 MachineFunctionPass::getAnalysisUsage(AU);
53 }
54 };
55
56 } // End anonymous namespace.
57
// Pass registration: defines the initializer for SIFixControlFlowLiveIntervals
// under the DEBUG_TYPE argument string and records LiveIntervals as a
// dependency of the pass.
58 INITIALIZE_PASS_BEGIN(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
59 "SI Fix CF Live Intervals", false, false)
60 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
61 INITIALIZE_PASS_END(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
62 "SI Fix CF Live Intervals", false, false)
63
// Storage for the pass identifier passed to the MachineFunctionPass constructor.
64 char SIFixControlFlowLiveIntervals::ID = 0;
65
// Public reference to the pass ID (declared extern in the AMDGPU header hunk);
// the target pass configuration passes its address to insertPass().
66 char &llvm::SIFixControlFlowLiveIntervalsID = SIFixControlFlowLiveIntervals::ID;
67
/// Factory for the pass: returns a newly heap-allocated
/// SIFixControlFlowLiveIntervals instance as a FunctionPass pointer.
// NOTE(review): presumably the caller (pass manager) takes ownership of the
// returned object — confirm against the call site.
68 FunctionPass *llvm::createSIFixControlFlowLiveIntervalsPass() {
69 return new SIFixControlFlowLiveIntervals();
70 }
71
72 bool SIFixControlFlowLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
73 LiveIntervals *LIS = &getAnalysis();
74
75 for (const MachineBasicBlock &MBB : MF) {
76 for (const MachineInstr &MI : MBB) {
77 switch (MI.getOpcode()) {
78 case AMDGPU::SI_IF:
79 case AMDGPU::SI_ELSE:
80 case AMDGPU::SI_BREAK:
81 case AMDGPU::SI_IF_BREAK:
82 case AMDGPU::SI_ELSE_BREAK:
83 case AMDGPU::SI_END_CF: {
84 unsigned Reg = MI.getOperand(0).getReg();
85 LIS->getInterval(Reg).markNotSpillable();
86 break;
87 }
88 default:
89 break;
90 }
91 }
92 }
93
94 return false;
95 }
0 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
2
3 ; If a v_readlane_b32 of [[SAVED]] appears after the s_or_b64 below, it is likely
4 ; due to reordering: the restore was originally supposed to happen before SI_END_CF.
5 ; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
6 ; SI-NOT: v_readlane_b32 [[SAVED]]
7
8 define void @main() #0 {
9 main_body:
10 %0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
11 %1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
12 %2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
13 %3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
14 %4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
15 %5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
16 %6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
17 %7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
18 %8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
19 %9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
20 %10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
21 %11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
22 %12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
23 %13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
24 %14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
25 %15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
26 %16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
27 %17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
28 %18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
29 %19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
30 %20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
31 %21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
32 %22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
33 %23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
34 %24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
35 %25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
36 %26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
37 %27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
38 %28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
39 %29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
40 %30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
41 %31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
42 %32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
43 %33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
44 %34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
45 %35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
46 %36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
47 %37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
48 %38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
49 %39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
50 %40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
51 %41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
52 %42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
53 %43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
54 %44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
55 %45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
56 %46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
57 %47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
58 %48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
59 %49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
60 %50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
61 %51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
62 %52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
63 %53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
64 %54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
65 %55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
66 %56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
67 %57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
68 %58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
69 %59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
70 %60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
71 %61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
72 %62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
73 %63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
74 %64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
75 %65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
76 %66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
77 br label %LOOP
78
79 LOOP: ; preds = %ENDIF2795, %main_body
80 %temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ]
81 %temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ]
82 %67 = icmp sgt i32 undef, 4
83 br i1 %67, label %ENDLOOP, label %ENDIF
84
85 ENDLOOP: ; preds = %ELSE2566, %LOOP
86 %68 = call float @llvm.AMDGPU.lrp(float %0, float undef, float undef)
87 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %68, float undef, float 1.000000e+00)
88 ret void
89
90 ENDIF: ; preds = %LOOP
91 %69 = fsub float %2, undef
92 %70 = fsub float %3, undef
93 %71 = fsub float %4, undef
94 %72 = fmul float %69, 0.000000e+00
95 %73 = fmul float %70, undef
96 %74 = fmul float %71, undef
97 %75 = fsub float %6, undef
98 %76 = fsub float %7, undef
99 %77 = fmul float %75, undef
100 %78 = fmul float %76, 0.000000e+00
101 %79 = call float @llvm.minnum.f32(float %74, float %78)
102 %80 = call float @llvm.maxnum.f32(float %72, float 0.000000e+00)
103 %81 = call float @llvm.maxnum.f32(float %73, float %77)
104 %82 = call float @llvm.maxnum.f32(float undef, float %79)
105 %83 = call float @llvm.minnum.f32(float %80, float %81)
106 %84 = call float @llvm.minnum.f32(float %83, float undef)
107 %85 = fsub float %14, undef
108 %86 = fsub float %15, undef
109 %87 = fsub float %16, undef
110 %88 = fmul float %85, undef
111 %89 = fmul float %86, undef
112 %90 = fmul float %87, undef
113 %91 = fsub float %17, undef
114 %92 = fsub float %18, undef
115 %93 = fsub float %19, undef
116 %94 = fmul float %91, 0.000000e+00
117 %95 = fmul float %92, undef
118 %96 = fmul float %93, undef
119 %97 = call float @llvm.minnum.f32(float %89, float %95)
120 %98 = call float @llvm.maxnum.f32(float %88, float %94)
121 %99 = call float @llvm.maxnum.f32(float %90, float %96)
122 %100 = call float @llvm.maxnum.f32(float undef, float %97)
123 %101 = call float @llvm.maxnum.f32(float %100, float undef)
124 %102 = call float @llvm.minnum.f32(float %98, float undef)
125 %103 = call float @llvm.minnum.f32(float %102, float %99)
126 %104 = fsub float %30, undef
127 %105 = fsub float %31, undef
128 %106 = fmul float %104, 0.000000e+00
129 %107 = fmul float %105, 0.000000e+00
130 %108 = call float @llvm.minnum.f32(float undef, float %106)
131 %109 = call float @llvm.maxnum.f32(float undef, float %107)
132 %110 = call float @llvm.maxnum.f32(float undef, float %108)
133 %111 = call float @llvm.maxnum.f32(float %110, float undef)
134 %112 = call float @llvm.minnum.f32(float undef, float %109)
135 %113 = fsub float %32, undef
136 %114 = fsub float %33, undef
137 %115 = fsub float %34, undef
138 %116 = fmul float %113, 0.000000e+00
139 %117 = fmul float %114, undef
140 %118 = fmul float %115, undef
141 %119 = fsub float %35, undef
142 %120 = fsub float %36, undef
143 %121 = fsub float %37, undef
144 %122 = fmul float %119, undef
145 %123 = fmul float %120, undef
146 %124 = fmul float %121, undef
147 %125 = call float @llvm.minnum.f32(float %116, float %122)
148 %126 = call float @llvm.minnum.f32(float %117, float %123)
149 %127 = call float @llvm.minnum.f32(float %118, float %124)
150 %128 = call float @llvm.maxnum.f32(float %125, float %126)
151 %129 = call float @llvm.maxnum.f32(float %128, float %127)
152 %130 = fsub float %38, undef
153 %131 = fsub float %39, undef
154 %132 = fsub float %40, undef
155 %133 = fmul float %130, 0.000000e+00
156 %134 = fmul float %131, undef
157 %135 = fmul float %132, undef
158 %136 = fsub float %41, undef
159 %137 = fsub float %42, undef
160 %138 = fsub float %43, undef
161 %139 = fmul float %136, undef
162 %140 = fmul float %137, undef
163 %141 = fmul float %138, undef
164 %142 = call float @llvm.minnum.f32(float %133, float %139)
165 %143 = call float @llvm.minnum.f32(float %134, float %140)
166 %144 = call float @llvm.minnum.f32(float %135, float %141)
167 %145 = call float @llvm.maxnum.f32(float %142, float %143)
168 %146 = call float @llvm.maxnum.f32(float %145, float %144)
169 %147 = fsub float %44, undef
170 %148 = fsub float %45, undef
171 %149 = fsub float %46, undef
172 %150 = fmul float %147, 0.000000e+00
173 %151 = fmul float %148, 0.000000e+00
174 %152 = fmul float %149, undef
175 %153 = fsub float %47, undef
176 %154 = fsub float %48, undef
177 %155 = fsub float %49, undef
178 %156 = fmul float %153, undef
179 %157 = fmul float %154, 0.000000e+00
180 %158 = fmul float %155, undef
181 %159 = call float @llvm.minnum.f32(float %150, float %156)
182 %160 = call float @llvm.minnum.f32(float %151, float %157)
183 %161 = call float @llvm.minnum.f32(float %152, float %158)
184 %162 = call float @llvm.maxnum.f32(float %159, float %160)
185 %163 = call float @llvm.maxnum.f32(float %162, float %161)
186 %164 = fsub float %50, undef
187 %165 = fsub float %51, undef
188 %166 = fsub float %52, undef
189 %167 = fmul float %164, undef
190 %168 = fmul float %165, 0.000000e+00
191 %169 = fmul float %166, 0.000000e+00
192 %170 = fsub float %53, undef
193 %171 = fsub float %54, undef
194 %172 = fsub float %55, undef
195 %173 = fdiv float 1.000000e+00, %temp18.0
196 %174 = fmul float %170, undef
197 %175 = fmul float %171, undef
198 %176 = fmul float %172, %173
199 %177 = call float @llvm.minnum.f32(float %167, float %174)
200 %178 = call float @llvm.minnum.f32(float %168, float %175)
201 %179 = call float @llvm.minnum.f32(float %169, float %176)
202 %180 = call float @llvm.maxnum.f32(float %177, float %178)
203 %181 = call float @llvm.maxnum.f32(float %180, float %179)
204 %182 = fsub float %62, undef
205 %183 = fsub float %63, undef
206 %184 = fsub float %64, undef
207 %185 = fmul float %182, 0.000000e+00
208 %186 = fmul float %183, undef
209 %187 = fmul float %184, undef
210 %188 = fsub float %65, undef
211 %189 = fsub float %66, undef
212 %190 = fmul float %188, undef
213 %191 = fmul float %189, undef
214 %192 = call float @llvm.maxnum.f32(float %185, float %190)
215 %193 = call float @llvm.maxnum.f32(float %186, float %191)
216 %194 = call float @llvm.maxnum.f32(float %187, float undef)
217 %195 = call float @llvm.minnum.f32(float %192, float %193)
218 %196 = call float @llvm.minnum.f32(float %195, float %194)
219 %.temp292.7 = select i1 undef, float %163, float undef
220 %temp292.9 = select i1 false, float %181, float %.temp292.7
221 %.temp292.9 = select i1 undef, float undef, float %temp292.9
222 %197 = fcmp ogt float undef, 0.000000e+00
223 %198 = fcmp olt float undef, %196
224 %199 = and i1 %197, %198
225 %200 = fcmp olt float undef, %.temp292.9
226 %201 = and i1 %199, %200
227 %temp292.11 = select i1 %201, float undef, float %.temp292.9
228 br i1 undef, label %IF2565, label %ELSE2566
229
230 IF2565: ; preds = %ENDIF
231 br i1 false, label %ENDIF2582, label %ELSE2584
232
233 ELSE2566: ; preds = %ENDIF
234 %202 = fcmp oeq float %temp292.11, 1.000000e+04
235 br i1 %202, label %ENDLOOP, label %ELSE2593
236
237 ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
238 %temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
239 %temp18.1 = phi float [ %219, %ENDIF2588 ], [ undef, %ENDIF2594 ]
240 %203 = fsub float %5, undef
241 %204 = fmul float %203, undef
242 %205 = call float @llvm.maxnum.f32(float undef, float %204)
243 %206 = call float @llvm.minnum.f32(float %205, float undef)
244 %207 = call float @llvm.minnum.f32(float %206, float undef)
245 %208 = fcmp ogt float undef, 0.000000e+00
246 %209 = fcmp olt float undef, 1.000000e+00
247 %210 = and i1 %208, %209
248 %211 = fcmp olt float undef, %207
249 %212 = and i1 %210, %211
250 br i1 %212, label %ENDIF2795, label %ELSE2797
251
252 ELSE2584: ; preds = %IF2565
253 br label %ENDIF2582
254
255 ENDIF2582: ; preds = %ELSE2584, %IF2565
256 %213 = fadd float %1, undef
257 %214 = fadd float 0.000000e+00, %213
258 %215 = call float @llvm.AMDIL.fraction.(float %214)
259 br i1 undef, label %IF2589, label %ELSE2590
260
261 IF2589: ; preds = %ENDIF2582
262 br label %ENDIF2588
263
264 ELSE2590: ; preds = %ENDIF2582
265 br label %ENDIF2588
266
267 ENDIF2588: ; preds = %ELSE2590, %IF2589
268 %216 = fsub float 1.000000e+00, %215
269 %217 = call float @llvm.sqrt.f32(float %216)
270 %218 = fmul float %217, undef
271 %219 = fadd float %218, undef
272 br label %ENDIF2564
273
274 ELSE2593: ; preds = %ELSE2566
275 %220 = fcmp oeq float %temp292.11, %82
276 %221 = fcmp olt float %82, %84
277 %222 = and i1 %220, %221
278 br i1 %222, label %ENDIF2594, label %ELSE2596
279
280 ELSE2596: ; preds = %ELSE2593
281 %223 = fcmp oeq float %temp292.11, %101
282 %224 = fcmp olt float %101, %103
283 %225 = and i1 %223, %224
284 br i1 %225, label %ENDIF2594, label %ELSE2632
285
286 ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
287 %temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
288 %226 = fmul float %temp894.2, undef
289 br label %ENDIF2564
290
291 ELSE2632: ; preds = %ELSE2596
292 br i1 undef, label %ENDIF2594, label %ELSE2650
293
294 ELSE2650: ; preds = %ELSE2632
295 %227 = fcmp oeq float %temp292.11, %111
296 %228 = fcmp olt float %111, %112
297 %229 = and i1 %227, %228
298 br i1 %229, label %IF2667, label %ELSE2668
299
300 IF2667: ; preds = %ELSE2650
301 br i1 undef, label %ENDIF2594, label %ELSE2671
302
303 ELSE2668: ; preds = %ELSE2650
304 %230 = fcmp oeq float %temp292.11, %129
305 %231 = fcmp olt float %129, undef
306 %232 = and i1 %230, %231
307 br i1 %232, label %ENDIF2594, label %ELSE2686
308
309 ELSE2671: ; preds = %IF2667
310 br label %ENDIF2594
311
312 ELSE2686: ; preds = %ELSE2668
313 %233 = fcmp oeq float %temp292.11, %146
314 %234 = fcmp olt float %146, undef
315 %235 = and i1 %233, %234
316 br i1 %235, label %ENDIF2594, label %ELSE2704
317
318 ELSE2704: ; preds = %ELSE2686
319 %236 = fcmp oeq float %temp292.11, %181
320 %237 = fcmp olt float %181, undef
321 %238 = and i1 %236, %237
322 br i1 %238, label %ENDIF2594, label %ELSE2740
323
324 ELSE2740: ; preds = %ELSE2704
325 br i1 undef, label %IF2757, label %ELSE2758
326
327 IF2757: ; preds = %ELSE2740
328 br i1 undef, label %ENDIF2594, label %ELSE2761
329
330 ELSE2758: ; preds = %ELSE2740
331 br i1 undef, label %IF2775, label %ENDIF2594
332
333 ELSE2761: ; preds = %IF2757
334 br label %ENDIF2594
335
336 IF2775: ; preds = %ELSE2758
337 %239 = fcmp olt float undef, undef
338 br i1 %239, label %ENDIF2594, label %ELSE2779
339
340 ELSE2779: ; preds = %IF2775
341 br i1 undef, label %ENDIF2594, label %ELSE2782
342
343 ELSE2782: ; preds = %ELSE2779
344 br i1 undef, label %ENDIF2594, label %ELSE2785
345
346 ELSE2785: ; preds = %ELSE2782
347 %240 = fcmp olt float undef, 0.000000e+00
348 br i1 %240, label %ENDIF2594, label %ELSE2788
349
350 ELSE2788: ; preds = %ELSE2785
351 %241 = fcmp olt float 0.000000e+00, undef
352 %.2848 = select i1 %241, float -1.000000e+00, float 1.000000e+00
353 br label %ENDIF2594
354
355 ELSE2797: ; preds = %ENDIF2564
356 %242 = fsub float %8, undef
357 %243 = fsub float %9, undef
358 %244 = fsub float %10, undef
359 %245 = fmul float %242, undef
360 %246 = fmul float %243, undef
361 %247 = fmul float %244, undef
362 %248 = fsub float %11, undef
363 %249 = fsub float %12, undef
364 %250 = fsub float %13, undef
365 %251 = fmul float %248, undef
366 %252 = fmul float %249, undef
367 %253 = fmul float %250, undef
368 %254 = call float @llvm.minnum.f32(float %245, float %251)
369 %255 = call float @llvm.minnum.f32(float %246, float %252)
370 %256 = call float @llvm.maxnum.f32(float %247, float %253)
371 %257 = call float @llvm.maxnum.f32(float %254, float %255)
372 %258 = call float @llvm.maxnum.f32(float %257, float undef)
373 %259 = call float @llvm.minnum.f32(float undef, float %256)
374 %260 = fcmp ogt float %258, 0.000000e+00
375 %261 = fcmp olt float %258, 1.000000e+00
376 %262 = and i1 %260, %261
377 %263 = fcmp olt float %258, %259
378 %264 = and i1 %262, %263
379 br i1 %264, label %ENDIF2795, label %ELSE2800
380
381 ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
382 br label %LOOP
383
384 ELSE2800: ; preds = %ELSE2797
385 br i1 undef, label %ENDIF2795, label %ELSE2803
386
387 ELSE2803: ; preds = %ELSE2800
388 %265 = fsub float %20, undef
389 %266 = fsub float %21, undef
390 %267 = fsub float %22, undef
391 %268 = fmul float %265, undef
392 %269 = fmul float %266, undef
393 %270 = fmul float %267, 0.000000e+00
394 %271 = fsub float %23, undef
395 %272 = fsub float %24, undef
396 %273 = fsub float %25, undef
397 %274 = fmul float %271, undef
398 %275 = fmul float %272, undef
399 %276 = fmul float %273, undef
400 %277 = call float @llvm.minnum.f32(float %268, float %274)
401 %278 = call float @llvm.maxnum.f32(float %269, float %275)
402 %279 = call float @llvm.maxnum.f32(float %270, float %276)
403 %280 = call float @llvm.maxnum.f32(float %277, float undef)
404 %281 = call float @llvm.maxnum.f32(float %280, float undef)
405 %282 = call float @llvm.minnum.f32(float undef, float %278)
406 %283 = call float @llvm.minnum.f32(float %282, float %279)
407 %284 = fcmp ogt float %281, 0.000000e+00
408 %285 = fcmp olt float %281, 1.000000e+00
409 %286 = and i1 %284, %285
410 %287 = fcmp olt float %281, %283
411 %288 = and i1 %286, %287
412 br i1 %288, label %ENDIF2795, label %ELSE2806
413
414 ELSE2806: ; preds = %ELSE2803
415 %289 = fsub float %26, undef
416 %290 = fsub float %27, undef
417 %291 = fsub float %28, undef
418 %292 = fmul float %289, undef
419 %293 = fmul float %290, 0.000000e+00
420 %294 = fmul float %291, undef
421 %295 = fsub float %29, undef
422 %296 = fmul float %295, undef
423 %297 = call float @llvm.minnum.f32(float %292, float %296)
424 %298 = call float @llvm.minnum.f32(float %293, float undef)
425 %299 = call float @llvm.maxnum.f32(float %294, float undef)
426 %300 = call float @llvm.maxnum.f32(float %297, float %298)
427 %301 = call float @llvm.maxnum.f32(float %300, float undef)
428 %302 = call float @llvm.minnum.f32(float undef, float %299)
429 %303 = fcmp ogt float %301, 0.000000e+00
430 %304 = fcmp olt float %301, 1.000000e+00
431 %305 = and i1 %303, %304
432 %306 = fcmp olt float %301, %302
433 %307 = and i1 %305, %306
434 br i1 %307, label %ENDIF2795, label %ELSE2809
435
436 ELSE2809: ; preds = %ELSE2806
437 br i1 undef, label %ENDIF2795, label %ELSE2812
438
439 ELSE2812: ; preds = %ELSE2809
440 br i1 undef, label %ENDIF2795, label %ELSE2815
441
442 ELSE2815: ; preds = %ELSE2812
443 br i1 undef, label %ENDIF2795, label %ELSE2818
444
445 ELSE2818: ; preds = %ELSE2815
446 br i1 undef, label %ENDIF2795, label %ELSE2821
447
448 ELSE2821: ; preds = %ELSE2818
449 %308 = fsub float %56, undef
450 %309 = fsub float %57, undef
451 %310 = fsub float %58, undef
452 %311 = fmul float %308, undef
453 %312 = fmul float %309, 0.000000e+00
454 %313 = fmul float %310, undef
455 %314 = fsub float %59, undef
456 %315 = fsub float %60, undef
457 %316 = fsub float %61, undef
458 %317 = fmul float %314, undef
459 %318 = fmul float %315, undef
460 %319 = fmul float %316, undef
461 %320 = call float @llvm.maxnum.f32(float %311, float %317)
462 %321 = call float @llvm.maxnum.f32(float %312, float %318)
463 %322 = call float @llvm.maxnum.f32(float %313, float %319)
464 %323 = call float @llvm.minnum.f32(float %320, float %321)
465 %324 = call float @llvm.minnum.f32(float %323, float %322)
466 %325 = fcmp ogt float undef, 0.000000e+00
467 %326 = fcmp olt float undef, 1.000000e+00
468 %327 = and i1 %325, %326
469 %328 = fcmp olt float undef, %324
470 %329 = and i1 %327, %328
471 br i1 %329, label %ENDIF2795, label %ELSE2824
472
473 ELSE2824: ; preds = %ELSE2821
474 %.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
475 br label %ENDIF2795
476 }
477
478 ; Function Attrs: nounwind readnone
479 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
480
481 ; Function Attrs: readnone
482 declare float @llvm.AMDIL.fraction.(float) #2
483
484 ; Function Attrs: nounwind readnone
485 declare float @llvm.sqrt.f32(float) #1
486
487 ; Function Attrs: nounwind readnone
488 declare float @llvm.minnum.f32(float, float) #1
489
490 ; Function Attrs: nounwind readnone
491 declare float @llvm.maxnum.f32(float, float) #1
492
493 ; Function Attrs: readnone
494 declare float @llvm.AMDGPU.lrp(float, float, float) #2
495
496 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
497
498 attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
499 attributes #1 = { nounwind readnone }
500 attributes #2 = { readnone }