llvm.org GIT mirror llvm / ada6595
CodeGen: Allow small copyable blocks to "break" the CFG. When choosing the best successor for a block, ordinarily we would have preferred a block that preserves the CFG unless there is a strong probability in the other direction. For small blocks that can be duplicated we now skip that requirement as well. Differential revision: https://reviews.llvm.org/D27742 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291609 91177308-0d34-0410-b5e6-96231b3b80d8 Kyle Butt 3 years ago
58 changed file(s) with 485 addition(s) and 225 deletion(s). Raw diff Collapse all Expand all
402402 void buildCFGChains();
403403 void optimizeBranches();
404404 void alignBlocks();
405 bool shouldTailDuplicate(MachineBasicBlock *BB);
406 bool canTailDuplicateUnplacedPreds(
407 MachineBasicBlock *BB, MachineBasicBlock *Succ,
408 BlockChain &Chain, const BlockFilterSet *BlockFilter);
405409
406410 public:
407411 static char ID; // Pass identification, replacement for typeid
560564 return SuccProb;
561565 }
562566
567 /// Check if a block should be tail duplicated.
568 /// \p BB Block to check.
569 bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
570 // Blocks with single successors don't create additional fallthrough
571 // opportunities. Don't duplicate them. TODO: When conditional exits are
572 // analyzable, allow them to be duplicated.
573 bool IsSimple = TailDup.isSimpleBB(BB);
574
575 if (BB->succ_size() == 1)
576 return false;
577 return TailDup.shouldTailDuplicate(IsSimple, *BB);
578 }
579
580 /// When the option TailDupPlacement is on, this method checks if the
581 /// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
582 /// into all of its unplaced, unfiltered predecessors that are not BB. In
583 /// addition we keep a set of blocks that have been tail-duplicated into and
584 /// allow those blocks to be unplaced as well. This allows the creation of a
585 /// second (larger) spine and a short fallthrough spine.
586 /// We also identify blocks with the CFG that would have been produced by
587 /// tail-duplication and lay them out in the same manner.
588 bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
589 MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
590 const BlockFilterSet *BlockFilter) {
591 if (!shouldTailDuplicate(Succ))
592 return false;
593
594 for (MachineBasicBlock *Pred : Succ->predecessors()) {
595 // Make sure all unplaced and unfiltered predecessors can be
596 // tail-duplicated into.
597 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
598 || BlockToChain[Pred] == &Chain)
599 continue;
600 if (!TailDup.canTailDuplicate(Succ, Pred))
601 return false;
602 }
603 return true;
604 }
605
563606 /// When the option OutlineOptionalBranches is on, this method
564607 /// checks if the fallthrough candidate block \p Succ (of block
565608 /// \p BB) also has other unscheduled predecessor blocks which
631674
632675 // There isn't a better layout when there are no unscheduled predecessors.
633676 if (SuccChain.UnscheduledPredecessors == 0)
677 return false;
678
679 // As a heuristic, if we can duplicate the block into all its unscheduled
680 // predecessors, we return false.
681 if (TailDupPlacement
682 && canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter))
634683 return false;
635684
636685 // There are two basic scenarios here:
19071956 DuplicatedToLPred = false;
19081957 DEBUG(dbgs() << "Redoing tail duplication for Succ#"
19091958 << BB->getNumber() << "\n");
1910 bool IsSimple = TailDup.isSimpleBB(BB);
1911 // Blocks with single successors don't create additional fallthrough
1912 // opportunities. Don't duplicate them. TODO: When conditional exits are
1913 // analyzable, allow them to be duplicated.
1914 if (!IsSimple && BB->succ_size() == 1)
1915 return false;
1916 if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
1959
1960 if (!shouldTailDuplicate(BB))
19171961 return false;
19181962 // This has to be a callback because none of it can be done after
19191963 // BB is deleted.
19662010 llvm::function_ref(RemovalCallback);
19672011
19682012 SmallVector DuplicatedPreds;
2013 bool IsSimple = TailDup.isSimpleBB(BB);
19692014 TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
19702015 &DuplicatedPreds, &RemovalCallbackRef);
19712016
139139
140140 test5:
141141 ; CHECK: cmn {{w[0-9]+}}, #444
142 ; CHECK: b.gt [[RET]]
142 ; CHECK: b.le [[TEST6:.?LBB[0-9]+_[0-9]+]]
143143 %newval5 = add i32 %val, 4
144144 store i32 %newval5, i32* @var_i32
145145 %cmp_neg_uge = icmp sgt i32 %val2, -444
146146 br i1 %cmp_neg_uge, label %ret, label %test6
147
148 ; CHECK: {{^}}[[RET]]:
149 ; CHECK: ret
150 ; CHECK: {{^}}[[TEST6]]:
151 ; CHECK: ret
147152
148153 test6:
149154 %newval6 = add i32 %val, 5
88 ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
99 ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
1010 ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
11 ; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
11 ; CHECK-NEXT: ret
1212 ; CHECK-NEXT: [[FAILBB]]:
1313 ; CHECK-NEXT: clrex
14 ; CHECK-NEXT: [[EXITBB]]:
14 ; CHECK-NEXT: ret
1515 %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
1616 %val = extractvalue { i32, i1 } %pair, 0
1717 ret i32 %val
2626 ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
2727 ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]
2828 ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
29 ; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
29 ; CHECK-NEXT: mov x0, x[[ADDR]]
30 ; CHECK-NEXT: ret
3031 ; CHECK-NEXT: [[FAILBB]]:
3132 ; CHECK-NEXT: clrex
32 ; CHECK-NEXT: [[EXITBB]]:
33 ; CHECK-NEXT: mov x0, x[[ADDR]]
34 ; CHECK-NEXT: ret
3335 %new = load i32, i32* %pnew
3436 %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
3537 %val = extractvalue { i32, i1 } %pair, 0
4042 ; CHECK-LABEL: val_compare_and_swap_rel:
4143 ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
4244 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
43 ; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]
45 ; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]
4446 ; CHECK-NEXT: cmp [[RESULT]], w1
4547 ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
46 ; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]
48 ; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
4749 ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
48 ; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
50 ; CHECK-NEXT: ret
4951 ; CHECK-NEXT: [[FAILBB]]:
5052 ; CHECK-NEXT: clrex
51 ; CHECK-NEXT: [[EXITBB]]:
53 ; CHECK-NEXT: ret
5254 %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
5355 %val = extractvalue { i32, i1 } %pair, 0
5456 ret i32 %val
6365 ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
6466 ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]
6567 ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
66 ; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
68 ; CHECK-NEXT: ret
6769 ; CHECK-NEXT: [[FAILBB]]:
6870 ; CHECK-NEXT: clrex
69 ; CHECK-NEXT: [[EXITBB]]:
71 ; CHECK-NEXT: ret
7072 %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
7173 %val = extractvalue { i64, i1 } %pair, 0
7274 ret i64 %val
107107 ; CHECK: cmp w0, #1
108108 ; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0
109109 ; CHECK: ccmp [[DIVRES]], #16, #0, ge
110 ; CHECK: b.gt [[BLOCK:LBB[0-9_]+]]
110 ; CHECK: b.le [[BLOCK:LBB[0-9_]+]]
111 ; CHECK: orr w0, wzr, #0x7
112 ; CHECK: [[BLOCK]]:
111113 ; CHECK: bl _foo
112 ; CHECK: [[BLOCK]]:
113 ; CHECK: orr w0, wzr, #0x7
114114 define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
115115 entry:
116116 %cmp = icmp sgt i32 %a, 0
134134 ; CHECK: cmp
135135 ; CHECK-NOT: b.
136136 ; CHECK: fccmp {{.*}}, #8, ge
137 ; CHECK: b.lt
137 ; CHECK: b.ge
138138 define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
139139 entry:
140140 %cmp = icmp sgt i32 %a, 0
345345 ; CHECK-NEXT: sub w1, w1, #1
346346 ; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
347347 ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
348 ; DISABLE-NEXT: b [[IFEND_LABEL]]
349 ;
350 ; DISABLE: [[ELSE_LABEL]]: ; %if.else
351 ; DISABLE: lsl w0, w1, #1
352 ;
353 ; CHECK: [[IFEND_LABEL]]:
348 ; CHECK-NEXT: [[IFEND_LABEL]]:
354349 ; Epilogue code.
355350 ; CHECK: add sp, sp, #16
356351 ; CHECK-NEXT: ret
357352 ;
358 ; ENABLE: [[ELSE_LABEL]]: ; %if.else
359 ; ENABLE-NEXT: lsl w0, w1, #1
360 ; ENABLE_NEXT: ret
353 ; CHECK: [[ELSE_LABEL]]: ; %if.else
354 ; CHECK-NEXT: lsl w0, w1, #1
355 ; DISABLE-NEXT: add sp, sp, #16
356 ; CHECK-NEXT: ret
361357 define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
362358 entry:
363359 %ap = alloca i8*, align 8
2626 %val4 = load volatile i64, i64* @var64
2727 %tst4 = icmp ne i64 %val4, 0
2828 br i1 %tst4, label %end, label %test5, !prof !1
29 ; CHECK: cbnz {{x[0-9]+}}, .LBB
29 ; CHECK: cbz {{x[0-9]+}}, .LBB
3030
3131 test5:
3232 store volatile i64 %val4, i64* @var64
209209
210210 test3:
211211 ; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12
212 ; CHECK: b.gt .L
212 ; CHECK: b.le .L
213213 %asr_op = ashr i64 %val2, 12
214214 %asr_and = and i64 %asr_op, %val1
215215 %tst3 = icmp sgt i64 %asr_and, 0
99 br i1 %cmp, label %if.then, label %if.end
1010
1111 ; CHECK: sub [[CMP:w[0-9]+]], w0, #12
12 ; CHECK: tbz [[CMP]], #31
12 ; CHECK: tbnz [[CMP]], #31
1313
1414 if.then:
1515 call void @t()
2727 br i1 %cmp, label %if.then, label %if.end
2828
2929 ; CHECK: sub [[CMP:x[0-9]+]], x0, #12
30 ; CHECK: tbz [[CMP]], #63
30 ; CHECK: tbnz [[CMP]], #63
3131
3232 if.then:
3333 call void @t()
117117 br i1 %cmp, label %if.then, label %if.end
118118
119119 ; CHECK: sub [[CMP:w[0-9]+]], w0, #12
120 ; CHECK: tbz [[CMP]], #31
120 ; CHECK: tbnz [[CMP]], #31
121121
122122 if.then:
123123 call void @t()
177177 br i1 %tst, label %if.then, label %if.end
178178
179179 ; CHECK-NOT: cmp
180 ; CHECK: tbz x0, #63
180 ; CHECK: tbnz x0, #63
181181
182182 if.then:
183183 call void @t()
193193 br i1 %tst, label %if.then, label %if.end
194194
195195 ; CHECK-NOT: cmp
196 ; CHECK: tbz x0, #63
196 ; CHECK: tbnz x0, #63
197197
198198 if.then:
199199 call void @t()
208208
209209 ; CHECK: ldr [[CMP:x[0-9]+]], [x1]
210210 ; CHECK-NOT: cmp
211 ; CHECK: tbz [[CMP]], #63
211 ; CHECK: tbnz [[CMP]], #63
212212
213213 %val = load i64, i64* %ptr
214214 %tst = icmp slt i64 %val, 0
228228 br i1 %tst, label %if.then, label %if.end
229229
230230 ; CHECK-NOT: cmp
231 ; CHECK: tbz x0, #63
231 ; CHECK: tbnz x0, #63
232232
233233 if.then:
234234 call void @t()
246246
247247 ; CHECK: orr [[CMP:x[0-9]+]], x0, x1
248248 ; CHECK-NOT: cmp
249 ; CHECK: tbz [[CMP]], #63
249 ; CHECK: tbnz [[CMP]], #63
250250
251251 if.then:
252252 call void @t()
261261 br i1 %cond, label %if.end, label %if.then
262262
263263 ; CHECK-NOT: and
264 ; CHECK: tbnz w0, #0
264 ; CHECK: tbz w0, #0
265265
266266 if.then:
267267 call void @t()
334334 ; GCN-NEXT: ;;#ASMEND
335335
336336 ; GCN-NEXT: [[BB3]]: ; %bb3
337 ; GCN-NEXT: ;;#ASMSTART
338 ; GCN-NEXT: v_nop_e64
339 ; GCN-NEXT: ;;#ASMEND
340 ; GCN-NEXT: ;;#ASMSTART
341 ; GCN-NEXT: v_nop_e64
342 ; GCN-NEXT: ;;#ASMEND
337343 ; GCN-NEXT: s_endpgm
338344 define void @expand_requires_expand(i32 %cond0) #0 {
339345 bb0:
355361 br label %bb3
356362
357363 bb3:
364 ; These NOPs prevent tail-duplication-based outlining
365 ; from firing, which would remove the need to expand the branches and defeat this test.
366 call void asm sideeffect
367 "v_nop_e64", ""() #0
368 call void asm sideeffect
369 "v_nop_e64", ""() #0
358370 ret void
359371 }
360372
384396
385397 ; GCN-NEXT: [[ENDIF]]: ; %endif
386398 ; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
399 ; GCN-NEXT: s_sleep 5
387400 ; GCN-NEXT: s_endpgm
388401 define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 {
389402 entry:
401414 br label %endif
402415
403416 endif:
417 ; layout can remove the split branch if it can copy the return block.
418 ; This call makes the return block long enough that it doesn't get copied.
419 call void @llvm.amdgcn.s.sleep(i32 5);
404420 ret void
405421 }
406422
3636 ; OPT-NOT: call i1 @llvm.amdgcn.loop
3737
3838 ; GCN-LABEL: {{^}}annotate_ret_noloop:
39 ; GCN: s_cbranch_scc1
39 ; GCN: s_cbranch_scc0 [[BODY:BB[0-9]+_[0-9]+]]
40 ; GCN: s_endpgm
41
42 ; GCN: {{^}}[[BODY]]:
4043 ; GCN: s_endpgm
4144 ; GCN: .Lfunc_end1
4245 define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
262262 ; CHECK-NEXT: s_endpgm
263263
264264 ; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
265 ; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]]
266
267 ; CHECK: [[PHIBB]]:
265 ; CHECK-NEXT: s_cbranch_scc1 [[BB8:BB[0-9]+_[0-9]+]]
266
268267 ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
269 ; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
270
271 ; CHECK: ; %bb10
268 ; CHECK-NEXT: s_cbranch_vccnz [[BB10:BB[0-9]+_[0-9]+]]
269 ; CHECK-NEXT: s_branch [[END:BB[0-9]+_[0-9]+]]
270
271 ; CHECK [[BB8]]: ; %BB8
272 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 8
273 ; CHECK: buffer_store_dword
274 ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
275 ; CHECK-NEXT: s_cbranch_vccz [[END]]
276
277 ; CHECK: [[BB10]]: ; %bb10
272278 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
273279 ; CHECK: buffer_store_dword
274280
275 ; CHECK: [[ENDBB]]:
276 ; CHECK-NEXT: s_endpgm
281 ; CHECK: [[END:BB[0-9]+_[0-9]+]]: ; %end
282 ; CHECK-NEXT: s_endpgm
283
277284 define amdgpu_ps void @phi_use_def_before_kill() #0 {
278285 bb:
279286 %tmp = fadd float undef, 1.000000e+00
251251 ; GCN: s_cmp_lt_i32 [[COND]], 1
252252 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
253253 ; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}}
254 ; GCN: s_cbranch_vccnz [[EXIT]]
254 ; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]]
255 ; GCN: {{^}}[[EXIT]]:
256 ; GCN: s_endpgm
257 ; GCN: {{^}}[[BODY]]:
255258 ; GCN: buffer_store
256 ; GCN: {{^}}[[EXIT]]:
257259 ; GCN: s_endpgm
258260 define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
259261 bb:
301303 ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
302304 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
303305 ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
304 ; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
305306 ; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
306 ; GCN: s_cbranch_scc1 [[ENDIF_LABEL]]
307 ; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]]
308 ; GCN: s_endpgm
309 ; GCN: {{^}}[[IF_UNIFORM_LABEL]]:
307310 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
308311 ; GCN: buffer_store_dword [[ONE]]
309312 define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
327330
328331 ; GCN-LABEL: {{^}}divergent_inside_uniform:
329332 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
330 ; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
333 ; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
334 ; GCN: [[IF_LABEL]]:
331335 ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
332336 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
333337 ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
334338 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
335339 ; GCN: buffer_store_dword [[ONE]]
336 ; GCN: [[ENDIF_LABEL]]:
337 ; GCN: s_endpgm
338340 define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
339341 entry:
340342 %u_cmp = icmp eq i32 %cond, 0
362364 ; GCN: buffer_store_dword [[ONE]]
363365 ; GCN: s_or_b64 exec, exec, [[MASK]]
364366 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
365 ; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
367 ; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]]
368 ; GCN: s_endpgm
369 ; GCN: [[IF_UNIFORM]]:
366370 ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
367371 ; GCN: buffer_store_dword [[TWO]]
368 ; GCN: [[EXIT]]:
369 ; GCN: s_endpgm
370372 define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
371373 entry:
372374 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
397399 ; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks:
398400 ; GCN: s_load_dword [[COND:s[0-9]+]]
399401 ; GCN: s_cmp_lt_i32 [[COND]], 1
400 ; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
402 ; GCN: s_cbranch_scc1 [[FN:BB[0-9_]+]]
401403
402404 ; GCN: BB#1:
403405 ; GCN-NOT: cmp
404406 ; GCN: buffer_load_dword
405407 ; GCN: buffer_store_dword
406 ; GCN: s_cbranch_scc1 BB[[FNNUM]]_3
407
408 ; GCN: BB[[FNNUM]]_3:
409 ; GCN: s_endpgm
408 ; GCN: s_cbranch_scc0 [[BB7:BB[0-9_]+]]
409
410 ; GCN: [[FN]]:
411 ; GCN: s_endpgm
412
413 ; GCN: [[BB7]]:
414 ; GCN: s_endpgm
415
410416 define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
411417 bb:
412418 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
4848 ; V8-NEXT: beq
4949 ; V8-NEXT: %tailrecurse.switch
5050 ; V8: cmp
51 ; V8-NEXT: bne
52 ; V8-NEXT: b
53 ; The trailing space in the last line checks that the branch is unconditional
51 ; V8-NEXT: beq
52 ; V8-NEXT: %sw.epilog
53 ; V8-NEXT: bx lr
5454 switch i32 %and, label %sw.epilog [
5555 i32 1, label %sw.bb
5656 i32 3, label %sw.bb6
319319 ; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
320320 ; CHECK: cmp [[SUCCESS]], #0
321321 ; CHECK: bne [[LOOP_BB]]
322 ; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]]
322 ; CHECK: dmb ish
323 ; CHECK: bx lr
323324 ; CHECK: [[FAIL_BB]]:
324325 ; CHECK-NEXT: clrex
325 ; CHECK-NEXT: [[END_BB]]:
326326 ; CHECK: dmb ish
327327 ; CHECK: bx lr
328328
10441044 ; function there.
10451045 ; CHECK-ARM-NEXT: cmp r[[OLD]], r0
10461046 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
1047 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
1047 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
10481048 ; CHECK-NEXT: BB#2:
10491049 ; As above, r1 is a reasonable guess.
10501050 ; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
10511051 ; CHECK-NEXT: cmp [[STATUS]], #0
10521052 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
1053 ; CHECK-NEXT: b .LBB{{[0-9]+}}_4
1054 ; CHECK-NEXT: .LBB{{[0-9]+}}_3:
1053 ; CHECK-ARM: mov r0, r[[OLD]]
1054 ; CHECK: bx lr
1055 ; CHECK-NEXT: .LBB{{[0-9]+}}_4:
10551056 ; CHECK-NEXT: clrex
1056 ; CHECK-NEXT: .LBB{{[0-9]+}}_4:
10571057 ; CHECK-NOT: dmb
10581058 ; CHECK-NOT: mcr
10591059
10601060 ; CHECK-ARM: mov r0, r[[OLD]]
1061 ; CHECK-ARM-NEXT: bx lr
10611062 ret i8 %old
10621063 }
10631064
10771078 ; function there.
10781079 ; CHECK-ARM-NEXT: cmp r[[OLD]], r0
10791080 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
1080 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
1081 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
10811082 ; CHECK-NEXT: BB#2:
10821083 ; As above, r1 is a reasonable guess.
10831084 ; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
10841085 ; CHECK-NEXT: cmp [[STATUS]], #0
10851086 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
1086 ; CHECK-NEXT: b .LBB{{[0-9]+}}_4
1087 ; CHECK-NEXT: .LBB{{[0-9]+}}_3:
1087 ; CHECK-ARM: mov r0, r[[OLD]]
1088 ; CHECK: bx lr
1089 ; CHECK-NEXT: .LBB{{[0-9]+}}_4:
10881090 ; CHECK-NEXT: clrex
1089 ; CHECK-NEXT: .LBB{{[0-9]+}}_4:
10901091 ; CHECK-NOT: dmb
10911092 ; CHECK-NOT: mcr
10921093
10931094 ; CHECK-ARM: mov r0, r[[OLD]]
1095 ; CHECK-ARM-NEXT: bx lr
10941096 ret i16 %old
10951097 }
10961098
11091111 ; r0 below is a reasonable guess but could change: it certainly comes into the
11101112 ; function there.
11111113 ; CHECK-NEXT: cmp r[[OLD]], r0
1112 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
1114 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
11131115 ; CHECK-NEXT: BB#2:
11141116 ; As above, r1 is a reasonable guess.
11151117 ; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
11161118 ; CHECK-NEXT: cmp [[STATUS]], #0
11171119 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
1118 ; CHECK-NEXT: b .LBB{{[0-9]+}}_4
1119 ; CHECK-NEXT: .LBB{{[0-9]+}}_3:
1120 ; CHECK: str{{(.w)?}} r[[OLD]],
1121 ; CHECK-NEXT: bx lr
1122 ; CHECK-NEXT: .LBB{{[0-9]+}}_4:
11201123 ; CHECK-NEXT: clrex
1121 ; CHECK-NEXT: .LBB{{[0-9]+}}_4:
11221124 ; CHECK-NOT: dmb
11231125 ; CHECK-NOT: mcr
11241126
11251127 ; CHECK: str{{(.w)?}} r[[OLD]],
1128 ; CHECK-ARM-NEXT: bx lr
11261129 ret void
11271130 }
11281131
11471150 ; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
11481151 ; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
11491152 ; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]]
1150 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
1153 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
11511154 ; CHECK-NEXT: BB#2:
11521155 ; As above, r2, r3 is a reasonable guess.
11531156 ; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
11541157 ; CHECK-NEXT: cmp [[STATUS]], #0
11551158 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
1156 ; CHECK-NEXT: b .LBB{{[0-9]+}}_4
1157 ; CHECK-NEXT: .LBB{{[0-9]+}}_3:
1159 ; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
1160 ; CHECK-NEXT: pop
1161 ; CHECK-NEXT: .LBB{{[0-9]+}}_4:
11581162 ; CHECK-NEXT: clrex
1159 ; CHECK-NEXT: .LBB{{[0-9]+}}_4:
11601163 ; CHECK-NOT: dmb
11611164 ; CHECK-NOT: mcr
11621165
1212 ; CHECK-NEXT: dmb ish
1313 ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0]
1414 ; CHECK-NEXT: cmp [[SUCCESS]], #0
15 ; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
15 ; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]]
1616 ; CHECK-NEXT: BB#2:
17 ; CHECK-NEXT: dmb ish
1817 ; CHECK-NEXT: str r3, [r0]
1918 ; CHECK-NEXT: bx lr
2019 ; CHECK-NEXT: [[LDFAILBB]]:
2120 ; CHECK-NEXT: clrex
22 ; CHECK-NEXT: [[FAILBB]]:
21 ; CHECK-NEXT: str r3, [r0]
22 ; CHECK-NEXT: bx lr
23 ; CHECK-NEXT: [[SUCCESSBB]]:
24 ; CHECK-NEXT: dmb ish
2325 ; CHECK-NEXT: str r3, [r0]
2426 ; CHECK-NEXT: bx lr
2527
5151 ; CHECK-LABEL: f3:
5252 ; CHECK-NOT: sub
5353 ; CHECK: cmp
54 ; CHECK: blt
54 ; CHECK: bge
5555 %0 = load i32, i32* %offset, align 4
5656 %cmp = icmp slt i32 %0, %size
5757 %s = sub nsw i32 %0, %size
77 entry:
88 %0 = load i32, i32* @i, align 4
99 %1 = load i32, i32* @j, align 4
10 %cmp = icmp eq i32 %0, %1
10 %cmp = icmp ne i32 %0, %1
1111 ; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
1212 ; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
1313 ; 16: $[[LABEL]]:
14 br i1 %cmp, label %if.end, label %if.then
14 br i1 %cmp, label %if.then, label %if.end
1515
1616 if.then: ; preds = %entry
1717 store i32 1, i32* @result, align 4
55 define void @test() nounwind {
66 entry:
77 %0 = load i32, i32* @i, align 4
8 %cmp = icmp eq i32 %0, 10
9 br i1 %cmp, label %if.end, label %if.then
8 %cmp = icmp ne i32 %0, 10
9 br i1 %cmp, label %if.then, label %if.end
1010 ; 16: cmpi ${{[0-9]+}}, {{[0-9]+}}
1111 ; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
1212 ; 16: $[[LABEL]]:
88 entry:
99 %0 = load i32, i32* @i, align 4
1010 %1 = load i32, i32* @j, align 4
11 %cmp = icmp sgt i32 %0, %1
12 br i1 %cmp, label %if.end, label %if.then
11 %cmp = icmp sle i32 %0, %1
12 br i1 %cmp, label %if.then, label %if.end
1313 ; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
1414 ; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
1515 ; 16: $[[LABEL]]:
99 entry:
1010 %0 = load i32, i32* @j, align 4
1111 %1 = load i32, i32* @i, align 4
12 %cmp = icmp slt i32 %0, %1
13 br i1 %cmp, label %if.end, label %if.then
12 %cmp = icmp sge i32 %0, %1
13 br i1 %cmp, label %if.then, label %if.end
1414
1515 ; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
1616 ; MM32R6: slt ${{[0-9]+}}, ${{[0-9]+}}
66 entry:
77 %0 = load i32, i32* @j, align 4
88 %cmp = icmp eq i32 %0, 0
9 br i1 %cmp, label %if.then, label %if.end
9 br i1 %cmp, label %if.then, label %if.end, !prof !1
1010
1111 ; 16: bnez ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]]
1212 ; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}})
2020 ret void
2121 }
2222
23
23 !1 = !{!"branch_weights", i32 2, i32 1}
9090 ; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
9191 ; M2: or $3, $[[T3]], $[[T2]]
9292 ; M2: $[[BB0]]:
93 ; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]]
93 ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
9494 ; M2: nop
95 ; M2: sra $2, $4, 31
95 ; M2: jr $ra
96 ; M2: nop
9697 ; M2: $[[BB1]]:
9798 ; M2: jr $ra
98 ; M2: nop
99 ; M2: sra $2, $4, 31
99100
100101 ; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7
101102 ; 32R1-R5: not $[[T1:[0-9]+]], $7
176177 ; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
177178 ; M3: or $3, $[[T7]], $[[T4]]
178179 ; M3: [[BB0]]:
179 ; M3: beqz $[[T3]], [[BB1:.LBB[0-9_]+]]
180 ; M3: bnez $[[T3]], [[BB1:.LBB[0-9_]+]]
180181 ; M3: nop
181 ; M3: dsra $2, $4, 63
182 ; M3: jr $ra
183 ; M3: nop
182184 ; M3: [[BB1]]:
183185 ; M3: jr $ra
184 ; M3: nop
186 ; M3: dsra $2, $4, 63
185187
186188 ; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7
187189 ; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1
55 %x = alloca i32, align 4
66 %0 = load i32, i32* %x, align 4
77 %cmp = icmp eq i32 %0, 0
8 br i1 %cmp, label %if.then, label %if.end
8 br i1 %cmp, label %if.then, label %if.end, !prof !1
99
1010 if.then:
1111 store i32 10, i32* %x, align 4
1616 }
1717
1818 ; CHECK: bnezc
19 !1 = !{!"branch_weights", i32 2, i32 1}
1616 %sum1 = add i32 %sumin, 1
1717 %val1 = load i32, i32* %ptr
1818 %p = icmp eq i32 %sumin, 0
19 br i1 %p, label %true, label %end
19 br i1 %p, label %true, label %end, !prof !1
2020 true:
2121 %sum2 = add i32 %sum1, 1
2222 %ptr2 = getelementptr i32, i32* %ptr, i32 1
5252 ret i32 %valmerge
5353 }
5454 declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
55
56 !1 = !{!"branch_weights", i32 2, i32 1}
0 ; RUN: llc -O2 -o - %s | FileCheck %s
1 target datalayout = "e-m:e-i64:64-n32:64"
2 target triple = "powerpc64le-grtev4-linux-gnu"
3
4 ; Intended layout:
5 ; The code for tail-duplication during layout will produce the layout:
6 ; test1
7 ; test2
8 ; body1 (with copy of test2)
9 ; body2
10 ; exit
11
12 ;CHECK-LABEL: tail_dup_break_cfg:
13 ;CHECK: mr [[TAGREG:[0-9]+]], 3
14 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
15 ;CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]]
16 ;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2
17 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
18 ;CHECK-NEXT: bne 0, [[BODY2LABEL:[._0-9A-Za-z]+]]
19 ;CHECK-NEXT: b [[EXITLABEL:[._0-9A-Za-z]+]]
20 ;CHECK-NEXT: [[BODY1LABEL]]
21 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
22 ;CHECK-NEXT: beq 0, [[EXITLABEL]]
23 ;CHECK-NEXT: [[BODY2LABEL]]
24 ;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
25 ;CHECK: blr
26 define void @tail_dup_break_cfg(i32 %tag) {
27 entry:
28 br label %test1
29 test1:
30 %tagbit1 = and i32 %tag, 1
31 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
32 br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
33 body1:
34 call void @a()
35 call void @a()
36 call void @a()
37 call void @a()
38 br label %test2
39 test2:
40 %tagbit2 = and i32 %tag, 2
41 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
42 br i1 %tagbit2eq0, label %exit, label %body2
43 body2:
44 call void @b()
45 call void @b()
46 call void @b()
47 call void @b()
48 br label %exit
49 exit:
50 ret void
51 }
52
53 declare void @a()
54 declare void @b()
55 declare void @c()
56 declare void @d()
57
58 !1 = !{!"branch_weights", i32 2, i32 1}
1818 ; The CHECK statements check for the whole string of tests and exit block,
1919 ; and then check that the correct test has been duplicated into the end of
2020 ; the optional blocks and that the optional blocks are in the correct order.
21 ;CHECK-LABEL: f:
21 ;CHECK-LABEL: straight_test:
2222 ; test1 may have been merged with entry
2323 ;CHECK: mr [[TAGREG:[0-9]+]], 3
2424 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
4646 ;CHECK-NEXT: [[OPT4LABEL]]
4747 ;CHECK: b [[EXITLABEL]]
4848
49 define void @f(i32 %tag) {
49 define void @straight_test(i32 %tag) {
5050 entry:
5151 br label %test1
5252 test1:
9393 ret void
9494 }
9595
96 ; The block then2 is not unavoidable, but since it can be tail-duplicated, it
97 ; should be placed as a fallthrough from test2 and copied.
98 ; CHECK-LABEL: avoidable_test:
99 ; CHECK: # %entry
100 ; CHECK: andi.
101 ; CHECK: # %test2
102 ; Make sure then2 falls through from test2
103 ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
104 ; CHECK: # %then2
105 ; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
106 ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
107 ; CHECK: # %end2
108 ; CHECK: # %else1
109 ; CHECK: bl a
110 ; CHECK: bl a
111 ; Make sure then2 was copied into else1
112 ; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
113 ; CHECK: # %else2
114 ; CHECK: bl c
115 define void @avoidable_test(i32 %tag) {
116 entry:
117 br label %test1
118 test1:
119 %tagbit1 = and i32 %tag, 1
120 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
121 br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
122 else1:
123 call void @a()
124 call void @a()
125 br label %then2
126 test2:
127 %tagbit2 = and i32 %tag, 2
128 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
129 br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
130 then2:
131 %tagbit3 = and i32 %tag, 4
132 %tagbit3eq0 = icmp eq i32 %tagbit3, 0
133 br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
134 else2:
135 call void @c()
136 br label %end2
137 end2:
138 ret void
139 end1:
140 call void @d()
141 ret void
142 }
143
96144 declare void @a()
97145 declare void @b()
98146 declare void @c()
99147 declare void @d()
148
149 !1 = !{!"branch_weights", i32 2, i32 1}
6565 ; CHECK: ba .LBB1_1
6666 ; CHECK: nop
6767 ; CHECK:.LBB1_1: ! %entry
68 ; CHECK: ba .LBB1_3
6968 ; CHECK: mov %g0, %i0
69 ; CHECK: cmp %i0, 0
70 ; CHECK: bne .LBB1_4
71 ; CHECK: ba .LBB1_5
7072 ; CHECK:.LBB1_2: ! Block address taken
7173 ; CHECK: mov 1, %i0
72 ; CHECK:.LBB1_3: ! %entry
73 ; CHECK: cmp %i0, 0
7474 ; CHECK: be .LBB1_5
75 ; CHECK: nop
75 ; CHECK:.LBB1_4:
76 ; CHECK: ba .LBB1_6
7677 }
7778 declare i8* @llvm.frameaddress(i32) #2
7879
296296 ; CHECK: iihf [[REG]], 2102030405
297297 ; CHECK: blah [[REG]]
298298 ; CHECK: br %r14
299 %cmp = icmp eq i32 %x, 0
299 %cmp = icmp ne i32 %x, 0
300300 %val = select i1 %cmp, i32 0, i32 2102030405
301301 call void asm sideeffect "blah $0", "h"(i32 %val)
302302 ret void
310310 ; CHECK: iilf [[REG]], 2102030405
311311 ; CHECK: blah [[REG]]
312312 ; CHECK: br %r14
313 %cmp = icmp eq i32 %x, 0
313 %cmp = icmp ne i32 %x, 0
314314 %val = select i1 %cmp, i32 0, i32 2102030405
315315 call void asm sideeffect "blah $0", "r"(i32 %val)
316316 ret void
296296 define void @f18(i8 *%ptr, i8 %alt, i32 %limit) {
297297 ; CHECK-LABEL: f18:
298298 ; CHECK: lb {{%r[0-5]}}, 0(%r2)
299 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
300 ; CHECK: [[LABEL]]:
299 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
300 ; CHECK: stc {{%r[0-5]}}, 0(%r2)
301 ; CHECK: br %r14
302 ; CHECK: [[LABEL]]:
303 ; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}}
301304 ; CHECK: stc {{%r[0-5]}}, 0(%r2)
302305 ; CHECK: br %r14
303306 %cond = icmp ult i32 %limit, 420
330333 ; FIXME: should use a normal load instead of CS.
331334 ; CHECK-LABEL: f20:
332335 ; CHECK: lb {{%r[0-9]+}}, 0(%r2)
333 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
334 ; CHECK: [[LABEL]]:
336 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
337 ; CHECK: stc {{%r[0-9]+}}, 0(%r2)
338 ; CHECK: br %r14
339 ; CHECK: [[LABEL]]:
340 ; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}}
335341 ; CHECK: stc {{%r[0-9]+}}, 0(%r2)
336342 ; CHECK: br %r14
337343 %cond = icmp ult i32 %limit, 420
296296 define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
297297 ; CHECK-LABEL: f18:
298298 ; CHECK: lh {{%r[0-5]}}, 0(%r2)
299 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
300 ; CHECK: [[LABEL]]:
299 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
300 ; CHECK: sth {{%r[0-5]}}, 0(%r2)
301 ; CHECK: br %r14
302 ; CHECK: [[LABEL]]:
303 ; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}}
301304 ; CHECK: sth {{%r[0-5]}}, 0(%r2)
302305 ; CHECK: br %r14
303306 %cond = icmp ult i32 %limit, 420
330333 ; FIXME: should use a normal load instead of CS.
331334 ; CHECK-LABEL: f20:
332335 ; CHECK: lh {{%r[0-9]+}}, 0(%r2)
333 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
334 ; CHECK: [[LABEL]]:
336 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
337 ; CHECK: sth {{%r[0-9]+}}, 0(%r2)
338 ; CHECK: br %r14
339 ; CHECK: [[LABEL]]:
340 ; CHECK: lr {{%r[0-9]+}}, {{%r[0-9]+}}
335341 ; CHECK: sth {{%r[0-9]+}}, 0(%r2)
336342 ; CHECK: br %r14
337343 %cond = icmp ult i32 %limit, 420
225225 define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
226226 ; CHECK-LABEL: f14:
227227 ; CHECK: l {{%r[0-5]}}, 0(%r2)
228 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
229 ; CHECK: [[LABEL]]:
228 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
229 ; CHECK: st {{%r[0-5]}}, 0(%r2)
230 ; CHECK: br %r14
231 ; CHECK: [[LABEL]]:
232 ; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}}
230233 ; CHECK: st {{%r[0-5]}}, 0(%r2)
231234 ; CHECK: br %r14
232235 %cond = icmp ult i32 %limit, 420
259262 ; FIXME: should use a normal load instead of CS.
260263 ; CHECK-LABEL: f16:
261264 ; CHECK: l {{%r[0-5]}}, 0(%r2)
262 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
263 ; CHECK: [[LABEL]]:
265 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
266 ; CHECK: st {{%r[0-5]}}, 0(%r2)
267 ; CHECK: br %r14
268 ; CHECK: [[LABEL]]:
269 ; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}}
264270 ; CHECK: st {{%r[0-5]}}, 0(%r2)
265271 ; CHECK: br %r14
266272 %cond = icmp ult i32 %limit, 420
123123 define void @f8(i64 *%ptr, i64 %alt, i32 %limit) {
124124 ; CHECK-LABEL: f8:
125125 ; CHECK: lg {{%r[0-5]}}, 0(%r2)
126 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
127 ; CHECK: [[LABEL]]:
126 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
127 ; CHECK: stg {{%r[0-5]}}, 0(%r2)
128 ; CHECK: br %r14
129 ; CHECK: [[LABEL]]:
130 ; CHECK: lgr {{%r[0-5]}}, {{%r[0-5]}}
128131 ; CHECK: stg {{%r[0-5]}}, 0(%r2)
129132 ; CHECK: br %r14
130133 %cond = icmp ult i32 %limit, 420
157160 ; FIXME: should use a normal load instead of CSG.
158161 ; CHECK-LABEL: f10:
159162 ; CHECK: lg {{%r[0-5]}}, 0(%r2)
160 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
161 ; CHECK: [[LABEL]]:
163 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
164 ; CHECK: stg {{%r[0-5]}}, 0(%r2)
165 ; CHECK: br %r14
166 ; CHECK: [[LABEL]]:
167 ; CHECK: lgr {{%r[0-5]}}, {{%r[0-5]}}
162168 ; CHECK: stg {{%r[0-5]}}, 0(%r2)
163169 ; CHECK: br %r14
164170 %cond = icmp ult i32 %limit, 420
155155 define void @f10(float *%ptr, float %alt, i32 %limit) {
156156 ; CHECK-LABEL: f10:
157157 ; CHECK: le {{%f[0-5]}}, 0(%r2)
158 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
158 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
159 ; CHECK: ste {{%f[0-5]}}, 0(%r2)
160 ; CHECK: br %r14
159161 ; CHECK: [[LABEL]]:
162 ; CHECK: ler {{%f[0-5]}}, {{%f[0-5]}}
160163 ; CHECK: ste {{%f[0-5]}}, 0(%r2)
161164 ; CHECK: br %r14
162165 %cond = icmp ult i32 %limit, 420
155155 define void @f10(double *%ptr, double %alt, i32 %limit) {
156156 ; CHECK-LABEL: f10:
157157 ; CHECK: ld {{%f[0-5]}}, 0(%r2)
158 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
158 ; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]]
159 ; CHECK: std {{%f[0-5]}}, 0(%r2)
160 ; CHECK: br %r14
159161 ; CHECK: [[LABEL]]:
162 ; CHECK: ldr {{%f[0-5]}}, {{%f[0-5]}}
160163 ; CHECK: std {{%f[0-5]}}, 0(%r2)
161164 ; CHECK: br %r14
162165 %cond = icmp ult i32 %limit, 420
1414 entry:
1515 %val = load i16 , i16 *@g
1616 %src2 = zext i16 %val to i32
17 %cond = icmp ult i32 %src1, %src2
18 br i1 %cond, label %exit, label %mulb
17 %cond = icmp uge i32 %src1, %src2
18 br i1 %cond, label %mulb, label %exit
1919 mulb:
2020 %mul = mul i32 %src1, %src1
2121 br label %exit
3333 entry:
3434 %val = load i16 , i16 *@g
3535 %src2 = zext i16 %val to i32
36 %cond = icmp slt i32 %src1, %src2
37 br i1 %cond, label %exit, label %mulb
36 %cond = icmp sge i32 %src1, %src2
37 br i1 %cond, label %mulb, label %exit
3838 mulb:
3939 %mul = mul i32 %src1, %src1
4040 br label %exit
5353 entry:
5454 %val = load i16 , i16 *@g
5555 %src2 = zext i16 %val to i32
56 %cond = icmp eq i32 %src1, %src2
57 br i1 %cond, label %exit, label %mulb
56 %cond = icmp ne i32 %src1, %src2
57 br i1 %cond, label %mulb, label %exit
5858 mulb:
5959 %mul = mul i32 %src1, %src1
6060 br label %exit
7373 entry:
7474 %val = load i16 , i16 *@g
7575 %src2 = zext i16 %val to i32
76 %cond = icmp ne i32 %src1, %src2
77 br i1 %cond, label %exit, label %mulb
76 %cond = icmp eq i32 %src1, %src2
77 br i1 %cond, label %mulb, label %exit
7878 mulb:
7979 %mul = mul i32 %src1, %src1
8080 br label %exit
9494 entry:
9595 %val = load i16 , i16 *@h, align 1
9696 %src2 = zext i16 %val to i32
97 %cond = icmp ult i32 %src1, %src2
98 br i1 %cond, label %exit, label %mulb
97 %cond = icmp uge i32 %src1, %src2
98 br i1 %cond, label %mulb, label %exit
9999 mulb:
100100 %mul = mul i32 %src1, %src1
101101 br label %exit
114114 entry:
115115 %val = load i16 , i16 *@g
116116 %src1 = zext i16 %val to i32
117 %cond = icmp ult i32 %src1, %src2
118 br i1 %cond, label %exit, label %mulb
117 %cond = icmp uge i32 %src1, %src2
118 br i1 %cond, label %mulb, label %exit
119119 mulb:
120120 %mul = mul i32 %src2, %src2
121121 br label %exit
1414 entry:
1515 %val = load i16 , i16 *@g
1616 %src2 = zext i16 %val to i64
17 %cond = icmp ult i64 %src1, %src2
18 br i1 %cond, label %exit, label %mulb
17 %cond = icmp uge i64 %src1, %src2
18 br i1 %cond, label %mulb, label %exit
1919 mulb:
2020 %mul = mul i64 %src1, %src1
2121 br label %exit
5353 entry:
5454 %val = load i16 , i16 *@g
5555 %src2 = zext i16 %val to i64
56 %cond = icmp eq i64 %src1, %src2
57 br i1 %cond, label %exit, label %mulb
56 %cond = icmp ne i64 %src1, %src2
57 br i1 %cond, label %mulb, label %exit
5858 mulb:
5959 %mul = mul i64 %src1, %src1
6060 br label %exit
7373 entry:
7474 %val = load i16 , i16 *@g
7575 %src2 = zext i16 %val to i64
76 %cond = icmp ne i64 %src1, %src2
77 br i1 %cond, label %exit, label %mulb
76 %cond = icmp eq i64 %src1, %src2
77 br i1 %cond, label %mulb, label %exit
7878 mulb:
7979 %mul = mul i64 %src1, %src1
8080 br label %exit
9494 entry:
9595 %val = load i16 , i16 *@h, align 1
9696 %src2 = zext i16 %val to i64
97 %cond = icmp ult i64 %src1, %src2
98 br i1 %cond, label %exit, label %mulb
97 %cond = icmp uge i64 %src1, %src2
98 br i1 %cond, label %mulb, label %exit
9999 mulb:
100100 %mul = mul i64 %src1, %src1
101101 br label %exit
114114 entry:
115115 %val = load i16 , i16 *@g
116116 %src1 = zext i16 %val to i64
117 %cond = icmp ult i64 %src1, %src2
118 br i1 %cond, label %exit, label %mulb
117 %cond = icmp uge i64 %src1, %src2
118 br i1 %cond, label %mulb, label %exit
119119 mulb:
120120 %mul = mul i64 %src2, %src2
121121 br label %exit
472472 %xor = xor i32 %val, 1
473473 %add = add i32 %xor, 1000000
474474 call void @foo()
475 %cmp = icmp ne i32 %add, 0
476 br i1 %cmp, label %exit, label %store
475 %cmp = icmp eq i32 %add, 0
476 br i1 %cmp, label %store, label %exit, !prof !1
477477
478478 store:
479479 store i32 %add, i32 *%ptr
887887 exit:
888888 ret i64 %res
889889 }
890
891 !1 = !{!"branch_weights", i32 2, i32 1}
5151 define double @f3(i8 *%src, double %a, double %b) {
5252 ; CHECK-LABEL: f3:
5353 ; CHECK: tm 0(%r2), 1
54 ; CHECK: je {{\.L.*}}
54 ; CHECK: jne {{\.L.*}}
5555 ; CHECK: br %r14
5656 %byte = load i8 , i8 *%src
5757 %and = and i8 %byte, 1
7979 define double @f5(i8 *%src, double %a, double %b) {
8080 ; CHECK-LABEL: f5:
8181 ; CHECK: tm 0(%r2), 1
82 ; CHECK: jne {{\.L.*}}
82 ; CHECK: je {{\.L.*}}
8383 ; CHECK: br %r14
8484 %byte = load i8 , i8 *%src
8585 %and = and i8 %byte, 1
9292 define double @f6(i8 *%src, double %a, double %b) {
9393 ; CHECK-LABEL: f6:
9494 ; CHECK: tm 0(%r2), 254
95 ; CHECK: jo {{\.L.*}}
95 ; CHECK: jno {{\.L.*}}
9696 ; CHECK: br %r14
9797 %byte = load i8 , i8 *%src
9898 %and = and i8 %byte, 254
105105 define double @f7(i8 *%src, double %a, double %b) {
106106 ; CHECK-LABEL: f7:
107107 ; CHECK: tm 0(%r2), 254
108 ; CHECK: jno {{\.L.*}}
108 ; CHECK: jo {{\.L.*}}
109109 ; CHECK: br %r14
110110 %byte = load i8 , i8 *%src
111111 %and = and i8 %byte, 254
120120 ; CHECK-LABEL: f8:
121121 ; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
122122 ; CHECK: tmll [[REG]], 3
123 ; CHECK: jh {{\.L.*}}
123 ; CHECK: jnh {{\.L.*}}
124124 ; CHECK: br %r14
125125 %byte = load i8 , i8 *%src
126126 %and = and i8 %byte, 3
134134 ; CHECK-LABEL: f9:
135135 ; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
136136 ; CHECK: tmll [[REG]], 3
137 ; CHECK: jl {{\.L.*}}
137 ; CHECK: jnl {{\.L.*}}
138138 ; CHECK: br %r14
139139 %byte = load i8 , i8 *%src
140140 %and = and i8 %byte, 3
147147 define double @f10(i8 *%src, double %a, double %b) {
148148 ; CHECK-LABEL: f10:
149149 ; CHECK: tm 4095(%r2), 1
150 ; CHECK: je {{\.L.*}}
150 ; CHECK: jne {{\.L.*}}
151151 ; CHECK: br %r14
152152 %ptr = getelementptr i8, i8 *%src, i64 4095
153153 %byte = load i8 , i8 *%ptr
161161 define double @f11(i8 *%src, double %a, double %b) {
162162 ; CHECK-LABEL: f11:
163163 ; CHECK: tmy 4096(%r2), 1
164 ; CHECK: je {{\.L.*}}
164 ; CHECK: jne {{\.L.*}}
165165 ; CHECK: br %r14
166166 %ptr = getelementptr i8, i8 *%src, i64 4096
167167 %byte = load i8 , i8 *%ptr
175175 define double @f12(i8 *%src, double %a, double %b) {
176176 ; CHECK-LABEL: f12:
177177 ; CHECK: tmy 524287(%r2), 1
178 ; CHECK: je {{\.L.*}}
178 ; CHECK: jne {{\.L.*}}
179179 ; CHECK: br %r14
180180 %ptr = getelementptr i8, i8 *%src, i64 524287
181181 %byte = load i8 , i8 *%ptr
190190 ; CHECK-LABEL: f13:
191191 ; CHECK: agfi %r2, 524288
192192 ; CHECK: tm 0(%r2), 1
193 ; CHECK: je {{\.L.*}}
193 ; CHECK: jne {{\.L.*}}
194194 ; CHECK: br %r14
195195 %ptr = getelementptr i8, i8 *%src, i64 524288
196196 %byte = load i8 , i8 *%ptr
204204 define double @f14(i8 *%src, double %a, double %b) {
205205 ; CHECK-LABEL: f14:
206206 ; CHECK: tmy -524288(%r2), 1
207 ; CHECK: je {{\.L.*}}
207 ; CHECK: jne {{\.L.*}}
208208 ; CHECK: br %r14
209209 %ptr = getelementptr i8, i8 *%src, i64 -524288
210210 %byte = load i8 , i8 *%ptr
219219 ; CHECK-LABEL: f15:
220220 ; CHECK: agfi %r2, -524289
221221 ; CHECK: tm 0(%r2), 1
222 ; CHECK: je {{\.L.*}}
222 ; CHECK: jne {{\.L.*}}
223223 ; CHECK: br %r14
224224 %ptr = getelementptr i8, i8 *%src, i64 -524289
225225 %byte = load i8 , i8 *%ptr
233233 define double @f16(i8 *%src, i64 %index, double %a, double %b) {
234234 ; CHECK-LABEL: f16:
235235 ; CHECK: tm 0({{%r[1-5]}}), 1
236 ; CHECK: je {{\.L.*}}
236 ; CHECK: jne {{\.L.*}}
237237 ; CHECK: br %r14
238238 %ptr = getelementptr i8, i8 *%src, i64 %index
239239 %byte = load i8 , i8 *%ptr
2525 nonzeroord:
2626 ; CHECK: lhi %r2, 2
2727 ; CHECK: tcdb %f0, 48
28 ; CHECK: jl [[RET]]
28 ; CHECK: je [[FINITE:.]]
2929 %abs = tail call double @llvm.fabs.f64(double %x)
3030 %testinf = fcmp oeq double %abs, 0x7FF0000000000000
3131 br i1 %testinf, label %ret, label %finite, !prof !1
32
33 ret:
34 ; CHECK: [[RET]]:
35 ; CHECK: br %r14
36 %res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ]
37 ret i32 %res
3238
3339 finite:
3440 ; CHECK: lhi %r2, 3
3541 ; CHECK: tcdb %f0, 831
3642 ; CHECK: blr %r14
3743 ; CHECK: lhi %r2, 4
44 ; CHECK: br %r14
3845 %testnormal = fcmp uge double %abs, 0x10000000000000
3946 %finres = select i1 %testnormal, i32 3, i32 4
4047 br label %ret
4148
42 ret:
43 ; CHECK: [[RET]]:
44 ; CHECK: br %r14
45 %res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ]
46 ret i32 %res
4749 }
4850
4951 !1 = !{!"branch_weights", i32 1, i32 1}
None ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
0 ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \
11 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T
2 ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \
2 ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \
33 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T
4 ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
4 ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \
55 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T
6 ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \
6 ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \
77 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T
8
89 ;
910 ; Note: Lots of tests use inline asm instead of regular calls.
1011 ; This allows to have a better control on what the allocation will do.
1415 ; edges.
1516 ; Also disable the late if-converter as it makes harder to reason on
1617 ; the diffs.
18 ; Disable tail-duplication during placement, as v4t vs v5t get different
19 ; results due to branches not being analyzable under v5
1720
1821 ; Initial motivating example: Simple diamond with a call just on one side.
1922 ; CHECK-LABEL: foo:
2525 call void @x()
2626 call void @x()
2727 call void @x()
28 ; CHECK: cbnz
28 ; CHECK: cbz
2929 %q = icmp eq i32 %y, 0
3030 br i1 %q, label %t2, label %f
3131
33
44 define void @f0(i32 %x) optsize {
55 ; CHECK-LABEL: f0:
6 ; CHECK: cbnz
6 ; CHECK: cbz
77 %p = icmp eq i32 %x, 0
88 br i1 %p, label %t, label %f
99
1111
1212 define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string,std::allocator >"* %this, %"struct.std::basic_string,std::allocator >"* %__str) {
1313 ; CHECK-LABEL: _ZNKSs7compareERKSs:
14 ; CHECK: cbnz r0,
14 ; CHECK: cbz r0,
15 ; CHECK-NEXT: %bb1
16 ; CHECK-NEXT: pop.w
1517 ; CHECK-NEXT: %bb
1618 ; CHECK-NEXT: sub{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
17 ; CHECK-NEXT: %bb1
1819 ; CHECK-NEXT: pop.w
1920 entry:
2021 %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %this) ; [#uses=3]
77 ; Basic phi triangle.
88
99 ; CHECK-LABEL: test0:
10 ; CHECK: div_s $[[NUM0:[0-9]+]]=, $0, $pop[[NUM1:[0-9]+]]{{$}}
11 ; CHECK: return $[[NUM0]]{{$}}
10 ; CHECK: return $0
11 ; CHECK: div_s $push[[NUM0:[0-9]+]]=, $0, $pop[[NUM1:[0-9]+]]{{$}}
12 ; CHECK: return $pop[[NUM0]]{{$}}
1213 define i32 @test0(i32 %p) {
1314 entry:
1415 %t = icmp slt i32 %p, 0
33
44 define i32 @a(i32 %x) nounwind {
55 entry:
6 %cmp = icmp ult i32 %x, -2147483648 ; [#uses=1]
7 br i1 %cmp, label %if.end, label %if.then
6 %cmp = icmp uge i32 %x, -2147483648 ; [#uses=1]
7 br i1 %cmp, label %if.then, label %if.end
88
99 if.then: ; preds = %entry
1010 %call = call i32 (...) @b() ; [#uses=0]
2929 %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
3030 %sum = extractvalue {i32, i1} %t, 0
3131 %obit = extractvalue {i32, i1} %t, 1
32 br i1 %obit, label %overflow, label %normal
32 %notobit = xor i1 1, %obit
33 br i1 %notobit, label %normal, label %overflow
3334
3435 normal:
3536 store i32 0, i32* %X
5253 %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
5354 %sum = extractvalue {i32, i1} %t, 0
5455 %obit = extractvalue {i32, i1} %t, 1
55 br i1 %obit, label %carry, label %normal
56 %notobit = xor i1 1, %obit
57 br i1 %notobit, label %normal, label %carry
5658
5759 normal:
5860 store i32 0, i32* %X
6161 ; CHECK-NEXT: xorl %eax, %eax
6262 ; CHECK-NEXT: ## implicit-def: %YMM0
6363 ; CHECK-NEXT: testb %al, %al
64 ; CHECK-NEXT: jne LBB4_2
65 ; CHECK-NEXT: ## BB#1: ## %load.i1247
64 ; CHECK-NEXT: je LBB4_1
65 ; CHECK-NEXT: ## BB#2: ## %__load_and_broadcast_32.exit1249
66 ; CHECK-NEXT: retq
67 ; CHECK-NEXT: LBB4_1: ## %load.i1247
6668 ; CHECK-NEXT: pushq %rbp
6769 ; CHECK-NEXT: movq %rsp, %rbp
6870 ; CHECK-NEXT: andq $-32, %rsp
7072 ; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
7173 ; CHECK-NEXT: movq %rbp, %rsp
7274 ; CHECK-NEXT: popq %rbp
73 ; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
7475 ; CHECK-NEXT: retq
7576 allocas:
7677 %udx495 = alloca [18 x [18 x float]], align 32
6868 ; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
6969 ; ALL-NEXT: vucomiss %xmm1, %xmm0
7070 ; ALL-NEXT: jne LBB3_1
71 ; ALL-NEXT: jnp LBB3_2
71 ; ALL-NEXT: jp LBB3_1
72 ; ALL-NEXT: ## BB#2: ## %return
73 ; ALL-NEXT: retq
7274 ; ALL-NEXT: LBB3_1: ## %if.end
7375 ; ALL-NEXT: seta %al
7476 ; ALL-NEXT: movzbl %al, %eax
7577 ; ALL-NEXT: leaq {{.*}}(%rip), %rcx
7678 ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
77 ; ALL-NEXT: LBB3_2: ## %return
7879 ; ALL-NEXT: retq
7980 entry:
8081 %cmp = fcmp oeq float %p, 0.000000e+00
4848 %tmp29 = lshr i32 %x, %n
4949 %tmp3 = and i32 1, %tmp29
5050 %tmp4 = icmp eq i32 %tmp3, 0
51 br i1 %tmp4, label %bb, label %UnifiedReturnBlock
51 br i1 %tmp4, label %bb, label %UnifiedReturnBlock, !prof !1
5252
5353 bb:
5454 call void @foo()
8888 %tmp29 = ashr i32 %x, %n
8989 %tmp3 = and i32 1, %tmp29
9090 %tmp4 = icmp eq i32 %tmp3, 0
91 br i1 %tmp4, label %bb, label %UnifiedReturnBlock
91 br i1 %tmp4, label %bb, label %UnifiedReturnBlock, !prof !1
9292
9393 bb:
9494 call void @foo()
108108 %tmp29 = shl i32 1, %n
109109 %tmp3 = and i32 %tmp29, %x
110110 %tmp4 = icmp eq i32 %tmp3, 0
111 br i1 %tmp4, label %bb, label %UnifiedReturnBlock
111 br i1 %tmp4, label %bb, label %UnifiedReturnBlock, !prof !1
112112
113113 bb:
114114 call void @foo()
128128 %tmp29 = shl i32 1, %n
129129 %tmp3 = and i32 %x, %tmp29
130130 %tmp4 = icmp eq i32 %tmp3, 0
131 br i1 %tmp4, label %bb, label %UnifiedReturnBlock
131 br i1 %tmp4, label %bb, label %UnifiedReturnBlock, !prof !1
132132
133133 bb:
134134 call void @foo()
607607 %tobool = icmp ne i64 %and1, 0
608608 ret i1 %tobool
609609 }
610
611 !1 = !{!"branch_weights", i32 2, i32 1}
2323
2424 ; CHECK-LABEL: test1:
2525 ; CHECK: testb %dil, %dil
26 ; CHECK: jne LBB0_2
26 ; CHECK: je LBB0_1
27 ; CHECK: retq
28 ; CHECK: LBB0_1:
2729 ; CHECK: divl
28 ; CHECK: LBB0_2:
3535
3636 entry:
3737 %mul = fmul double %x, %y
38 %cmp = fcmp une double %mul, 0.000000e+00
39 br i1 %cmp, label %bb2, label %bb1
38 %cmp = fcmp oeq double %mul, 0.000000e+00
39 br i1 %cmp, label %bb1, label %bb2
4040
4141 bb1:
4242 %add = fadd double %mul, -1.000000e+00
55 ; CHECK: jns
66 %tmp1 = add i32 %X, 1 ; [#uses=1]
77 %tmp = icmp slt i32 %tmp1, 0 ; [#uses=1]
8 br i1 %tmp, label %cond_true, label %cond_next
8 br i1 %tmp, label %cond_true, label %cond_next, !prof !1
99
1010 cond_true: ; preds = %entry
1111 %tmp2 = tail call i32 (...) @bar( ) ; [#uses=0]
302302 if.end:
303303 ret i32 undef
304304 }
305
306 !1 = !{!"branch_weights", i32 2, i32 1}
8585 ; CHECK-LABEL: cross_mbb_phys_cse:
8686 ; CHECK: cmpl
8787 ; CHECK: ja
88 %cmp = icmp ugt i32 %a, %b
89 br i1 %cmp, label %return, label %if.end
88 %cmp = icmp ule i32 %a, %b
89 br i1 %cmp, label %if.end, label %return
9090
9191 if.end: ; preds = %entry
9292 ; CHECK-NOT: cmpl
1313 ; CHECK-NEXT: shll %cl, %eax
1414 ; CHECK-NEXT: shldl %cl, %esi, %edx
1515 ; CHECK-NEXT: testb $32, %cl
16 ; CHECK-NEXT: je .LBB0_2
17 ; CHECK-NEXT: # BB#1:
16 ; CHECK-NEXT: jne .LBB0_1
17 ; CHECK-NEXT: # BB#2:
18 ; CHECK-NEXT: popl %esi
19 ; CHECK-NEXT: retl
20 ; CHECK-NEXT: .LBB0_1:
1821 ; CHECK-NEXT: movl %eax, %edx
1922 ; CHECK-NEXT: xorl %eax, %eax
20 ; CHECK-NEXT: .LBB0_2:
2123 ; CHECK-NEXT: popl %esi
2224 ; CHECK-NEXT: retl
2325 %shift.upgrd.1 = zext i8 %C to i64 ; [#uses=1]
3638 ; CHECK-NEXT: sarl %cl, %edx
3739 ; CHECK-NEXT: shrdl %cl, %esi, %eax
3840 ; CHECK-NEXT: testb $32, %cl
39 ; CHECK-NEXT: je .LBB1_2
40 ; CHECK-NEXT: # BB#1:
41 ; CHECK-NEXT: jne .LBB1_1
42 ; CHECK-NEXT: # BB#2:
43 ; CHECK-NEXT: popl %esi
44 ; CHECK-NEXT: retl
45 ; CHECK-NEXT: .LBB1_1:
4146 ; CHECK-NEXT: sarl $31, %esi
4247 ; CHECK-NEXT: movl %edx, %eax
4348 ; CHECK-NEXT: movl %esi, %edx
44 ; CHECK-NEXT: .LBB1_2:
4549 ; CHECK-NEXT: popl %esi
4650 ; CHECK-NEXT: retl
4751 %shift.upgrd.2 = zext i8 %C to i64 ; [#uses=1]
6064 ; CHECK-NEXT: shrl %cl, %edx
6165 ; CHECK-NEXT: shrdl %cl, %esi, %eax
6266 ; CHECK-NEXT: testb $32, %cl
63 ; CHECK-NEXT: je .LBB2_2
64 ; CHECK-NEXT: # BB#1:
67 ; CHECK-NEXT: jne .LBB2_1
68 ; CHECK-NEXT: # BB#2:
69 ; CHECK-NEXT: popl %esi
70 ; CHECK-NEXT: retl
71 ; CHECK-NEXT: .LBB2_1:
6572 ; CHECK-NEXT: movl %edx, %eax
6673 ; CHECK-NEXT: xorl %edx, %edx
67 ; CHECK-NEXT: .LBB2_2:
6874 ; CHECK-NEXT: popl %esi
6975 ; CHECK-NEXT: retl
7076 %shift.upgrd.3 = zext i8 %C to i64 ; [#uses=1]
2525
2626 ; CHECK-LABEL: split:
2727 ; CHECK-NEXT: testb $1, %dil
28 ; CHECK-NEXT: je
28 ; CHECK-NEXT: jne
29 ; CHECK: ret
2930 ; CHECK: divsd
3031 ; CHECK: movapd
3132 ; CHECK: ret
11091109 ; AVX1-LABEL: add_ss_mask:
11101110 ; AVX1: # BB#0:
11111111 ; AVX1-NEXT: testb $1, %dil
1112 ; AVX1-NEXT: je .LBB62_2
1113 ; AVX1-NEXT: # BB#1:
1112 ; AVX1-NEXT: jne .LBB62_1
1113 ; AVX1-NEXT: # BB#2:
1114 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
1115 ; AVX1-NEXT: retq
1116 ; AVX1-NEXT: .LBB62_1:
11141117 ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm2
1115 ; AVX1-NEXT: .LBB62_2:
11161118 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
11171119 ; AVX1-NEXT: retq
11181120 ;
11641166 ; AVX1-LABEL: add_sd_mask:
11651167 ; AVX1: # BB#0:
11661168 ; AVX1-NEXT: testb $1, %dil
1167 ; AVX1-NEXT: je .LBB63_2
1168 ; AVX1-NEXT: # BB#1:
1169 ; AVX1-NEXT: jne .LBB63_1
1170 ; AVX1-NEXT: # BB#2:
1171 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1172 ; AVX1-NEXT: retq
1173 ; AVX1-NEXT: .LBB63_1:
11691174 ; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm2
1170 ; AVX1-NEXT: .LBB63_2:
11711175 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
11721176 ; AVX1-NEXT: retq
11731177 ;
88 entry:
99 %and = and i32 %flags, 512
1010 %tobool = icmp eq i32 %and, 0
11 br i1 %tobool, label %if.end, label %if.then
11 br i1 %tobool, label %if.end, label %if.then, !prof !1
1212
1313 if.then:
1414 br label %if.end
1717 %hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ]
1818 ret i32 %hasflag
1919 }
20 !1 = !{!"branch_weights", i32 1, i32 2}