llvm / aaf3f19 (llvm.org GIT mirror)

[LoopReroll] Reroll loops with unordered atomic memory accesses

Author: Sanjoy Das
Reviewers: hfinkel, jfb, reames
Subscribers: mcrosier, mzolotukhin, llvm-commits
Differential Revision: https://reviews.llvm.org/D22385

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275932 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed files with 138 additions and 7 deletions.
   collectInLoopUserSet(Root, Exclude, Final, Users);
 }
 
-static bool isSimpleLoadStore(Instruction *I) {
+static bool isUnorderedLoadStore(Instruction *I) {
   if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    return LI->isSimple();
+    return LI->isUnordered();
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return SI->isSimple();
+    return SI->isUnordered();
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
     return !MI->isVolatile();
   return false;
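For readers outside the codebase: the two predicates differ only in how they treat atomic accesses. A minimal sketch of the distinction, paraphrasing the LoadInst accessors from llvm/IR/Instructions.h (the exact definitions may differ slightly):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch only: "simple" rejects every atomic access, while "unordered"
// also admits atomics at the weakest (unordered) ordering; both reject
// volatile accesses. StoreInst has the same pair of accessors.
static bool sketchIsSimple(const LoadInst &LI) {
  return !LI.isAtomic() && !LI.isVolatile();
}

static bool sketchIsUnordered(const LoadInst &LI) {
  AtomicOrdering O = LI.getOrdering();
  return (O == AtomicOrdering::NotAtomic || O == AtomicOrdering::Unordered) &&
         !LI.isVolatile();
}

Every simple access is therefore unordered, but not conversely; the rename widens what the reroller accepts, which is exactly what the new tests below exercise.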
     // which while a valid (somewhat arbitrary) micro-optimization, is
     // needed because otherwise isSafeToSpeculativelyExecute returns
     // false on PHI nodes.
-    if (!isa<PHINode>(I) && !isSimpleLoadStore(I) &&
+    if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
         !isSafeToSpeculativelyExecute(I))
       // Intervening instructions cause side effects.
       FutureSideEffects = true;
...
     // If we've passed an instruction from a future iteration that may have
     // side effects, and this instruction might also, then we can't reorder
     // them, and this matching fails. As an exception, we allow the alias
-    // set tracker to handle regular (simple) load/store dependencies.
-    if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) &&
+    // set tracker to handle regular (unordered) load/store dependencies.
+    if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
                                !isSafeToSpeculativelyExecute(BaseInst)) ||
-                              (!isSimpleLoadStore(RootInst) &&
+                              (!isUnorderedLoadStore(RootInst) &&
                                !isSafeToSpeculativelyExecute(RootInst)))) {
       DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
             " vs. " << *RootInst <<
 }
 
 
+define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
+; CHECK-LABEL: @unordered_atomic_ops(
+
+; CHECK: for.body:
+; CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK-NEXT: %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvar
+; CHECK-NEXT: %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvar
+; CHECK-NEXT: %va = load atomic i32, i32* %buf0_a unordered, align 4
+; CHECK-NEXT: store atomic i32 %va, i32* %buf1_a unordered, align 4
+; CHECK-NEXT: %indvar.next = add i32 %indvar, 1
+; CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 3199
+; CHECK-NEXT: br i1 %exitcond, label %for.end, label %for.body
+
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a unordered, align 4
+  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+  store atomic i32 %va, i32* %buf1_a unordered, align 4
+  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
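An aside on reading these tests: the CHECK lines are FileCheck directives, and the file's RUN line lies outside this hunk; LoopReroll tests of this era are typically driven by something like "opt < %s -loop-reroll -S | FileCheck %s" (an assumption, since the RUN line is not shown). The positive test above thus asserts that the unrolled-by-2 body collapses to a single unordered load and store per iteration, with the trip count rescaled from 1600 stride-2 iterations to 3200 stride-1 iterations.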
+
+define void @unordered_atomic_ops_nomatch(i32* noalias %buf_0, i32* noalias %buf_1) {
+; Negative test
+
+; CHECK-LABEL: @unordered_atomic_ops_nomatch(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: for.body:
+; CHECK: %indvars.iv.next = add i32 %indvars.iv, 2
+; CHECK: %indvars.mid = add i32 %indvars.iv, 1
+; CHECK: %cmp = icmp slt i32 %indvars.iv.next, 3200
+; CHECK: br i1 %cmp, label %for.body, label %for.end
+
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a unordered, align 4
+  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+  store i32 %va, i32* %buf1_a, align 4  ;; Not atomic
+  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @ordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
+; Negative test
+
+; CHECK-LABEL: @ordered_atomic_ops(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: for.body:
+; CHECK: %indvars.iv.next = add i32 %indvars.iv, 2
+; CHECK: %indvars.mid = add i32 %indvars.iv, 1
+; CHECK: %cmp = icmp slt i32 %indvars.iv.next, 3200
+; CHECK: br i1 %cmp, label %for.body, label %for.end
+
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a acquire, align 4
+  %vb = load atomic i32, i32* %buf0_b acquire, align 4
+  store atomic i32 %va, i32* %buf1_a release, align 4
+  store atomic i32 %vb, i32* %buf1_b release, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
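Both negative tests assert that the stride-2 induction arithmetic survives, i.e. that the loop was left alone. In @unordered_atomic_ops_nomatch a plain non-atomic store is paired with an unordered one, so the two halves of the unrolled body are not instruction-for-instruction equivalent and root matching fails. In @ordered_atomic_ops the acquire loads and release stores carry real ordering constraints, so isUnorderedLoadStore rejects them and no rerolling is attempted.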
+
+define void @unordered_atomic_ops_with_fence(i32* noalias %buf_0, i32* noalias %buf_1) {
+; CHECK-LABEL: @unordered_atomic_ops_with_fence(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: for.body:
+; CHECK: %va = load atomic i32, i32* %buf0_a unordered, align 4
+; CHECK-NEXT: %vb = load atomic i32, i32* %buf0_b unordered, align 4
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: store atomic i32 %va, i32* %buf1_a unordered, align 4
+; CHECK-NEXT: store atomic i32 %vb, i32* %buf1_b unordered, align 4
+
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a unordered, align 4
+  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+  fence seq_cst
+  store atomic i32 %va, i32* %buf1_a unordered, align 4
+  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
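Although it is not marked as a negative test, the CHECK-NEXT chain here matches the original two-load, fence, two-store body verbatim, so this loop is also expected to stay unrolled: the seq_cst fence is an ordered operation sitting between the accesses that rerolling would need to fuse.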
+
 attributes #0 = { nounwind uwtable }
 attributes #1 = { nounwind }