llvm.org GIT mirror llvm / c7506ed
[AArch64] Mark mrs of TPIDR_EL0 (thread pointer) as not having side effects. Among other things, this allows Machine LICM to hoist a costly 'mrs' instruction from within a loop. Differential Revision: http://reviews.llvm.org/D31151 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298851 91177308-0d34-0410-b5e6-96231b3b80d8 Chad Rosier 3 years ago
3 changed file(s) with 72 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
1616 #include "MCTargetDesc/AArch64AddressingModes.h"
1717 #include "AArch64InstrInfo.h"
1818 #include "AArch64Subtarget.h"
19 #include "Utils/AArch64BaseInfo.h"
1920 #include "llvm/CodeGen/LivePhysRegs.h"
2021 #include "llvm/CodeGen/MachineFunctionPass.h"
2122 #include "llvm/CodeGen/MachineInstrBuilder.h"
888889 MI.eraseFromParent();
889890 return true;
890891 }
892 case AArch64::MOVbaseTLS: {
893 unsigned DstReg = MI.getOperand(0).getReg();
894 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
895 .addImm(AArch64SysReg::TPIDR_EL0);
896 MI.eraseFromParent();
897 return true;
898 }
891899
892900 case AArch64::MOVi32imm:
893901 return expandMOVImm(MBB, MBBI, 32);
425425 def MSRpstateImm4 : MSRpstateImm0_15;
426426
427427 // The thread pointer (on Linux, at least, where this has been implemented) is
428 // TPIDR_EL0.
429 def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
428 // TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects.
429 let hasSideEffects = 0 in
430 def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
431 [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
430432
431433 // The cycle counter PMC register is PMCCNTR_EL0.
432434 let Predicates = [HasPerfMon] in
0 ; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
1
2 @x = thread_local local_unnamed_addr global i32 0, align 4
3 @y = thread_local local_unnamed_addr global i32 0, align 4
4
5 ; Machine LICM should hoist the mrs into the loop preheader.
6 ; CHECK-LABEL: @test1
7 ; CHECK: BB#1:
8 ; CHECK: mrs x[[BASE:[0-9]+]], TPIDR_EL0
9 ; CHECK: add x[[REG1:[0-9]+]], x[[BASE]], :tprel_hi12:x
10 ; CHECK: add x[[REG2:[0-9]+]], x[[REG1]], :tprel_lo12_nc:x
11 ;
12 ; CHECK: .LBB0_2:
13 ; CHECK: ldr w0, [x[[REG2]]]
14 ; CHECK: bl bar
15 ; CHECK: sub w[[REG3:[0-9]+]], w{{[0-9]+}}, #1
16 ; CHECK: cbnz w[[REG3]], .LBB0_2
17
18 define void @test1(i32 %n) local_unnamed_addr {
19 entry:
20 %cmp3 = icmp sgt i32 %n, 0
21 br i1 %cmp3, label %bb1, label %bb2
22
23 bb1:
24 br label %for.body
25
26 for.body:
27 %i.04 = phi i32 [ %inc, %for.body ], [ 0, %bb1 ]
28 %0 = load i32, i32* @x, align 4
29 tail call void @bar(i32 %0) #2
30 %inc = add nuw nsw i32 %i.04, 1
31 %exitcond = icmp eq i32 %inc, %n
32 br i1 %exitcond, label %bb2, label %for.body
33
34 bb2:
35 ret void
36 }
37
38 ; Machine CSE should combine the mrs between the load of %x and %y.
39 ; CHECK-LABEL: @test2
40 ; CHECK: mrs x{{[0-9]+}}, TPIDR_EL0
41 ; CHECK-NOT: mrs x{{[0-9]+}}, TPIDR_EL0
42 ; CHECK: ret
43 define void @test2(i32 %c) local_unnamed_addr #0 {
44 entry:
45 %0 = load i32, i32* @x, align 4
46 tail call void @bar(i32 %0) #2
47 %cmp = icmp eq i32 %c, 0
48 br i1 %cmp, label %if.end, label %if.then
49
50 if.then:
51 %1 = load i32, i32* @y, align 4
52 tail call void @bar(i32 %1) #2
53 br label %if.end
54
55 if.end:
56 ret void
57 }
58
59 declare void @bar(i32) local_unnamed_addr