llvm.org GIT mirror llvm / 506ed4d
Lower thumbv4t & thumbv5 lo->lo copies through a push-pop sequence. On pre-v6 hardware, 'MOV lo, lo' gives undefined results, so such copies need to be avoided. This patch favors simplicity and short implementation time at the expense of performance... As they say: correctness first, then performance. See http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html for a few ideas on how to make this better. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216138 91177308-0d34-0410-b5e6-96231b3b80d8 Jonathan Roelofs 6 years ago
3 changed file(s) with 66 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
1010 //
1111 //===----------------------------------------------------------------------===//
1212
13 #include "ARMSubtarget.h"
1314 #include "Thumb1InstrInfo.h"
1415 #include "llvm/CodeGen/MachineFrameInfo.h"
1516 #include "llvm/CodeGen/MachineInstrBuilder.h"
4041 MachineBasicBlock::iterator I, DebugLoc DL,
4142 unsigned DestReg, unsigned SrcReg,
4243 bool KillSrc) const {
43 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
44 .addReg(SrcReg, getKillRegState(KillSrc)));
44 // Need to check the arch.
45 MachineFunction &MF = *MBB.getParent();
46 const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>();
47
4548 assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
4649 "Thumb1 can only copy GPR registers");
50
51 if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
52 || !ARM::tGPRRegClass.contains(DestReg))
53 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
54 .addReg(SrcReg, getKillRegState(KillSrc)));
55 else {
56 // FIXME: The performance consequences of this are going to be atrocious.
57 // Some things to try that should be better:
58 // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
59 // * 'movs $dst, $src' if cpsr isn't live
60 // See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html
61
62 // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
63 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))
64 .addReg(SrcReg, getKillRegState(KillSrc));
65 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP)))
66 .addReg(DestReg, getDefRegState(true));
67 }
4768 }
4869
4970 void Thumb1InstrInfo::
1919
2020 ; CHECK-THUMB-LABEL: test_cmpxchg_res_i8
2121 ; CHECK-THUMB: bl __sync_val_compare_and_swap_1
22 ; CHECK-THUMB: mov [[R1:r[0-9]+]], r0
22 ; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0
23 ; CHECK-THUMB: push {r0}
24 ; CHECK-THUMB: pop {[[R1:r[0-7]]]}
2325 ; CHECK-THUMB: movs r0, #1
2426 ; CHECK-THUMB: movs [[R2:r[0-9]+]], #0
2527 ; CHECK-THUMB: cmp [[R1]], {{r[0-9]+}}
2628 ; CHECK-THUMB: beq
27 ; CHECK-THUMB: mov r0, [[R2]]
29 ; CHECK-THUMB: push {[[R2]]}
30 ; CHECK-THUMB: pop {r0}
2831
2932 ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8
3033 ; CHECK-ARMV7: ldrexb [[R3:r[0-9]+]], [r0]
0 ; RUN: llc -mtriple=armv4-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
1 ; RUN: llc -mtriple=armv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
2 ; RUN: llc -mtriple=armv5-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
3 ; RUN: llc -mtriple=armv6-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
4 ; RUN: llc -mtriple=armv7-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
5 ; RUN: llc -mtriple=thumbv6-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
6 ; RUN: llc -mtriple=thumbv7-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
7 ; CHECK-LOLOMOV-LABEL: foo
8 ; CHECK-LOLOMOV: mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
9 ; CHECK-LOLOMOV-NEXT: mov [[SRC1]], [[SRC2:r[01]]]
10 ; CHECK-LOLOMOV-NEXT: mov [[SRC2]], [[TMP]]
11 ; CHECK-LOLOMOV-LABEL: bar
12 ; CHECK-LOLOMOV-LABEL: fnend
13 ;
14 ; 'MOV lo, lo' in Thumb mode produces undefined results on pre-v6 hardware
15 ; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-NOLOLOMOV
16 ; RUN: llc -mtriple=thumbv5-none--eabi < %s | FileCheck %s --check-prefix=CHECK-NOLOLOMOV
17 ; CHECK-NOLOLOMOV-LABEL: foo
18 ; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
19 ; CHECK-NOLOLOMOV: push {[[SRC1:r[01]]]}
20 ; CHECK-NOLOLOMOV-NEXT: pop {[[TMP:r[0-7]]]}
21 ; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
22 ; CHECK-NOLOLOMOV: push {[[SRC2:r[01]]]}
23 ; CHECK-NOLOLOMOV-NEXT: pop {[[SRC1]]}
24 ; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
25 ; CHECK-NOLOLOMOV: push {[[TMP]]}
26 ; CHECK-NOLOLOMOV-NEXT: pop {[[SRC2]]}
27 ; CHECK-NOLOLOMOV-LABEL: bar
28 ; CHECK-NOLOLOMOV-LABEL: fnend
29
30 declare void @bar(i32, i32)
31
32 define void @foo(i32 %a, i32 %b) {
33 entry:
34 call void @bar(i32 %b, i32 %a);
35 ret void
36 }
37