llvm.org GIT mirror llvm / dd4e083
[ARM] Add new target feature to fuse literal generation. This feature enables the fusion of literal generation operations (the bottom and top halves of a 32-bit immediate, i.e. a MOVW followed by a MOVT) on Cortex A57 and Cortex A72, as recommended in their Software Optimisation Guides, sections 4.14 and 4.11, respectively. | Differential revision: https://reviews.llvm.org/D49563 | git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338147 91177308-0d34-0410-b5e6-96231b3b80d8 | Evandro Menezes, 2 years ago
4 changed file(s) with 94 addition(s) and 19 deletion(s). Raw diff Collapse all Expand all
139139 // Fast execution of AES crypto operations
140140 def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
141141 "CPU fuses AES crypto operations">;
142
143 // Fast execution of bottom and top halves of literal generation
144 def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
145 "CPU fuses literal generation operations">;
142146
143147 // The way of reading thread pointer
144148 def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
1818
1919 namespace llvm {
2020
21 // Fuse AES crypto encoding or decoding.
22 static bool isAESPair(const MachineInstr *FirstMI,
23 const MachineInstr &SecondMI) {
24 // Assume the 1st instr to be a wildcard if it is unspecified.
25 unsigned FirstOpcode =
26 FirstMI ? FirstMI->getOpcode()
27 : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
28 unsigned SecondOpcode = SecondMI.getOpcode();
29
30 switch(SecondOpcode) {
31 // AES encode.
32 case ARM::AESMC :
33 return FirstOpcode == ARM::AESE ||
34 FirstOpcode == ARM::INSTRUCTION_LIST_END;
35 // AES decode.
36 case ARM::AESIMC:
37 return FirstOpcode == ARM::AESD ||
38 FirstOpcode == ARM::INSTRUCTION_LIST_END;
39 }
40
41 return false;
42 }
43
44 // Fuse literal generation.
45 static bool isLiteralsPair(const MachineInstr *FirstMI,
46 const MachineInstr &SecondMI) {
47 // Assume the 1st instr to be a wildcard if it is unspecified.
48 unsigned FirstOpcode =
49 FirstMI ? FirstMI->getOpcode()
50 : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
51 unsigned SecondOpcode = SecondMI.getOpcode();
52
53 // 32 bit immediate.
54 if ((FirstOpcode == ARM::INSTRUCTION_LIST_END ||
55 FirstOpcode == ARM::MOVi16) &&
56 SecondOpcode == ARM::MOVTi16)
57 return true;
58
59 return false;
60 }
61
2162 /// Check if the instr pair, FirstMI and SecondMI, should be fused
2263 /// together. Given SecondMI, when FirstMI is unspecified, then check if
2364 /// SecondMI may be part of a fused pair at all.
2768 const MachineInstr &SecondMI) {
2869 const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
2970
30 // Assume wildcards for unspecified instrs.
31 unsigned FirstOpcode =
32 FirstMI ? FirstMI->getOpcode()
33 : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
34 unsigned SecondOpcode = SecondMI.getOpcode();
35
36 if (ST.hasFuseAES())
37 // Fuse AES crypto operations.
38 switch(SecondOpcode) {
39 // AES encode.
40 case ARM::AESMC :
41 return FirstOpcode == ARM::AESE ||
42 FirstOpcode == ARM::INSTRUCTION_LIST_END;
43 // AES decode.
44 case ARM::AESIMC:
45 return FirstOpcode == ARM::AESD ||
46 FirstOpcode == ARM::INSTRUCTION_LIST_END;
47 }
71 if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
72 return true;
73 if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
74 return true;
4875
4976 return false;
5077 }
325325 /// HasFuseAES - if true, processor executes back to back AES instruction
326326 /// pairs faster.
327327 bool HasFuseAES = false;
328
329 /// HasFuseLiterals - if true, processor executes back to back
330 /// bottom and top halves of literal generation faster.
331 bool HasFuseLiterals = false;
328332
329333 /// If true, if conversion may decide to leave some instructions unpredicated.
330334 bool IsProfitableToUnpredicate = false;
615619 bool hasFullFP16() const { return HasFullFP16; }
616620
617621 bool hasFuseAES() const { return HasFuseAES; }
622 bool hasFuseLiterals() const { return HasFuseLiterals; }
618623 /// Return true if the CPU supports any kind of instruction fusion.
619 bool hasFusion() const { return hasFuseAES(); }
624 bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
620625
621626 const Triple &getTargetTriple() const { return TargetTriple; }
622627
0 ; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=-fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
1 ; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=+fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
2
3 @g = common global i32* zeroinitializer
4
5 define i32* @litp(i32 %a, i32 %b) {
6 entry:
7 %add = add nsw i32 %b, %a
8 %ptr = getelementptr i32, i32* bitcast (i32* (i32, i32)* @litp to i32*), i32 %add
9 %res = getelementptr i32, i32* bitcast (i32** @g to i32*), i32 %add
10 store i32* %ptr, i32** @g, align 4
11 ret i32* %res
12
13 ; CHECK-LABEL: litp:
14 ; CHECK: movw [[R:r[0-9]+]], :lower16:litp
15 ; CHECKDONT-NEXT: movw [[S:r[0-9]+]], :lower16:g
16 ; CHECKFUSE-NEXT: movt [[R]], :upper16:litp
17 ; CHECKFUSE-NEXT: movw [[S:r[0-9]+]], :lower16:g
18 ; CHECKFUSE-NEXT: movt [[S]], :upper16:g
19 }
20
21 define i32 @liti(i32 %a, i32 %b) {
22 entry:
23 %adda = add i32 %a, -262095121
24 %add1 = add i32 %adda, %b
25 %addb = add i32 %b, 121110837
26 %add2 = add i32 %addb, %a
27 store i32 %add1, i32* bitcast (i32** @g to i32*), align 4
28 ret i32 %add2
29
30 ; CHECK-LABEL: liti:
31 ; CHECK: movw [[R:r[0-9]+]], #309
32 ; CHECKDONT-NEXT: add {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}
33 ; CHECKFUSE-NEXT: movt [[R]], #1848
34 ; CHECKFUSE: movw [[S:r[0-9]+]], :lower16:g
35 ; CHECKFUSE-NEXT: movt [[S]], :upper16:g
36 ; CHECKFUSE-NEXT: movw [[T:r[0-9]+]], #48879
37 ; CHECKFUSE-NEXT: movt [[T]], #61536
38 }