llvm.org GIT mirror llvm / d504c85
[ARM] Honour ABI for rem under -O0 for EABI, GNUEABI, Android and Musl. At higher optimization levels, we generate the libcall for DIVREM_Ix, which is fine: aeabi_{u|i}divmod. At -O0 we generate the one for REM_Ix, which is the default {u}mod{q|h|s|d}i3. This commit makes sure that we don't generate REM_Ix calls for ABIs that don't support them (i.e. where we need to use DIVREM_Ix instead). This is achieved by bailing out of FastISel, which can't handle non-double multi-reg returns, and letting the legalization infrastructure expand the REM_Ix calls. It also updates the divmod-eabi.ll test to run under -O0 as well, and adds some Windows checks to it to make sure we don't break things for it. Fixes PR27068. Differential Revision: https://reviews.llvm.org/D21926 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275773 91177308-0d34-0410-b5e6-96231b3b80d8 Diana Picus 4 years ago
5 changed file(s) with 130 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
235235 /// for speed or for size.
236236 virtual bool isIntDivCheap(EVT VT, AttributeSet Attr) const {
237237 return false;
238 }
239
240 /// Return true if the target can handle a standalone remainder operation.
241 virtual bool hasStandaloneRem(EVT VT) const {
242 return true;
238243 }
239244
240245 /// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x)
17161716 Type *Ty = I->getType();
17171717 if (!isTypeLegal(Ty, VT))
17181718 return false;
1719
1720 // Many ABIs do not provide a libcall for standalone remainder, so we need to
1721 // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
1722 // multi-reg returns, we'll have to bail out.
1723 if (!TLI.hasStandaloneRem(VT)) {
1724 return false;
1725 }
17191726
17201727 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
17211728 if (VT == MVT::i8)
805805 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) {
806806 setOperationAction(ISD::SREM, MVT::i64, Custom);
807807 setOperationAction(ISD::UREM, MVT::i64, Custom);
808 HasStandaloneRem = false;
808809
809810 setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
810811 setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
477477 return true;
478478 }
479479
480 bool hasStandaloneRem(EVT VT) const override {
481 return HasStandaloneRem;
482 }
483
480484 protected:
481485 std::pair<const TargetRegisterClass *, uint8_t>
482486 findRepresentativeClass(const TargetRegisterInfo *TRI,
498502 // TODO: remove this, and have shouldInsertFencesForAtomic do the proper
499503 // check.
500504 bool InsertFencesForAtomic;
505
506 bool HasStandaloneRem = true;
501507
502508 void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
503509 void addDRTypeForNEON(MVT VT);
0 ; We run the tests with both the default optimization level and O0, to make sure
1 ; we don't have any ABI differences between them. In principle, the ABI checks
2 ; should be the same for both optimization levels (there could be exceptions
3 ; to this when a div and a mod with the same operands are not coalesced into
4 ; the same divmod, but luckily this doesn't occur in practice even at O0).
5 ; Sometimes the checks that the correct registers are used after the libcalls
6 ; are different between optimization levels, so we have to separate them.
7 ; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI
8 ; RUN: llc -mtriple armv7-none-eabi %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefix=EABI
19 ; RUN: llc -mtriple armv7-none-eabihf %s -o - | FileCheck %s --check-prefix=EABI
10 ; RUN: llc -mtriple armv7-none-eabihf %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefix=EABI
211 ; All "eabi" (Bare, GNU and Android) must lower SREM/UREM to __aeabi_{u,i}divmod
312 ; RUN: llc -mtriple armv7-linux-androideabi %s -o - | FileCheck %s --check-prefix=EABI
13 ; RUN: llc -mtriple armv7-linux-androideabi %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefix=EABI
414 ; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=EABI
15 ; RUN: llc -mtriple armv7-linux-gnueabi %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefix=EABI
516 ; RUN: llc -mtriple armv7-linux-musleabi %s -o - | FileCheck %s --check-prefix=EABI
6 ; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN
17 ; RUN: llc -mtriple armv7-linux-musleabi %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefix=EABI
18 ; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefixes=DARWIN,DARWIN-DEFAULT
19 ; RUN: llc -mtriple armv7-apple-darwin %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefixes=DARWIN,DARWIN-O0
720 ; FIXME: long-term, we will use "-apple-macho" and won't need this exception:
8 ; RUN: llc -mtriple armv7-apple-darwin-eabi %s -o - | FileCheck %s --check-prefix=DARWIN
21 ; RUN: llc -mtriple armv7-apple-darwin-eabi %s -o - | FileCheck %s --check-prefixes=DARWIN,DARWIN-DEFAULT
22 ; RUN: llc -mtriple armv7-apple-darwin-eabi %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefixes=DARWIN,DARWIN-O0
23 ; RUN: llc -mtriple thumbv7-windows %s -o - | FileCheck %s --check-prefixes=WINDOWS,WINDOWS-DEFAULT
24 ; RUN: llc -mtriple thumbv7-windows %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefixes=WINDOWS,WINDOWS-O0
925
1026 define signext i16 @f16(i16 signext %a, i16 signext %b) {
1127 ; EABI-LABEL: f16:
1228 ; DARWIN-LABEL: f16:
29 ; WINDOWS-LABEL: f16:
1330 entry:
1431 %conv = sext i16 %a to i32
1532 %conv1 = sext i16 %b to i32
1936 ; EABI: mov [[div:r[0-9]+]], r0
2037 ; EABI: mov [[rem:r[0-9]+]], r1
2138 ; DARWIN: ___divsi3
22 ; DARWIN: mov [[sum:r[0-9]+]], r0
23 ; DARWIN: __modsi3
24 ; DARWIN: add [[sum]]{{.*}}r0
39 ; DARWIN: mov [[div:r[0-9]+]], r0
40 ; DARWIN: __modsi3
41 ; DARWIN-DEFAULT: add [[sum:r[0-9]+]], r0, [[div]]
42 ; DARWIN-O0: mov [[rem:r[0-9]+]], r0
43 ; WINDOWS: __rt_sdiv
44 ; WINDOWS-DEFAULT: mls [[rem:r[0-9]+]], r0,
45 ; WINDOWS-DEFAULT: adds [[sum:r[0-9]+]], [[rem]], r0
46 ; WINDOWS-O0: mov [[div:r[0-9]+]], r0
47 ; WINDOWS-O0: mls [[rem:r[0-9]+]], [[div]],
2548 %rem8 = srem i32 %conv1, %conv
2649 ; EABI: __aeabi_idivmod
2750 ; DARWIN: __modsi3
51 ; WINDOWS: __rt_sdiv
52 ; WINDOWS: mls [[rem1:r[0-9]+]], r0,
2853 %add = add nsw i32 %rem, %div
2954 %add13 = add nsw i32 %add, %rem8
3055 %conv14 = trunc i32 %add13 to i16
3156 ; EABI: add r0{{.*}}r1
3257 ; EABI: sxth r0, r0
33 ; DARWIN: add r0{{.*}}[[sum]]
34 ; DARWIN: sxth r0, r0
58 ; DARWIN-DEFAULT: add [[res:r[0-9]+]], [[sum]], r0
59 ; DARWIN-O0: add [[sum:r[0-9]+]], [[rem]], [[div]]
60 ; DARWIN-O0: add [[res:r[0-9]+]], [[sum]], r0
61 ; DARWIN: sxth r0, [[res]]
62 ; WINDOWS-O0: adds [[sum:r[0-9]+]], [[rem]], [[div]]
63 ; WINDOWS: add [[rem1]], [[sum]]
64 ; WINDOWS: sxth [[res:r[0-9]+]], [[rem1]]
3565 ret i16 %conv14
3666 }
3767
3868 define i32 @f32(i32 %a, i32 %b) {
3969 ; EABI-LABEL: f32:
4070 ; DARWIN-LABEL: f32:
71 ; WINDOWS-LABEL: f32:
4172 entry:
4273 %div = sdiv i32 %a, %b
4374 %rem = srem i32 %a, %b
4576 ; EABI: mov [[div:r[0-9]+]], r0
4677 ; EABI: mov [[rem:r[0-9]+]], r1
4778 ; DARWIN: ___divsi3
48 ; DARWIN: mov [[sum:r[0-9]+]], r0
49 ; DARWIN: __modsi3
50 ; DARWIN: add [[sum]]{{.*}}r0
79 ; DARWIN: mov [[div:r[0-9]+]], r0
80 ; DARWIN: __modsi3
81 ; DARWIN-DEFAULT: add [[sum:r[0-9]+]], r0, [[div]]
82 ; DARWIN-O0: mov [[rem:r[0-9]+]], r0
83 ; WINDOWS: __rt_sdiv
84 ; WINDOWS: mov [[div:r[0-9]+]], r0
85 ; WINDOWS: __rt_sdiv
86 ; WINDOWS: mls [[rem:r[0-9]+]], r0,
87 ; WINDOWS-DEFAULT: add [[div]], [[rem]]
5188 %rem1 = srem i32 %b, %a
5289 ; EABI: __aeabi_idivmod
5390 ; DARWIN: __modsi3
91 ; WINDOWS: __rt_sdiv
92 ; WINDOWS: mls [[rem1:r[0-9]+]], r0,
5493 %add = add nsw i32 %rem, %div
5594 %add2 = add nsw i32 %add, %rem1
5695 ; EABI: add r0{{.*}}r1
57 ; DARWIN: add r0{{.*}}[[sum]]
96 ; DARWIN-DEFAULT: add r0, [[sum]], r0
97 ; DARWIN-O0: add [[sum:r[0-9]+]], [[rem]], [[div]]
98 ; DARWIN-O0: add [[res:r[0-9]+]], [[sum]], r0
99 ; WINDOWS-DEFAULT: add [[rem1]], [[div]]
100 ; WINDOWS-O0: adds [[sum:r[0-9]+]], [[rem]], [[div]]
101 ; WINDOWS-O0: add [[rem1]], [[sum]]
58102 ret i32 %add2
59103 }
60104
61105 define i32 @uf(i32 %a, i32 %b) {
62106 ; EABI-LABEL: uf:
63107 ; DARWIN-LABEL: uf:
108 ; WINDOWS-LABEL: uf:
64109 entry:
65110 %div = udiv i32 %a, %b
66111 %rem = urem i32 %a, %b
67112 ; EABI: __aeabi_uidivmod
68113 ; DARWIN: ___udivsi3
69 ; DARWIN: mov [[sum:r[0-9]+]], r0
114 ; DARWIN: mov [[div:r[0-9]+]], r0
70115 ; DARWIN: __umodsi3
71 ; DARWIN: add [[sum]]{{.*}}r0
116 ; DARWIN-DEFAULT: add [[sum:r[0-9]+]], r0, [[div]]
117 ; DARWIN-O0: mov [[rem:r[0-9]+]], r0
118 ; WINDOWS: __rt_udiv
119 ; WINDOWS: mov [[div:r[0-9]+]], r0
120 ; WINDOWS: __rt_udiv
121 ; WINDOWS: mls [[rem:r[0-9]+]], r0,
122 ; WINDOWS-DEFAULT: add [[div]], [[rem]]
72123 %rem1 = urem i32 %b, %a
73124 ; EABI: __aeabi_uidivmod
74125 ; DARWIN: __umodsi3
126 ; WINDOWS: __rt_udiv
127 ; WINDOWS: mls [[rem1:r[0-9]+]], r0,
75128 %add = add nuw i32 %rem, %div
76129 %add2 = add nuw i32 %add, %rem1
77130 ; EABI: add r0{{.*}}r1
78 ; DARWIN: add r0{{.*}}[[sum]]
131 ; DARWIN-DEFAULT: add r0, [[sum]], r0
132 ; DARWIN-O0: add [[sum:r[0-9]+]], [[rem]], [[div]]
133 ; DARWIN-O0: add [[res:r[0-9]+]], [[sum]], r0
134 ; WINDOWS-DEFAULT: add [[rem1]], [[div]]
135 ; WINDOWS-O0: adds [[sum:r[0-9]+]], [[rem]], [[div]]
136 ; WINDOWS-O0: add [[rem1]], [[sum]]
79137 ret i32 %add2
80138 }
81139
82140 define i64 @longf(i64 %a, i64 %b) {
83141 ; EABI-LABEL: longf:
84142 ; DARWIN-LABEL: longf:
143 ; WINDOWS-LABEL: longf:
85144 entry:
86145 %div = sdiv i64 %a, %b
87146 %rem = srem i64 %a, %b
93152 ; DARWIN: mov [[div1:r[0-9]+]], r0
94153 ; DARWIN: mov [[div2:r[0-9]+]], r1
95154 ; DARWIN: __moddi3
155 ; WINDOWS: __rt_sdiv64
156 ; WINDOWS: mov [[div1:r[0-9]+]], r0
157 ; WINDOWS: mov [[div2:r[0-9]+]], r1
158 ; WINDOWS: __moddi3
96159 %add = add nsw i64 %rem, %div
97160 ; DARWIN: adds r0{{.*}}[[div1]]
98161 ; DARWIN: adc r1{{.*}}[[div2]]
162 ; WINDOWS: adds.w r0, r0, [[div1]]
163 ; WINDOWS: adc.w r1, r1, [[div2]]
99164 ret i64 %add
100165 }
101166
102167 define i16 @shortf(i16 %a, i16 %b) {
103168 ; EABI-LABEL: shortf:
104169 ; DARWIN-LABEL: shortf:
170 ; WINDOWS-LABEL: shortf:
105171 entry:
106172 %div = sdiv i16 %a, %b
107173 %rem = srem i16 %a, %b
109175 ; DARWIN: ___divsi3
110176 ; DARWIN: mov [[div1:r[0-9]+]], r0
111177 ; DARWIN: __modsi3
178 ; WINDOWS: __rt_sdiv
179 ; WINDOWS: mov [[div:r[0-9]+]], r0
180 ; WINDOWS: __rt_sdiv
181 ; WINDOWS: mls [[rem:r[0-9]+]], r0,
112182 %add = add nsw i16 %rem, %div
183 ; EABI: add r0, r1
113184 ; DARWIN: add r0{{.*}}[[div1]]
185 ; WINDOWS: add [[rem]], [[div]]
114186 ret i16 %add
115187 }
116188
117189 define i32 @g1(i32 %a, i32 %b) {
118190 ; EABI-LABEL: g1:
119191 ; DARWIN-LABEL: g1:
192 ; WINDOWS-LABEL: g1:
120193 entry:
121194 %div = sdiv i32 %a, %b
122195 %rem = srem i32 %a, %b
124197 ; DARWIN: ___divsi3
125198 ; DARWIN: mov [[sum:r[0-9]+]], r0
126199 ; DARWIN: __modsi3
200 ; WINDOWS: __rt_sdiv
201 ; WINDOWS: mov [[div:r[0-9]+]], r0
202 ; WINDOWS: __rt_sdiv
203 ; WINDOWS: mls [[rem:r[0-9]+]], r0,
127204 %add = add nsw i32 %rem, %div
128205 ; EABI: add r0{{.*}}r1
129206 ; DARWIN: add r0{{.*}}[[sum]]
207 ; WINDOWS: add [[rem]], [[div]]
130208 ret i32 %add
131209 }
132210
134212 define i32 @g2(i32 %a, i32 %b) {
135213 ; EABI-LABEL: g2:
136214 ; DARWIN-LABEL: g2:
137 entry:
138 %rem = srem i32 %a, %b
139 ; EABI: __aeabi_idivmod
140 ; DARWIN: __modsi3
215 ; WINDOWS-LABEL: g2:
216 entry:
217 %rem = srem i32 %a, %b
218 ; EABI: __aeabi_idivmod
219 ; DARWIN: __modsi3
220 ; WINDOWS: __rt_sdiv
141221 ret i32 %rem
142222 ; EABI: mov r0, r1
223 ; WINDOWS: mls r0, r0,
143224 }
144225
145226 define i32 @g3(i32 %a, i32 %b) {
146227 ; EABI-LABEL: g3:
147228 ; DARWIN-LABEL: g3:
229 ; WINDOWS-LABEL: g3:
148230 entry:
149231 %rem = srem i32 %a, %b
150232 ; EABI: __aeabi_idivmod
151233 ; EABI: mov [[mod:r[0-9]+]], r1
152234 ; DARWIN: __modsi3
153235 ; DARWIN: mov [[sum:r[0-9]+]], r0
236 ; WINDOWS: __rt_sdiv
237 ; WINDOWS: mls [[rem:r[0-9]+]], r0,
154238 %rem1 = srem i32 %b, %rem
155239 ; EABI: __aeabi_idivmod
156240 ; DARWIN: __modsi3
241 ; WINDOWS: __rt_sdiv
242 ; WINDOWS: mls [[rem1:r[0-9]+]], r0,
157243 %add = add nsw i32 %rem1, %rem
158244 ; EABI: add r0, r1, [[mod]]
159245 ; DARWIN: add r0{{.*}}[[sum]]
246 ; WINDOWS: add [[rem1]], [[rem]]
160247 ret i32 %add
161248 }
162249
163250 define i32 @g4(i32 %a, i32 %b) {
164251 ; EABI-LABEL: g4:
165252 ; DARWIN-LABEL: g4:
253 ; WINDOWS-LABEL: g4:
166254 entry:
167255 %div = sdiv i32 %a, %b
168256 ; EABI: __aeabi_idiv{{$}}
169257 ; EABI: mov [[div:r[0-9]+]], r0
170258 ; DARWIN: ___divsi3
171259 ; DARWIN: mov [[sum:r[0-9]+]], r0
260 ; WINDOWS: __rt_sdiv
261 ; WINDOWS: mov [[div:r[0-9]+]], r0
172262 %rem = srem i32 %b, %div
173263 ; EABI: __aeabi_idivmod
174264 ; DARWIN: __modsi3
265 ; WINDOWS: __rt_sdiv
266 ; WINDOWS: mls [[rem:r[0-9]+]], r0,
175267 %add = add nsw i32 %rem, %div
176268 ; EABI: add r0, r1, [[div]]
177269 ; DARWIN: add r0{{.*}}[[sum]]
270 ; WINDOWS: add [[rem]], [[div]]
178271 ret i32 %add
179272 }