llvm.org GIT mirror llvm / e2349b0
Merging r219441: ------------------------------------------------------------------------ r219441 | sfantao | 2014-10-09 20:42:56 +0000 (Thu, 09 Oct 2014) | 3 lines Fix bug in GPR to FPR moves in PPC64LE. The current implementation of GPR->FPR register moves uses a stack slot. This mechanism writes a double word and reads a word. In big-endian mode the load address must be displaced by 4 bytes in order to get the right value. In little-endian mode this displacement is not required. This patch fixes the issue and adds LE regression tests to fast-isel-conversion, which currently expose this problem. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_35@223740 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 5 years ago
2 changed file(s) with 125 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
864864 }
865865
866866 // Move an i32 or i64 value in a GPR to an f64 value in an FPR.
867 // FIXME: When direct register moves are implemented (see PowerISA 2.08),
867 // FIXME: When direct register moves are implemented (see PowerISA 2.07),
868868 // those should be used instead of moving via a stack slot when the
869869 // subtarget permits.
870870 // FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
897897 if (SrcVT == MVT::i32) {
898898 if (!IsSigned) {
899899 LoadOpc = PPC::LFIWZX;
900 Addr.Offset = 4;
900 Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
901901 } else if (PPCSubTarget->hasLFIWAX()) {
902902 LoadOpc = PPC::LFIWAX;
903 Addr.Offset = 4;
903 Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
904904 }
905905 }
906906
984984
985985 // Move the floating-point value in SrcReg into an integer destination
986986 // register, and return the register (or zero if we can't handle it).
987 // FIXME: When direct register moves are implemented (see PowerISA 2.08),
987 // FIXME: When direct register moves are implemented (see PowerISA 2.07),
988988 // those should be used instead of moving via a stack slot when the
989989 // subtarget permits.
990990 unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
0 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
1 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=ELF64LE
12 ; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970
23
34 ;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
89 define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
910 entry:
1011 ; ELF64: sitofp_single_i64
12 ; ELF64LE: sitofp_single_i64
1113 ; PPC970: sitofp_single_i64
1214 %b.addr = alloca float, align 4
1315 %conv = sitofp i64 %a to float
1416 ; ELF64: std
1517 ; ELF64: lfd
1618 ; ELF64: fcfids
19 ; ELF64LE: std
20 ; ELF64LE: lfd
21 ; ELF64LE: fcfids
1722 ; PPC970: std
1823 ; PPC970: lfd
1924 ; PPC970: fcfid
2530 define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
2631 entry:
2732 ; ELF64: sitofp_single_i32
33 ; ELF64LE: sitofp_single_i32
2834 ; PPC970: sitofp_single_i32
2935 %b.addr = alloca float, align 4
3036 %conv = sitofp i32 %a to float
3137 ; ELF64: std
38 ; stack offset used to load the float: 65524 = -16 + 4
39 ; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
3240 ; ELF64: lfiwax
3341 ; ELF64: fcfids
42 ; ELF64LE: std
43 ; stack offset used to load the float: 65520 = -16 + 0
44 ; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
45 ; ELF64LE: lfiwax
46 ; ELF64LE: fcfids
3447 ; PPC970: std
3548 ; PPC970: lfd
3649 ; PPC970: fcfid
4255 define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
4356 entry:
4457 ; ELF64: sitofp_single_i16
58 ; ELF64LE: sitofp_single_i16
4559 ; PPC970: sitofp_single_i16
4660 %b.addr = alloca float, align 4
4761 %conv = sitofp i16 %a to float
4963 ; ELF64: std
5064 ; ELF64: lfd
5165 ; ELF64: fcfids
66 ; ELF64LE: extsh
67 ; ELF64LE: std
68 ; ELF64LE: lfd
69 ; ELF64LE: fcfids
5270 ; PPC970: extsh
5371 ; PPC970: std
5472 ; PPC970: lfd
6179 define void @sitofp_single_i8(i8 %a) nounwind ssp {
6280 entry:
6381 ; ELF64: sitofp_single_i8
82 ; ELF64LE: sitofp_single_i8
6483 ; PPC970: sitofp_single_i8
6584 %b.addr = alloca float, align 4
6685 %conv = sitofp i8 %a to float
6887 ; ELF64: std
6988 ; ELF64: lfd
7089 ; ELF64: fcfids
90 ; ELF64LE: extsb
91 ; ELF64LE: std
92 ; ELF64LE: lfd
93 ; ELF64LE: fcfids
7194 ; PPC970: extsb
7295 ; PPC970: std
7396 ; PPC970: lfd
80103 define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
81104 entry:
82105 ; ELF64: sitofp_double_i32
106 ; ELF64LE: sitofp_double_i32
83107 ; PPC970: sitofp_double_i32
84108 %b.addr = alloca double, align 8
85109 %conv = sitofp i32 %a to double
86110 ; ELF64: std
111 ; stack offset used to load the float: 65524 = -16 + 4
112 ; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
87113 ; ELF64: lfiwax
88114 ; ELF64: fcfid
115 ; ELF64LE: std
116 ; stack offset used to load the float: 65520 = -16 + 0
117 ; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
118 ; ELF64LE: lfiwax
119 ; ELF64LE: fcfid
89120 ; PPC970: std
90121 ; PPC970: lfd
91122 ; PPC970: fcfid
96127 define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
97128 entry:
98129 ; ELF64: sitofp_double_i64
130 ; ELF64LE: sitofp_double_i64
99131 ; PPC970: sitofp_double_i64
100132 %b.addr = alloca double, align 8
101133 %conv = sitofp i64 %a to double
102134 ; ELF64: std
103135 ; ELF64: lfd
104136 ; ELF64: fcfid
137 ; ELF64LE: std
138 ; ELF64LE: lfd
139 ; ELF64LE: fcfid
105140 ; PPC970: std
106141 ; PPC970: lfd
107142 ; PPC970: fcfid
112147 define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
113148 entry:
114149 ; ELF64: sitofp_double_i16
150 ; ELF64LE: sitofp_double_i16
115151 ; PPC970: sitofp_double_i16
116152 %b.addr = alloca double, align 8
117153 %conv = sitofp i16 %a to double
119155 ; ELF64: std
120156 ; ELF64: lfd
121157 ; ELF64: fcfid
158 ; ELF64LE: extsh
159 ; ELF64LE: std
160 ; ELF64LE: lfd
161 ; ELF64LE: fcfid
122162 ; PPC970: extsh
123163 ; PPC970: std
124164 ; PPC970: lfd
130170 define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
131171 entry:
132172 ; ELF64: sitofp_double_i8
173 ; ELF64LE: sitofp_double_i8
133174 ; PPC970: sitofp_double_i8
134175 %b.addr = alloca double, align 8
135176 %conv = sitofp i8 %a to double
137178 ; ELF64: std
138179 ; ELF64: lfd
139180 ; ELF64: fcfid
181 ; ELF64LE: extsb
182 ; ELF64LE: std
183 ; ELF64LE: lfd
184 ; ELF64LE: fcfid
140185 ; PPC970: extsb
141186 ; PPC970: std
142187 ; PPC970: lfd
150195 define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
151196 entry:
152197 ; ELF64: uitofp_single_i64
198 ; ELF64LE: uitofp_single_i64
153199 ; PPC970: uitofp_single_i64
154200 %b.addr = alloca float, align 4
155201 %conv = uitofp i64 %a to float
156202 ; ELF64: std
157203 ; ELF64: lfd
158204 ; ELF64: fcfidus
205 ; ELF64LE: std
206 ; ELF64LE: lfd
207 ; ELF64LE: fcfidus
159208 ; PPC970-NOT: fcfidus
160209 store float %conv, float* %b.addr, align 4
161210 ret void
164213 define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
165214 entry:
166215 ; ELF64: uitofp_single_i32
216 ; ELF64LE: uitofp_single_i32
167217 ; PPC970: uitofp_single_i32
168218 %b.addr = alloca float, align 4
169219 %conv = uitofp i32 %a to float
170220 ; ELF64: std
221 ; stack offset used to load the float: 65524 = -16 + 4
222 ; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
171223 ; ELF64: lfiwzx
172224 ; ELF64: fcfidus
225 ; ELF64LE: std
226 ; stack offset used to load the float: 65520 = -16 + 0
227 ; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
228 ; ELF64LE: lfiwzx
229 ; ELF64LE: fcfidus
173230 ; PPC970-NOT: lfiwzx
174231 ; PPC970-NOT: fcfidus
175232 store float %conv, float* %b.addr, align 4
179236 define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
180237 entry:
181238 ; ELF64: uitofp_single_i16
239 ; ELF64LE: uitofp_single_i16
182240 ; PPC970: uitofp_single_i16
183241 %b.addr = alloca float, align 4
184242 %conv = uitofp i16 %a to float
186244 ; ELF64: std
187245 ; ELF64: lfd
188246 ; ELF64: fcfidus
247 ; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
248 ; ELF64LE: std
249 ; ELF64LE: lfd
250 ; ELF64LE: fcfidus
189251 ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
190252 ; PPC970: std
191253 ; PPC970: lfd
198260 define void @uitofp_single_i8(i8 %a) nounwind ssp {
199261 entry:
200262 ; ELF64: uitofp_single_i8
263 ; ELF64LE: uitofp_single_i8
201264 ; PPC970: uitofp_single_i8
202265 %b.addr = alloca float, align 4
203266 %conv = uitofp i8 %a to float
205268 ; ELF64: std
206269 ; ELF64: lfd
207270 ; ELF64: fcfidus
271 ; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
272 ; ELF64LE: std
273 ; ELF64LE: lfd
274 ; ELF64LE: fcfidus
208275 ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
209276 ; PPC970: std
210277 ; PPC970: lfd
217284 define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
218285 entry:
219286 ; ELF64: uitofp_double_i64
287 ; ELF64LE: uitofp_double_i64
220288 ; PPC970: uitofp_double_i64
221289 %b.addr = alloca double, align 8
222290 %conv = uitofp i64 %a to double
223291 ; ELF64: std
224292 ; ELF64: lfd
225293 ; ELF64: fcfidu
294 ; ELF64LE: std
295 ; ELF64LE: lfd
296 ; ELF64LE: fcfidu
226297 ; PPC970-NOT: fcfidu
227298 store double %conv, double* %b.addr, align 8
228299 ret void
231302 define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
232303 entry:
233304 ; ELF64: uitofp_double_i32
305 ; ELF64LE: uitofp_double_i32
234306 ; PPC970: uitofp_double_i32
235307 %b.addr = alloca double, align 8
236308 %conv = uitofp i32 %a to double
237309 ; ELF64: std
310 ; stack offset used to load the float: 65524 = -16 + 4
311 ; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
238312 ; ELF64: lfiwzx
239313 ; ELF64: fcfidu
314 ; ELF64LE: std
315 ; stack offset used to load the float: 65520 = -16 + 0
316 ; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
317 ; ELF64LE: lfiwzx
318 ; ELF64LE: fcfidu
240319 ; PPC970-NOT: lfiwzx
241320 ; PPC970-NOT: fcfidu
242321 store double %conv, double* %b.addr, align 8
246325 define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
247326 entry:
248327 ; ELF64: uitofp_double_i16
328 ; ELF64LE: uitofp_double_i16
249329 ; PPC970: uitofp_double_i16
250330 %b.addr = alloca double, align 8
251331 %conv = uitofp i16 %a to double
253333 ; ELF64: std
254334 ; ELF64: lfd
255335 ; ELF64: fcfidu
336 ; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
337 ; ELF64LE: std
338 ; ELF64LE: lfd
339 ; ELF64LE: fcfidu
256340 ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
257341 ; PPC970: std
258342 ; PPC970: lfd
264348 define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
265349 entry:
266350 ; ELF64: uitofp_double_i8
351 ; ELF64LE: uitofp_double_i8
267352 ; PPC970: uitofp_double_i8
268353 %b.addr = alloca double, align 8
269354 %conv = uitofp i8 %a to double
271356 ; ELF64: std
272357 ; ELF64: lfd
273358 ; ELF64: fcfidu
359 ; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
360 ; ELF64LE: std
361 ; ELF64LE: lfd
362 ; ELF64LE: fcfidu
274363 ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
275364 ; PPC970: std
276365 ; PPC970: lfd
284373 define void @fptosi_float_i32(float %a) nounwind ssp {
285374 entry:
286375 ; ELF64: fptosi_float_i32
376 ; ELF64LE: fptosi_float_i32
287377 ; PPC970: fptosi_float_i32
288378 %b.addr = alloca i32, align 4
289379 %conv = fptosi float %a to i32
290380 ; ELF64: fctiwz
291381 ; ELF64: stfd
292382 ; ELF64: lwa
383 ; ELF64LE: fctiwz
384 ; ELF64LE: stfd
385 ; ELF64LE: lwa
293386 ; PPC970: fctiwz
294387 ; PPC970: stfd
295388 ; PPC970: lwa
300393 define void @fptosi_float_i64(float %a) nounwind ssp {
301394 entry:
302395 ; ELF64: fptosi_float_i64
396 ; ELF64LE: fptosi_float_i64
303397 ; PPC970: fptosi_float_i64
304398 %b.addr = alloca i64, align 4
305399 %conv = fptosi float %a to i64
306400 ; ELF64: fctidz
307401 ; ELF64: stfd
308402 ; ELF64: ld
403 ; ELF64LE: fctidz
404 ; ELF64LE: stfd
405 ; ELF64LE: ld
309406 ; PPC970: fctidz
310407 ; PPC970: stfd
311408 ; PPC970: ld
316413 define void @fptosi_double_i32(double %a) nounwind ssp {
317414 entry:
318415 ; ELF64: fptosi_double_i32
416 ; ELF64LE: fptosi_double_i32
319417 ; PPC970: fptosi_double_i32
320418 %b.addr = alloca i32, align 8
321419 %conv = fptosi double %a to i32
322420 ; ELF64: fctiwz
323421 ; ELF64: stfd
324422 ; ELF64: lwa
423 ; ELF64LE: fctiwz
424 ; ELF64LE: stfd
425 ; ELF64LE: lwa
325426 ; PPC970: fctiwz
326427 ; PPC970: stfd
327428 ; PPC970: lwa
332433 define void @fptosi_double_i64(double %a) nounwind ssp {
333434 entry:
334435 ; ELF64: fptosi_double_i64
436 ; ELF64LE: fptosi_double_i64
335437 ; PPC970: fptosi_double_i64
336438 %b.addr = alloca i64, align 8
337439 %conv = fptosi double %a to i64
338440 ; ELF64: fctidz
339441 ; ELF64: stfd
340442 ; ELF64: ld
443 ; ELF64LE: fctidz
444 ; ELF64LE: stfd
445 ; ELF64LE: ld
341446 ; PPC970: fctidz
342447 ; PPC970: stfd
343448 ; PPC970: ld
350455 define void @fptoui_float_i32(float %a) nounwind ssp {
351456 entry:
352457 ; ELF64: fptoui_float_i32
458 ; ELF64LE: fptoui_float_i32
353459 ; PPC970: fptoui_float_i32
354460 %b.addr = alloca i32, align 4
355461 %conv = fptoui float %a to i32
356462 ; ELF64: fctiwuz
357463 ; ELF64: stfd
358464 ; ELF64: lwz
465 ; ELF64LE: fctiwuz
466 ; ELF64LE: stfd
467 ; ELF64LE: lwz
359468 ; PPC970: fctidz
360469 ; PPC970: stfd
361470 ; PPC970: lwz
366475 define void @fptoui_float_i64(float %a) nounwind ssp {
367476 entry:
368477 ; ELF64: fptoui_float_i64
478 ; ELF64LE: fptoui_float_i64
369479 ; PPC970: fptoui_float_i64
370480 %b.addr = alloca i64, align 4
371481 %conv = fptoui float %a to i64
372482 ; ELF64: fctiduz
373483 ; ELF64: stfd
374484 ; ELF64: ld
485 ; ELF64LE: fctiduz
486 ; ELF64LE: stfd
487 ; ELF64LE: ld
375488 ; PPC970-NOT: fctiduz
376489 store i64 %conv, i64* %b.addr, align 4
377490 ret void
380493 define void @fptoui_double_i32(double %a) nounwind ssp {
381494 entry:
382495 ; ELF64: fptoui_double_i32
496 ; ELF64LE: fptoui_double_i32
383497 ; PPC970: fptoui_double_i32
384498 %b.addr = alloca i32, align 8
385499 %conv = fptoui double %a to i32
386500 ; ELF64: fctiwuz
387501 ; ELF64: stfd
388502 ; ELF64: lwz
503 ; ELF64LE: fctiwuz
504 ; ELF64LE: stfd
505 ; ELF64LE: lwz
389506 ; PPC970: fctidz
390507 ; PPC970: stfd
391508 ; PPC970: lwz
396513 define void @fptoui_double_i64(double %a) nounwind ssp {
397514 entry:
398515 ; ELF64: fptoui_double_i64
516 ; ELF64LE: fptoui_double_i64
399517 ; PPC970: fptoui_double_i64
400518 %b.addr = alloca i64, align 8
401519 %conv = fptoui double %a to i64
402520 ; ELF64: fctiduz
403521 ; ELF64: stfd
404522 ; ELF64: ld
523 ; ELF64LE: fctiduz
524 ; ELF64LE: stfd
525 ; ELF64LE: ld
405526 ; PPC970-NOT: fctiduz
406527 store i64 %conv, i64* %b.addr, align 8
407528 ret void