llvm.org GIT mirror llvm / 11ba3b1
Reapply r54147 with a constraint to use the 8-bit subreg form only on x86-64, avoiding the problem that on x86-32 not all GPRs have 8-bit subregs. Also, change several 16-bit instructions to use equivalent 32-bit instructions; these have a smaller encoding and avoid partial-register updates.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54223 91177308-0d34-0410-b5e6-96231b3b80d8

Dan Gohman, 11 years ago
5 changed files with 153 additions and 25 deletions.
240240 "movs{lq|xd}\t{$src, $dst|$dst, $src}",
241241 [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
242242
243 def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
244 "movz{bq|x}\t{$src, $dst|$dst, $src}",
245 [(set GR64:$dst, (zext GR8:$src))]>, TB;
246 def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
247 "movz{bq|x}\t{$src, $dst|$dst, $src}",
248 [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
249 def MOVZX64rr16: RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
250 "movz{wq|x}\t{$src, $dst|$dst, $src}",
251 [(set GR64:$dst, (zext GR16:$src))]>, TB;
252 def MOVZX64rm16: RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
253 "movz{wq|x}\t{$src, $dst|$dst, $src}",
254 [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
243 // Use movzbl instead of movzbq when the destination is a register; it's
244 // equivalent due to implicit zero-extending, and it has a smaller encoding.
245 def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
246 "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
247 [(set GR64:$dst, (zext GR8:$src))]>, TB;
248 def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
249 "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
250 [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
251 // Use movzwl instead of movzwq when the destination is a register; it's
252 // equivalent due to implicit zero-extending, and it has a smaller encoding.
253 def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
254 "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
255 [(set GR64:$dst, (zext GR16:$src))]>, TB;
256 def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
257 "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
258 [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
255259
256260 let neverHasSideEffects = 1 in {
257261 let Defs = [RAX], Uses = [EAX] in
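The comments above lean on a basic x86-64 rule: writing a 32-bit register implicitly zeroes bits 63:32 of the containing 64-bit register, so the movzbl/movzwl forms compute the same 64-bit result as movzbq/movzwq while dropping the REX.W prefix byte. A minimal sketch of the encodings (low registers chosen for illustration; extended registers need a REX prefix either way):

    movzbq %al, %rax   # 48 0f b6 c0 (REX.W + 0F B6): 4 bytes
    movzbl %al, %eax   # 0f b6 c0: 3 bytes; bits 63:32 of %rax cleared implicitly
    movzwq %ax, %rax   # 48 0f b7 c0: 4 bytes
    movzwl %ax, %eax   # 0f b7 c0: 3 bytes; same 64-bit result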
@@ -1092,9 +1096,9 @@
 // when we have a better way to specify isel priority.
 let Defs = [EFLAGS], AddedComplexity = 1,
     isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0 : RI<0x31, MRMInitReg, (outs GR64:$dst), (ins),
-                 "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
-                 [(set GR64:$dst, 0)]>;
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+                "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+                [(set GR64:$dst, 0)]>;
 
 // Materialize i64 constant where top 32-bits are zero.
 let AddedComplexity = 1, isReMaterializable = 1 in
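The same trick applies to materializing zero, which is why MOV64r0 is printed as a 32-bit xor. A sketch (registers illustrative):

    xorq %rax, %rax    # 48 31 c0: 3 bytes
    xorl %eax, %eax    # 31 c0: 2 bytes; upper 32 bits zeroed implicitly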
@@ -1239,6 +1243,26 @@
           (SUBREG_TO_REG (i64 0),
             (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)),
             x86_subreg_32bit)>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+
+// TODO: The following two patterns could be adapted to apply to x86-32, except
+// that they'll need some way to deal with the fact that in x86-32 not all GPRs
+// have 8-bit subregs. The GR32_ and GR16_ classes are a step in this direction,
+// but they aren't ready for this purpose yet.
+
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+          (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit)))>,
+      Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+          (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+      Requires<[In64BitMode]>;
 
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
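The new patterns replace an and-with-immediate by a movz from the subregister. For the 0xff case on a 64-bit value, a rough before/after (registers illustrative; note the imm8 form of and can't encode 255, since 0xff would sign-extend to -1):

    andq $255, %rax    # 48 25 ff 00 00 00: 6 bytes, clobbers EFLAGS
    movzbl %al, %eax   # 0f b6 c0: 3 bytes, leaves EFLAGS alone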
@@ -2423,12 +2423,15 @@
 } // Defs = [EFLAGS]
 
 // Sign/Zero extenders
+// Use movsbl instead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
 def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "movs{bw|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR16:$dst, (sext GR8:$src))]>, TB, OpSize;
+                   "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR16:$dst, (sext GR8:$src))]>, TB;
 def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "movs{bw|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB, OpSize;
+                   "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
                    "movs{bl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (sext GR8:$src))]>, TB;
24422445 "movs{wl|x}\t{$src, $dst|$dst, $src}",
24432446 [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
24442447
2448 // Use movzbl intead of movzbw; we don't care about the high 16 bits
2449 // of the register here. This has a smaller encoding and avoids a
2450 // partial-register update.
24452451 def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
2446 "movz{bw|x}\t{$src, $dst|$dst, $src}",
2447 [(set GR16:$dst, (zext GR8:$src))]>, TB, OpSize;
2452 "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
2453 [(set GR16:$dst, (zext GR8:$src))]>, TB;
24482454 def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
2449 "movz{bw|x}\t{$src, $dst|$dst, $src}",
2450 [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB, OpSize;
2455 "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
2456 [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
24512457 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
24522458 "movz{bl|x}\t{$src, $dst|$dst, $src}",
24532459 [(set GR32:$dst, (zext GR8:$src))]>, TB;
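For the 16-bit destinations above, the saving comes from dropping the 0x66 operand-size prefix, and the 32-bit form overwrites the full register rather than doing a partial 16-bit write. Sketch:

    movzbw %al, %ax    # 66 0f b6 c0: 4 bytes, partial write of %ax
    movzbl %al, %eax   # 0f b6 c0: 3 bytes, full write; low 16 bits identical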
@@ -2487,9 +2493,11 @@
 def MOV8r0  : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
                 "xor{b}\t$dst, $dst",
                 [(set GR8:$dst, 0)]>;
+// Use xorl instead of xorw since we don't care about the high 16 bits,
+// it's smaller, and it avoids a partial-register update.
 def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
-                "xor{w}\t$dst, $dst",
-                [(set GR16:$dst, 0)]>, OpSize;
+                "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+                [(set GR16:$dst, 0)]>;
 def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
                 "xor{l}\t$dst, $dst",
                 [(set GR32:$dst, 0)]>;
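Likewise for MOV16r0 (registers illustrative):

    xorw %ax, %ax      # 66 31 c0: 3 bytes, partial write of %ax
    xorl %eax, %eax    # 31 c0: 2 bytes, full-register write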
@@ -2762,6 +2770,10 @@
 // Some peepholes
 //===----------------------------------------------------------------------===//
 
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+          (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
+
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
 def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
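Why partial-register updates matter: on CPUs that rename the full register, an instruction that writes only a subregister forces a later full-width read to merge the stale upper bits with the new low bits, which can stall. An illustrative sequence:

    movzbw %al, %ax    # writes only %ax; bits 31:16 of %eax keep their old value
    addl %eax, %ecx    # reads all of %eax: the merge can cost a
                       # partial-register stall on some microarchitectures

Using movzbl for the first instruction writes all of %eax and avoids the merge.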
New test file (zext-inreg-0.ll; the two tests below refer to it by this name):

; RUN: llvm-as < %s | llc -march=x86 | not grep and
; RUN: llvm-as < %s | llc -march=x86-64 > %t
; RUN: not grep and %t
; RUN: not grep movzbq %t
; RUN: not grep movzwq %t
; RUN: not grep movzlq %t

; These should use movzbl instead of 'and 255'.
; This is related to not having a ZERO_EXTEND_REG opcode.

define i32 @c(i32 %d) nounwind {
  %e = add i32 %d, 1
  %retval = and i32 %e, 65535
  ret i32 %retval
}
define i64 @e(i64 %d) nounwind {
  %e = add i64 %d, 1
  %retval = and i64 %e, 65535
  ret i64 %retval
}
define i64 @f(i64 %d) nounwind {
  %e = add i64 %d, 1
  %retval = and i64 %e, 4294967295
  ret i64 %retval
}

define i32 @g(i8 %d) nounwind {
  %e = add i8 %d, 1
  %retval = zext i8 %e to i32
  ret i32 %retval
}
define i32 @h(i16 %d) nounwind {
  %e = add i16 %d, 1
  %retval = zext i16 %e to i32
  ret i32 %retval
}
define i64 @i(i8 %d) nounwind {
  %e = add i8 %d, 1
  %retval = zext i8 %e to i64
  ret i64 %retval
}
define i64 @j(i16 %d) nounwind {
  %e = add i16 %d, 1
  %retval = zext i16 %e to i64
  ret i64 %retval
}
define i64 @k(i32 %d) nounwind {
  %e = add i32 %d, 1
  %retval = zext i32 %e to i64
  ret i64 %retval
}
New test file:

; RUN: llvm-as < %s | llc -march=x86 | not grep and

; These tests differ from the ones in zext-inreg-0.ll in that
; on x86-64 they do require and instructions.

; These should use movzbl instead of 'and 255'.
; This is related to not having a ZERO_EXTEND_REG node.

define i64 @h(i64 %d) nounwind {
  %e = add i64 %d, 1
  %retval = and i64 %e, 281474976710655
  ret i64 %retval
}
New test file:

; RUN: llvm-as < %s | llc -march=x86-64 > %t
; RUN: not grep and %t
; RUN: not grep movzbq %t
; RUN: not grep movzwq %t
; RUN: not grep movzlq %t

; These should use movzbl instead of 'and 255'.
; This is related to not having a ZERO_EXTEND_REG opcode.

; This test was split out of zext-inreg-0.ll because these
; cases don't yet work on x86-32 due to the 8-bit subreg
; issue.

define i32 @a(i32 %d) nounwind {
  %e = add i32 %d, 1
  %retval = and i32 %e, 255
  ret i32 %retval
}
define i32 @b(float %d) nounwind {
  %tmp12 = fptoui float %d to i8
  %retval = zext i8 %tmp12 to i32
  ret i32 %retval
}
define i64 @d(i64 %d) nounwind {
  %e = add i64 %d, 1
  %retval = and i64 %e, 255
  ret i64 %retval
}