llvm.org GIT mirror llvm / 1fa5440
[x86] fold the mask op on 8- and 16-bit rotates Ref the post-commit thread for r310770: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20170807/478507.html The motivating cases as 'C' source examples can look like this: unsigned char rotate_right_8(unsigned char v, int shift) { // shift &= 7; v = ( v >> shift ) | ( v << ( 8 - shift ) ); return v; } https://godbolt.org/g/K6rc1A Notice that the source doesn't contain UB-safe masked shift amounts, but instcombine created those in order to produce narrow rotate patterns. This should be the last step needed to resolve PR34046: https://bugs.llvm.org/show_bug.cgi?id=34046 Differential Revision: https://reviews.llvm.org/D36644 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310849 91177308-0d34-0410-b5e6-96231b3b80d8 Sanjay Patel 2 years ago
2 changed file(s) with 39 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
16261626 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
16271627 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
16281628
1629 // Helper imms that check if a mask doesn't change significant shift bits.
1629 // Helper imms to check if a mask doesn't change significant shift/rotate bits.
1630 def immShift8 : ImmLeaf<i8, [{ // matches a mask whose low 3 bits are all ones
1631   return countTrailingOnes<uint64_t>(Imm) >= 3;
1632 }]>;
1633 def immShift16 : ImmLeaf<i8, [{ // matches a mask whose low 4 bits are all ones
1634   return countTrailingOnes<uint64_t>(Imm) >= 4;
1635 }]>;
16301636 def immShift32 : ImmLeaf<i8, [{ // matches a mask whose low 5 bits are all ones
16311637   return countTrailingOnes<uint64_t>(Imm) >= 5;
16321638 }]>;
16601666 defm : MaskedShiftAmountPats<shl, "SHL">;
16611667 defm : MaskedShiftAmountPats<srl, "SHR">;
16621668 defm : MaskedShiftAmountPats<sra, "SAR">;
1663 defm : MaskedShiftAmountPats<rotl, "ROL">;
1664 defm : MaskedShiftAmountPats<rotr, "ROR">;
1669
1670 // ROL/ROR instructions allow a stronger mask optimization than shift for 8- and
1671 // 16-bit. We can remove a mask of any (bitwidth - 1) on the rotation amount
1672 // because over-rotating produces the same result. This is noted in the Intel
1673 // docs with: "tempCOUNT <- (COUNT & COUNTMASK) MOD SIZE". Masking the rotation
1674 // amount could affect EFLAGS results, but that does not matter because we are
1675 // not tracking flags for these nodes.
1676 multiclass MaskedRotateAmountPats<SDNode frag, string name> { // frag: rotate node to match; name: opcode prefix pasted onto "8rCL", "16mCL", ...
1677   // (rot x (and y, BitWidth - 1)) ==> (rot x, y)
1678   def : Pat<(frag GR8:$src1, (and CL, immShift8)),
1679             (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
1680   def : Pat<(frag GR16:$src1, (and CL, immShift16)),
1681             (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
1682   def : Pat<(frag GR32:$src1, (and CL, immShift32)),
1683             (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
1684   def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift8)), addr:$dst),
1685             (!cast<Instruction>(name # "8mCL") addr:$dst)>;
1686   def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift16)), addr:$dst),
1687             (!cast<Instruction>(name # "16mCL") addr:$dst)>;
1688   def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
1689             (!cast<Instruction>(name # "32mCL") addr:$dst)>;
1690
1691   // (rot x (and y, 63)) ==> (rot x, y)
1692   def : Pat<(frag GR64:$src1, (and CL, immShift64)),
1693             (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
1694   def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
1695             (!cast<Instruction>(name # "64mCL") addr:$dst)>;
1696 }
1697
1698
1699 defm : MaskedRotateAmountPats<rotl, "ROL">; // rotate-left patterns
1700 defm : MaskedRotateAmountPats<rotr, "ROR">; // rotate-right patterns
16651701
16661702 // Double shift amount is implicitly masked.
16671703 multiclass MaskedDoubleShiftAmountPats {
143143 define i8 @rotate_left_8(i8 %x, i32 %amount) {
144144 ; CHECK-LABEL: rotate_left_8:
145145 ; CHECK: # BB#0:
146 ; CHECK-NEXT: andb $7, %sil
147146 ; CHECK-NEXT: movl %esi, %ecx
148147 ; CHECK-NEXT: rolb %cl, %dil
149148 ; CHECK-NEXT: movl %edi, %eax
161160 define i8 @rotate_right_8(i8 %x, i32 %amount) {
162161 ; CHECK-LABEL: rotate_right_8:
163162 ; CHECK: # BB#0:
164 ; CHECK-NEXT: andb $7, %sil
165163 ; CHECK-NEXT: movl %esi, %ecx
166164 ; CHECK-NEXT: rorb %cl, %dil
167165 ; CHECK-NEXT: movl %edi, %eax
179177 define i16 @rotate_left_16(i16 %x, i32 %amount) {
180178 ; CHECK-LABEL: rotate_left_16:
181179 ; CHECK: # BB#0:
182 ; CHECK-NEXT: andb $15, %sil
183180 ; CHECK-NEXT: movl %esi, %ecx
184181 ; CHECK-NEXT: rolw %cl, %di
185182 ; CHECK-NEXT: movl %edi, %eax
197194 define i16 @rotate_right_16(i16 %x, i32 %amount) {
198195 ; CHECK-LABEL: rotate_right_16:
199196 ; CHECK: # BB#0:
200 ; CHECK-NEXT: andb $15, %sil
201197 ; CHECK-NEXT: movl %esi, %ecx
202198 ; CHECK-NEXT: rorw %cl, %di
203199 ; CHECK-NEXT: movl %edi, %eax
215211 define void @rotate_left_m8(i8* %p, i32 %amount) {
216212 ; CHECK-LABEL: rotate_left_m8:
217213 ; CHECK: # BB#0:
218 ; CHECK-NEXT: andb $7, %sil
219214 ; CHECK-NEXT: movl %esi, %ecx
220215 ; CHECK-NEXT: rolb %cl, (%rdi)
221216 ; CHECK-NEXT: retq
234229 define void @rotate_right_m8(i8* %p, i32 %amount) {
235230 ; CHECK-LABEL: rotate_right_m8:
236231 ; CHECK: # BB#0:
237 ; CHECK-NEXT: andb $7, %sil
238232 ; CHECK-NEXT: movl %esi, %ecx
239233 ; CHECK-NEXT: rorb %cl, (%rdi)
240234 ; CHECK-NEXT: retq
253247 define void @rotate_left_m16(i16* %p, i32 %amount) {
254248 ; CHECK-LABEL: rotate_left_m16:
255249 ; CHECK: # BB#0:
256 ; CHECK-NEXT: andb $15, %sil
257250 ; CHECK-NEXT: movl %esi, %ecx
258251 ; CHECK-NEXT: rolw %cl, (%rdi)
259252 ; CHECK-NEXT: retq
272265 define void @rotate_right_m16(i16* %p, i32 %amount) {
273266 ; CHECK-LABEL: rotate_right_m16:
274267 ; CHECK: # BB#0:
275 ; CHECK-NEXT: andb $15, %sil
276268 ; CHECK-NEXT: movl %esi, %ecx
277269 ; CHECK-NEXT: rorw %cl, (%rdi)
278270 ; CHECK-NEXT: retq