llvm.org GIT mirror llvm / f98d763
[PPC CodeGen] Fix the bitreverse.i64 intrinsic.
Summary: The two 32-bit words were swapped. Update a test omitted in reverted r316270.
Reviewers: jtony, aaron.ballman
Subscribers: nemanjai, kbarton
Differential Revision: https://reviews.llvm.org/D39163
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316916 91177308-0d34-0410-b5e6-96231b3b80d8
Fangrui Song, 2 years ago
3 changed file(s) with 84 addition(s) and 121 deletion(s). Raw diff Collapse all Expand all
45954595 // n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC);
45964596 // Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
45974597 // n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0);
4598 // Step 4: byte reverse (Suppose n = [B1,B2,B3,B4,B5,B6,B7,B8]):
4598 // Step 4: byte reverse (Suppose n = [B0,B1,B2,B3,B4,B5,B6,B7]):
45994599 // Apply the same byte reverse algorithm mentioned above for the fast 32-bit
46004600 // reverse to both the high 32 bits and the low 32 bits of the 64-bit value,
46014601 // then swap the two reversed words and OR them together for the final result.
46174617 dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0);
46184618 }
46194619
4620 def DWShift1 {
4621 dag Right = (RLDICL $A, 63, 1);
4622 dag Left = (RLDICR $A, 1, 62);
4623 }
4624
4625 def DWSwap1 {
4626 dag Bit = (OR8 (AND8 DWShift1.Right, DWMaskValues.Lo1),
4627 (AND8 DWShift1.Left, DWMaskValues.Hi1));
4628 }
4629
4630 def DWShift2 {
4631 dag Right = (RLDICL DWSwap1.Bit, 62, 2);
4632 dag Left = (RLDICR DWSwap1.Bit, 2, 61);
4633 }
4634
4635 def DWSwap2 {
4636 dag Bits = (OR8 (AND8 DWShift2.Right, DWMaskValues.Lo2),
4637 (AND8 DWShift2.Left, DWMaskValues.Hi2));
4638 }
4639
4640 def DWShift4 {
4641 dag Right = (RLDICL DWSwap2.Bits, 60, 4);
4642 dag Left = (RLDICR DWSwap2.Bits, 4, 59);
4643 }
4644
4645 def DWSwap4 {
4646 dag Bits = (OR8 (AND8 DWShift4.Right, DWMaskValues.Lo4),
4647 (AND8 DWShift4.Left, DWMaskValues.Hi4));
4648 }
4649
4650 // Bit swap is done, now start byte swap.
4651 def DWExtractLo32 {
4652 dag SubReg = (i32 (EXTRACT_SUBREG DWSwap4.Bits, sub_32));
4653 }
4654
4655 def DWRotateLo32 {
4656 dag Left24 = (RLWINM DWExtractLo32.SubReg, 24, 0, 31);
4657 }
4658
4659 def DWLo32RotateInsertByte3 {
4660 dag Left = (RLWIMI DWRotateLo32.Left24, DWExtractLo32.SubReg, 8, 8, 15);
4661 }
4662
4663 // Lower 32 bits in the right order
4664 def DWLo32RotateInsertByte1 {
4665 dag Left =
4666 (RLWIMI DWLo32RotateInsertByte3.Left, DWExtractLo32.SubReg, 8, 24, 31);
4667 }
4668
4669 def ExtendLo32 {
4670 dag To64Bit =
4671 (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
4672 DWLo32RotateInsertByte1.Left, sub_32));
4673 }
4674
4675 def DWShiftHi32 { // SRDI DWSwap4.Bits, 32)
4676 dag ToLo32 = (RLDICL DWSwap4.Bits, 32, 32);
4677 }
4678
4679 def DWExtractHi32 {
4680 dag SubReg = (i32 (EXTRACT_SUBREG DWShiftHi32.ToLo32, sub_32));
4681 }
4682
4683 def DWRotateHi32 {
4684 dag Left24 = (RLWINM DWExtractHi32.SubReg, 24, 0, 31);
4685 }
4686
4687 def DWHi32RotateInsertByte3 {
4688 dag Left = (RLWIMI DWRotateHi32.Left24, DWExtractHi32.SubReg, 8, 8, 15);
4689 }
4690
4691 // High 32 bits in the right order, but in the low 32-bit position
4692 def DWHi32RotateInsertByte1 {
4693 dag Left =
4694 (RLWIMI DWHi32RotateInsertByte3.Left, DWExtractHi32.SubReg, 8, 24, 31);
4695 }
4696
4697 def ExtendHi32 {
4698 dag To64Bit =
4699 (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
4700 DWHi32RotateInsertByte1.Left, sub_32));
4701 }
4702
4703 def DWShiftLo32 { // SLDI ExtendHi32.To64Bit, 32
4704 dag ToHi32 = (RLDICR ExtendHi32.To64Bit, 32, 31);
4705 }
4706
4620 def DWSwapInByte {
4621 dag Swap1 = (OR8 (AND8 (RLDICL $A, 63, 1), DWMaskValues.Lo1),
4622 (AND8 (RLDICR $A, 1, 62), DWMaskValues.Hi1));
4623 dag Swap2 = (OR8 (AND8 (RLDICL DWSwapInByte.Swap1, 62, 2), DWMaskValues.Lo2),
4624 (AND8 (RLDICR DWSwapInByte.Swap1, 2, 61), DWMaskValues.Hi2));
4625 dag Swap4 = (OR8 (AND8 (RLDICL DWSwapInByte.Swap2, 60, 4), DWMaskValues.Lo4),
4626 (AND8 (RLDICR DWSwapInByte.Swap2, 4, 59), DWMaskValues.Hi4));
4627 }
4628
4629 // Intra-byte swap is done; now start the inter-byte swap.
4630 def DWBytes4567 {
4631 dag Word = (i32 (EXTRACT_SUBREG DWSwapInByte.Swap4, sub_32));
4632 }
4633
4634 def DWBytes7456 {
4635 dag Word = (RLWINM DWBytes4567.Word, 24, 0, 31);
4636 }
4637
4638 def DWBytes7656 {
4639 dag Word = (RLWIMI DWBytes7456.Word, DWBytes4567.Word, 8, 8, 15);
4640 }
4641
4642 // B7 B6 B5 B4 in the right order
4643 def DWBytes7654 {
4644 dag Word = (RLWIMI DWBytes7656.Word, DWBytes4567.Word, 8, 24, 31);
4645 dag DWord =
4646 (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), DWBytes7654.Word, sub_32));
4647 }
4648
4649 def DWBytes0123 {
4650 dag Word = (i32 (EXTRACT_SUBREG (RLDICL DWSwapInByte.Swap4, 32, 32), sub_32));
4651 }
4652
4653 def DWBytes3012 {
4654 dag Word = (RLWINM DWBytes0123.Word, 24, 0, 31);
4655 }
4656
4657 def DWBytes3212 {
4658 dag Word = (RLWIMI DWBytes3012.Word, DWBytes0123.Word, 8, 8, 15);
4659 }
4660
4661 // B3 B2 B1 B0 in the right order
4662 def DWBytes3210 {
4663 dag Word = (RLWIMI DWBytes3212.Word, DWBytes0123.Word, 8, 24, 31);
4664 dag DWord =
4665 (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), DWBytes3210.Word, sub_32));
4666 }
4667
4668 // Now both the high word and the low word are reversed; next,
4669 // swap the high word and the low word.
47074670 def : Pat<(i64 (bitreverse i64:$A)),
4708 (OR8 DWShiftLo32.ToHi32, ExtendLo32.To64Bit)>;
4671 (OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>;
9090 ; CHECK-NEXT: and 4, 8, 4
9191 ; CHECK-NEXT: lis 7, 3855
9292 ; CHECK-NEXT: or 3, 3, 4
93 ; CHECK-NEXT: oris 12, 5, 52428
94 ; CHECK-NEXT: oris 9, 6, 13107
93 ; CHECK-NEXT: oris 9, 5, 52428
94 ; CHECK-NEXT: oris 10, 6, 13107
9595 ; CHECK-NEXT: lis 6, -3856
9696 ; CHECK-NEXT: ori 7, 7, 3855
9797 ; CHECK-NEXT: sldi 8, 3, 2
98 ; CHECK-NEXT: ori 4, 12, 52428
98 ; CHECK-NEXT: ori 4, 9, 52428
9999 ; CHECK-NEXT: rldicl 3, 3, 62, 2
100 ; CHECK-NEXT: ori 5, 9, 13107
100 ; CHECK-NEXT: ori 5, 10, 13107
101101 ; CHECK-NEXT: ori 6, 6, 61680
102102 ; CHECK-NEXT: and 3, 3, 5
103103 ; CHECK-NEXT: sldi 5, 6, 32
104104 ; CHECK-NEXT: and 4, 8, 4
105105 ; CHECK-NEXT: sldi 6, 7, 32
106106 ; CHECK-NEXT: or 3, 3, 4
107 ; CHECK-NEXT: oris 10, 5, 61680
108 ; CHECK-NEXT: oris 11, 6, 3855
107 ; CHECK-NEXT: oris 11, 5, 61680
108 ; CHECK-NEXT: oris 12, 6, 3855
109109 ; CHECK-NEXT: sldi 6, 3, 4
110 ; CHECK-NEXT: ori 4, 10, 61680
110 ; CHECK-NEXT: ori 4, 11, 61680
111111 ; CHECK-NEXT: rldicl 3, 3, 60, 4
112 ; CHECK-NEXT: ori 5, 11, 3855
112 ; CHECK-NEXT: ori 5, 12, 3855
113113 ; CHECK-NEXT: and 4, 6, 4
114114 ; CHECK-NEXT: and 3, 3, 5
115115 ; CHECK-NEXT: or 3, 3, 4
116 ; CHECK-NEXT: rlwinm 5, 3, 24, 0, 31
116117 ; CHECK-NEXT: rldicl 4, 3, 32, 32
117 ; CHECK-NEXT: rlwinm 6, 3, 24, 0, 31
118 ; CHECK-NEXT: rlwinm 5, 4, 24, 0, 31
119 ; CHECK-NEXT: rlwimi 6, 3, 8, 8, 15
120 ; CHECK-NEXT: rlwimi 5, 4, 8, 8, 15
121 ; CHECK-NEXT: rlwimi 6, 3, 8, 24, 31
122 ; CHECK-NEXT: rlwimi 5, 4, 8, 24, 31
123 ; CHECK-NEXT: sldi 12, 5, 32
124 ; CHECK-NEXT: or 3, 12, 6
118 ; CHECK-NEXT: rlwinm 6, 4, 24, 0, 31
119 ; CHECK-NEXT: rlwimi 5, 3, 8, 8, 15
120 ; CHECK-NEXT: rlwimi 6, 4, 8, 8, 15
121 ; CHECK-NEXT: rlwimi 5, 3, 8, 24, 31
122 ; CHECK-NEXT: rlwimi 6, 4, 8, 24, 31
123 ; CHECK-NEXT: sldi 3, 5, 32
124 ; CHECK-NEXT: or 3, 3, 6
125125 ; CHECK-NEXT: blr
126126 entry:
127127 %shr = lshr i64 %n, 1
6666 ; CHECK-NEXT: and 4, 8, 4
6767 ; CHECK-NEXT: lis 7, 3855
6868 ; CHECK-NEXT: or 3, 3, 4
69 ; CHECK-NEXT: oris 12, 5, 52428
70 ; CHECK-NEXT: oris 9, 6, 13107
69 ; CHECK-NEXT: oris 9, 5, 52428
70 ; CHECK-NEXT: oris 10, 6, 13107
7171 ; CHECK-NEXT: lis 6, -3856
7272 ; CHECK-NEXT: ori 7, 7, 3855
7373 ; CHECK-NEXT: sldi 8, 3, 2
74 ; CHECK-NEXT: ori 4, 12, 52428
74 ; CHECK-NEXT: ori 4, 9, 52428
7575 ; CHECK-NEXT: rldicl 3, 3, 62, 2
76 ; CHECK-NEXT: ori 5, 9, 13107
76 ; CHECK-NEXT: ori 5, 10, 13107
7777 ; CHECK-NEXT: ori 6, 6, 61680
7878 ; CHECK-NEXT: and 3, 3, 5
7979 ; CHECK-NEXT: sldi 5, 6, 32
8080 ; CHECK-NEXT: and 4, 8, 4
8181 ; CHECK-NEXT: sldi 6, 7, 32
8282 ; CHECK-NEXT: or 3, 3, 4
83 ; CHECK-NEXT: oris 10, 5, 61680
84 ; CHECK-NEXT: oris 11, 6, 3855
83 ; CHECK-NEXT: oris 11, 5, 61680
84 ; CHECK-NEXT: oris 12, 6, 3855
8585 ; CHECK-NEXT: sldi 6, 3, 4
86 ; CHECK-NEXT: ori 4, 10, 61680
86 ; CHECK-NEXT: ori 4, 11, 61680
8787 ; CHECK-NEXT: rldicl 3, 3, 60, 4
88 ; CHECK-NEXT: ori 5, 11, 3855
88 ; CHECK-NEXT: ori 5, 12, 3855
8989 ; CHECK-NEXT: and 4, 6, 4
9090 ; CHECK-NEXT: and 3, 3, 5
9191 ; CHECK-NEXT: or 3, 3, 4
92 ; CHECK-NEXT: rlwinm 5, 3, 24, 0, 31
9293 ; CHECK-NEXT: rldicl 4, 3, 32, 32
93 ; CHECK-NEXT: rlwinm 6, 3, 24, 0, 31
94 ; CHECK-NEXT: rlwinm 5, 4, 24, 0, 31
95 ; CHECK-NEXT: rlwimi 6, 3, 8, 8, 15
96 ; CHECK-NEXT: rlwimi 5, 4, 8, 8, 15
97 ; CHECK-NEXT: rlwimi 6, 3, 8, 24, 31
98 ; CHECK-NEXT: rlwimi 5, 4, 8, 24, 31
99 ; CHECK-NEXT: sldi 12, 5, 32
100 ; CHECK-NEXT: or 3, 12, 6
94 ; CHECK-NEXT: rlwinm 6, 4, 24, 0, 31
95 ; CHECK-NEXT: rlwimi 5, 3, 8, 8, 15
96 ; CHECK-NEXT: rlwimi 6, 4, 8, 8, 15
97 ; CHECK-NEXT: rlwimi 5, 3, 8, 24, 31
98 ; CHECK-NEXT: rlwimi 6, 4, 8, 24, 31
99 ; CHECK-NEXT: sldi 3, 5, 32
100 ; CHECK-NEXT: or 3, 3, 6
101101 ; CHECK-NEXT: blr
102102 %res = call i64 @llvm.bitreverse.i64(i64 %arg)
103103 ret i64 %res