llvm.org GIT mirror llvm / a173035
Merging r199369: ------------------------------------------------------------------------ r199369 | jiangning.liu | 2014-01-16 04:16:13 -0500 (Thu, 16 Jan 2014) | 2 lines For ARM, fix assertion failures for some ld/st 3/4 instructions with writeback. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@205901 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 6 years ago
8 changed file(s) with 128 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
36833683 case ARM::VLD3d16Pseudo:
36843684 case ARM::VLD3d32Pseudo:
36853685 case ARM::VLD1d64TPseudo:
3686 case ARM::VLD1d64TPseudoWB_fixed:
36863687 case ARM::VLD3d8Pseudo_UPD:
36873688 case ARM::VLD3d16Pseudo_UPD:
36883689 case ARM::VLD3d32Pseudo_UPD:
36993700 case ARM::VLD4d16Pseudo:
37003701 case ARM::VLD4d32Pseudo:
37013702 case ARM::VLD1d64QPseudo:
3703 case ARM::VLD1d64QPseudoWB_fixed:
37023704 case ARM::VLD4d8Pseudo_UPD:
37033705 case ARM::VLD4d16Pseudo_UPD:
37043706 case ARM::VLD4d32Pseudo_UPD:
135135 { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
136136
137137 { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
138 { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
138139 { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
140 { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
139141
140142 { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
141143 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
10701072 case ARM::VLD3d16Pseudo:
10711073 case ARM::VLD3d32Pseudo:
10721074 case ARM::VLD1d64TPseudo:
1075 case ARM::VLD1d64TPseudoWB_fixed:
10731076 case ARM::VLD3d8Pseudo_UPD:
10741077 case ARM::VLD3d16Pseudo_UPD:
10751078 case ARM::VLD3d32Pseudo_UPD:
10861089 case ARM::VLD4d16Pseudo:
10871090 case ARM::VLD4d32Pseudo:
10881091 case ARM::VLD1d64QPseudo:
1092 case ARM::VLD1d64QPseudoWB_fixed:
10891093 case ARM::VLD4d8Pseudo_UPD:
10901094 case ARM::VLD4d16Pseudo_UPD:
10911095 case ARM::VLD4d32Pseudo_UPD:
16721672 return CurDAG->getTargetConstant(Alignment, MVT::i32);
16731673 }
16741674
1675 static bool isVLDfixed(unsigned Opc)
1676 {
1677 switch (Opc) {
1678 default: return false;
1679 case ARM::VLD1d8wb_fixed : return true;
1680 case ARM::VLD1d16wb_fixed : return true;
1681 case ARM::VLD1d64Qwb_fixed : return true;
1682 case ARM::VLD1d32wb_fixed : return true;
1683 case ARM::VLD1d64wb_fixed : return true;
1684 case ARM::VLD1d64TPseudoWB_fixed : return true;
1685 case ARM::VLD1d64QPseudoWB_fixed : return true;
1686 case ARM::VLD1q8wb_fixed : return true;
1687 case ARM::VLD1q16wb_fixed : return true;
1688 case ARM::VLD1q32wb_fixed : return true;
1689 case ARM::VLD1q64wb_fixed : return true;
1690 case ARM::VLD2d8wb_fixed : return true;
1691 case ARM::VLD2d16wb_fixed : return true;
1692 case ARM::VLD2d32wb_fixed : return true;
1693 case ARM::VLD2q8PseudoWB_fixed : return true;
1694 case ARM::VLD2q16PseudoWB_fixed : return true;
1695 case ARM::VLD2q32PseudoWB_fixed : return true;
1696 case ARM::VLD2DUPd8wb_fixed : return true;
1697 case ARM::VLD2DUPd16wb_fixed : return true;
1698 case ARM::VLD2DUPd32wb_fixed : return true;
1699 }
1700 }
1701
1702 static bool isVSTfixed(unsigned Opc)
1703 {
1704 switch (Opc) {
1705 default: return false;
1706 case ARM::VST1d8wb_fixed : return true;
1707 case ARM::VST1d16wb_fixed : return true;
1708 case ARM::VST1d32wb_fixed : return true;
1709 case ARM::VST1d64wb_fixed : return true;
1710 case ARM::VST1q8wb_fixed : return true;
1711 case ARM::VST1q16wb_fixed : return true;
1712 case ARM::VST1q32wb_fixed : return true;
1713 case ARM::VST1q64wb_fixed : return true;
1714 case ARM::VST1d64TPseudoWB_fixed : return true;
1715 case ARM::VST1d64QPseudoWB_fixed : return true;
1716 case ARM::VST2d8wb_fixed : return true;
1717 case ARM::VST2d16wb_fixed : return true;
1718 case ARM::VST2d32wb_fixed : return true;
1719 case ARM::VST2q8PseudoWB_fixed : return true;
1720 case ARM::VST2q16PseudoWB_fixed : return true;
1721 case ARM::VST2q32PseudoWB_fixed : return true;
1722 }
1723 }
1724
16751725 // Get the register stride update opcode of a VLD/VST instruction that
16761726 // is otherwise equivalent to the given fixed stride updating instruction.
16771727 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1728 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1729 && "Incorrect fixed stride updating instruction.");
16781730 switch (Opc) {
16791731 default: break;
16801732 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
16851737 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
16861738 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
16871739 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1740 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1741 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1742 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1743 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
16881744
16891745 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
16901746 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
17841840 SDValue Inc = N->getOperand(AddrOpIdx + 1);
17851841 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
17861842 // case entirely when the rest are updated to that form, too.
1787 if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
1843 if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
17881844 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1789 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1845 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
17901846 // check for that explicitly too. Horribly hacky, but temporary.
1791 if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
1847 if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
17921848 !isa<ConstantSDNode>(Inc.getNode()))
17931849 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
17941850 }
19361992 // case entirely when the rest are updated to that form, too.
19371993 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
19381994 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1939 // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
1995 // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
19401996 // check for that explicitly too. Horribly hacky, but temporary.
1941 if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
1942 !isa<ConstantSDNode>(Inc.getNode()))
1943 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1997 if (!isa<ConstantSDNode>(Inc.getNode()))
1998 Ops.push_back(Inc);
1999 else if (NumVecs > 2 && !isVSTfixed(Opc))
2000 Ops.push_back(Reg0);
19442001 }
19452002 Ops.push_back(SrcReg);
19462003 Ops.push_back(Pred);
28332890 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
28342891 ARM::VLD3d16Pseudo_UPD,
28352892 ARM::VLD3d32Pseudo_UPD,
2836 ARM::VLD1q64wb_fixed};
2893 ARM::VLD1d64TPseudoWB_fixed};
28372894 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
28382895 ARM::VLD3q16Pseudo_UPD,
28392896 ARM::VLD3q32Pseudo_UPD };
28472904 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
28482905 ARM::VLD4d16Pseudo_UPD,
28492906 ARM::VLD4d32Pseudo_UPD,
2850 ARM::VLD1q64wb_fixed};
2907 ARM::VLD1d64QPseudoWB_fixed};
28512908 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
28522909 ARM::VLD4q16Pseudo_UPD,
28532910 ARM::VLD4q32Pseudo_UPD };
729729 defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
730730
731731 def VLD1d64TPseudo : VLDQQPseudo;
732 def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo;
733 def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo;
732734
733735 // ...with 4 registers
734736 class VLD1D4<bits<4> op7_4, string Dt>
768770 defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
769771
770772 def VLD1d64QPseudo : VLDQQPseudo;
773 def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo;
774 def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo;
771775
772776 // VLD2 : Vector Load (multiple 2-element structures)
773777 class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
16701674 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
16711675
16721676 def VST1d64TPseudo : VSTQQPseudo;
1673 def VST1d64TPseudoWB_fixed : VSTQQWBPseudo;
1677 def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo;
16741678 def VST1d64TPseudoWB_register : VSTQQWBPseudo;
16751679
16761680 // ...with 4 registers
17131717 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
17141718
17151719 def VST1d64QPseudo : VSTQQPseudo;
1716 def VST1d64QPseudoWB_fixed : VSTQQWBPseudo;
1720 def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo;
17171721 def VST1d64QPseudoWB_register : VSTQQWBPseudo;
17181722
17191723 // VST2 : Vector Store (multiple 2-element structures)
8282 ret <1 x i64> %tmp4
8383 }
8484
85 define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
86 ;CHECK-LABEL: vld3i64_update:
87 ;CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
88 %tmp0 = bitcast i64* %A to i8*
89 %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
90 %tmp5 = getelementptr i64* %A, i32 3
91 store i64* %tmp5, i64** %ptr
92 %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
93 %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
94 %tmp4 = add <1 x i64> %tmp2, %tmp3
95 ret <1 x i64> %tmp4
96 }
97
8598 define <16 x i8> @vld3Qi8(i8* %A) nounwind {
8699 ;CHECK-LABEL: vld3Qi8:
87100 ;Check the alignment value. Max for this instruction is 64 bits:
8282 ret <1 x i64> %tmp4
8383 }
8484
85 define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
86 ;CHECK-LABEL: vld4i64_update:
87 ;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
88 %tmp0 = bitcast i64* %A to i8*
89 %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
90 %tmp5 = getelementptr i64* %A, i32 4
91 store i64* %tmp5, i64** %ptr
92 %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
93 %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
94 %tmp4 = add <1 x i64> %tmp2, %tmp3
95 ret <1 x i64> %tmp4
96 }
97
8598 define <16 x i8> @vld4Qi8(i8* %A) nounwind {
8699 ;CHECK-LABEL: vld4Qi8:
87100 ;Check the alignment value. Max for this instruction is 256 bits:
5858 %tmp1 = load <1 x i64>* %B
5959 call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
6060 ret void
61 }
62
63 define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
64 ;CHECK-LABEL: vst3i64_update
65 ;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
66 %A = load i64** %ptr
67 %tmp0 = bitcast i64* %A to i8*
68 %tmp1 = load <1 x i64>* %B
69 call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
70 %tmp2 = getelementptr i64* %A, i32 3
71 store i64* %tmp2, i64** %ptr
72 ret void
6173 }
6274
6375 define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
5757 %tmp1 = load <1 x i64>* %B
5858 call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
5959 ret void
60 }
61
62 define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
63 ;CHECK-LABEL: vst4i64_update:
64 ;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]!
65 %A = load i64** %ptr
66 %tmp0 = bitcast i64* %A to i8*
67 %tmp1 = load <1 x i64>* %B
68 call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
69 %tmp2 = getelementptr i64* %A, i32 4
70 store i64* %tmp2, i64** %ptr
71 ret void
6072 }
6173
6274 define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {