llvm.org GIT mirror llvm / d168cef
Add codegen patterns for VST1-lane instructions. Radar 8599955. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118176 91177308-0d34-0410-b5e6-96231b3b80d8 Bob Wilson 9 years ago
4 changed file(s) with 84 addition(s) and 14 deletion(s). Raw diff Collapse all Expand all
11251125 nohash_imm:$lane), itin, "$addr.addr = $wb">;
11261126
11271127 // VST1LN : Vector Store (single element from one lane)
1128 class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1128 class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1129 PatFrag StoreOp, SDNode ExtractOp>
11291130 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
11301131 (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
1131 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", []> {
1132 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
1133 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
11321134 let Rm = 0b1111;
11331135 }
1134
1135 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8"> {
1136 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
1137 : VSTQLNPseudo<IIC_VST1ln> {
1138 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
1139 addrmode6:$addr)];
1140 }
1141
1142 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
1143 NEONvgetlaneu> {
11361144 let Inst{7-5} = lane{2-0};
11371145 }
1138 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16"> {
1146 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
1147 NEONvgetlaneu> {
11391148 let Inst{7-6} = lane{1-0};
11401149 let Inst{4} = Rn{5};
11411150 }
1142 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32"> {
1151 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
11431152 let Inst{7} = lane{0};
11441153 let Inst{5-4} = Rn{5-4};
11451154 }
11461155
1147 def VST1LNq8Pseudo : VSTQLNPseudo<IIC_VST1ln>;
1148 def VST1LNq16Pseudo : VSTQLNPseudo<IIC_VST1ln>;
1149 def VST1LNq32Pseudo : VSTQLNPseudo<IIC_VST1ln>;
1156 def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
1157 def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
1158 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
11501159
11511160 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
11521161
101101 %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
102102 %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
103103 %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
104 store i16 %1, i16* %out_uint16_t, align 2
104 %2 = add i16 %1, %1
105 store i16 %2, i16* %out_uint16_t, align 2
105106 br label %return
106107
107108 return: ; preds = %entry
116117 %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
117118 %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
118119 %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
119 store i8 %1, i8* %out_uint8_t, align 1
120 %2 = add i8 %1, %1
121 store i8 %2, i8* %out_uint8_t, align 1
120122 br label %return
121123
122124 return: ; preds = %entry
131133 %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
132134 %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
133135 %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
134 store i16 %1, i16* %out_uint16_t, align 2
136 %2 = add i16 %1, %1
137 store i16 %2, i16* %out_uint16_t, align 2
135138 br label %return
136139
137140 return: ; preds = %entry
146149 %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
147150 %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
148151 %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
149 store i8 %1, i8* %out_uint8_t, align 1
152 %2 = add i8 %1, %1
153 store i8 %2, i8* %out_uint8_t, align 1
150154 br label %return
151155
152156 return: ; preds = %entry
2121
2222 define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
2323 ;CHECK: vld1lanei32:
24 ;Check the alignment value. Max for this instruction is 16 bits:
24 ;Check the alignment value. Max for this instruction is 32 bits:
2525 ;CHECK: vld1.32 {d16[1]}, [r0, :32]
2626 %tmp1 = load <2 x i32>* %B
2727 %tmp2 = load i32* %A, align 8
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: vst1lanei8:
4 ;Check the (default) alignment.
5 ;CHECK: vst1.8 {d16[3]}, [r0]
6 %tmp1 = load <8 x i8>* %B
7 %tmp2 = extractelement <8 x i8> %tmp1, i32 3
8 store i8 %tmp2, i8* %A, align 8
9 ret void
10 }
11
12 define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
13 ;CHECK: vst1lanei16:
14 ;Check the alignment value. Max for this instruction is 16 bits:
15 ;CHECK: vst1.16 {d16[2]}, [r0, :16]
16 %tmp1 = load <4 x i16>* %B
17 %tmp2 = extractelement <4 x i16> %tmp1, i32 2
18 store i16 %tmp2, i16* %A, align 8
19 ret void
20 }
21
22 define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
23 ;CHECK: vst1lanei32:
24 ;Check the alignment value. Max for this instruction is 32 bits:
25 ;CHECK: vst1.32 {d16[1]}, [r0, :32]
26 %tmp1 = load <2 x i32>* %B
27 %tmp2 = extractelement <2 x i32> %tmp1, i32 1
28 store i32 %tmp2, i32* %A, align 8
29 ret void
30 }
31
32 define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
33 ;CHECK: vst1laneQi8:
34 ;CHECK: vst1.8 {d17[1]}, [r0]
35 %tmp1 = load <16 x i8>* %B
36 %tmp2 = extractelement <16 x i8> %tmp1, i32 9
37 store i8 %tmp2, i8* %A, align 8
38 ret void
39 }
40
41 define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
42 ;CHECK: vst1laneQi16:
43 ;CHECK: vst1.16 {d17[1]}, [r0, :16]
44 %tmp1 = load <8 x i16>* %B
45 %tmp2 = extractelement <8 x i16> %tmp1, i32 5
46 store i16 %tmp2, i16* %A, align 8
47 ret void
48 }
49
50 define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
51 ;CHECK: vst1laneQi32:
52 ;CHECK: vst1.32 {d17[1]}, [r0, :32]
53 %tmp1 = load <4 x i32>* %B
54 %tmp2 = extractelement <4 x i32> %tmp1, i32 3
55 store i32 %tmp2, i32* %A, align 8
56 ret void
57 }
158
259 define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
360 ;CHECK: vst2lanei8: