llvm.org GIT mirror llvm / fa16ddf
[ARM] Add a SelectTAddrModeImm7 for MVE narrow loads and stores

We were previously using the SelectT2AddrModeImm7 for both normal and narrowing MVE loads/stores. As the narrowing instructions do not accept sp as a register, it makes little sense to optimise a FrameIndex into the load, only to have to recover that later on. This adds a SelectTAddrModeImm7 which does not do that folding, and uses it for narrowing load/store patterns.

Differential Revision: https://reviews.llvm.org/D67489

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372134 91177308-0d34-0410-b5e6-96231b3b80d8

David Green 1 year, 13 days ago
3 changed file(s) with 70 addition(s) and 48 deletion(s). Raw diff Collapse all Expand all
138138 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
139139 SDValue &OffImm);
140140 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
141 template <unsigned Shift>
142 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
141143
142144 // Thumb 2 Addressing Modes:
143145 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
11481150 }
11491151
11501152 return false;
1153 }
1154
1155 template <unsigned Shift>
1156 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1157 SDValue &OffImm) {
1158 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1159 int RHSC;
1160 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1161 RHSC)) {
1162 Base = N.getOperand(0);
1163 if (N.getOpcode() == ISD::SUB)
1164 RHSC = -RHSC;
1165 OffImm =
1166 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1167 return true;
1168 }
1169 }
1170
1171 // Base only.
1172 Base = N;
1173 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1174 return true;
11511175 }
11521176
11531177
159159 let RenderMethod = "addMemImmOffsetOperands";
160160 }
161161
162 class taddrmode_imm7<int shift> : MemOperand {
162 class taddrmode_imm7<int shift> : MemOperand,
163 ComplexPattern<i32, 2, "SelectTAddrModeImm7<"#shift#">", []> {
163164 let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>;
164165 // They are printed the same way as the T2 imm8 version
165166 let PrintMethod = "printT2AddrModeImm8Operand<false>";
51565157 }
51575158
51585159 let Predicates = [HasMVEInt] in {
5159 def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
5160 (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
5161 def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
5162 (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
5163 def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
5164 (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
5160 def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr),
5161 (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>;
5162 def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr),
5163 (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>;
5164 def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr),
5165 (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>;
51655166
51665167 def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
51675168 (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
52035204 }
52045205
52055206 let Predicates = [HasMVEInt] in {
5206 defm : MVEExtLoad<"4", "32", "8", "B", "", t2addrmode_imm7<0>>;
5207 defm : MVEExtLoad<"8", "16", "8", "B", "", t2addrmode_imm7<0>>;
5208 defm : MVEExtLoad<"4", "32", "16", "H", "_align2", t2addrmode_imm7<1>>;
5207 defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>;
5208 defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>;
5209 defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>;
52095210 }
52105211
52115212
7676 ; CHECK-NEXT: push {r7, lr}
7777 ; CHECK-NEXT: .pad #8
7878 ; CHECK-NEXT: sub sp, #8
79 ; CHECK-NEXT: mov r0, sp
7980 ; CHECK-NEXT: vmov.i32 q0, #0x6
80 ; CHECK-NEXT: mov r0, sp
8181 ; CHECK-NEXT: vstrh.32 q0, [r0, #4]
82 ; CHECK-NEXT: mov r0, sp
8382 ; CHECK-NEXT: bl func
8483 ; CHECK-NEXT: add sp, #8
8584 ; CHECK-NEXT: pop {r7, pc}
10099 ; CHECK-NEXT: push {r7, lr}
101100 ; CHECK-NEXT: .pad #8
102101 ; CHECK-NEXT: sub sp, #8
102 ; CHECK-NEXT: add r0, sp, #4
103103 ; CHECK-NEXT: vmov.i32 q0, #0x6
104 ; CHECK-NEXT: mov r0, sp
105 ; CHECK-NEXT: vstrb.32 q0, [r0, #6]
106 ; CHECK-NEXT: add r0, sp, #4
104 ; CHECK-NEXT: vstrb.32 q0, [r0, #2]
107105 ; CHECK-NEXT: bl func
108106 ; CHECK-NEXT: add sp, #8
109107 ; CHECK-NEXT: pop {r7, pc}
124122 ; CHECK-NEXT: push {r7, lr}
125123 ; CHECK-NEXT: .pad #8
126124 ; CHECK-NEXT: sub sp, #8
127 ; CHECK-NEXT: vmov.i32 q0, #0x0
128 ; CHECK-NEXT: mov r0, sp
125 ; CHECK-NEXT: mov r0, sp
126 ; CHECK-NEXT: vmov.i32 q0, #0x0
129127 ; CHECK-NEXT: vstrb.16 q0, [r0, #2]
130 ; CHECK-NEXT: mov r0, sp
131128 ; CHECK-NEXT: bl func
132129 ; CHECK-NEXT: add sp, #8
133130 ; CHECK-NEXT: pop {r7, pc}
211208 define arm_aapcs_vfpcc <4 x i16> @vldrh32() {
212209 ; CHECK-LABEL: vldrh32:
213210 ; CHECK: @ %bb.0: @ %entry
214 ; CHECK-NEXT: .save {r7, lr}
215 ; CHECK-NEXT: push {r7, lr}
216 ; CHECK-NEXT: .pad #8
217 ; CHECK-NEXT: sub sp, #8
218 ; CHECK-NEXT: mov r0, sp
219 ; CHECK-NEXT: bl func
220 ; CHECK-NEXT: mov r0, sp
221 ; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
222 ; CHECK-NEXT: add sp, #8
223 ; CHECK-NEXT: pop {r7, pc}
211 ; CHECK-NEXT: .save {r4, lr}
212 ; CHECK-NEXT: push {r4, lr}
213 ; CHECK-NEXT: .pad #8
214 ; CHECK-NEXT: sub sp, #8
215 ; CHECK-NEXT: mov r4, sp
216 ; CHECK-NEXT: mov r0, r4
217 ; CHECK-NEXT: bl func
218 ; CHECK-NEXT: vldrh.u32 q0, [r4, #4]
219 ; CHECK-NEXT: add sp, #8
220 ; CHECK-NEXT: pop {r4, pc}
224221 entry:
225222 %d = alloca [4 x i16], align 2
226223 %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %d, i32 0, i32 0
234231 define arm_aapcs_vfpcc <4 x i8> @vldrb32() {
235232 ; CHECK-LABEL: vldrb32:
236233 ; CHECK: @ %bb.0: @ %entry
237 ; CHECK-NEXT: .save {r7, lr}
238 ; CHECK-NEXT: push {r7, lr}
239 ; CHECK-NEXT: .pad #8
240 ; CHECK-NEXT: sub sp, #8
241 ; CHECK-NEXT: add r0, sp, #4
242 ; CHECK-NEXT: bl func
243 ; CHECK-NEXT: mov r0, sp
244 ; CHECK-NEXT: vldrb.u32 q0, [r0, #6]
245 ; CHECK-NEXT: add sp, #8
246 ; CHECK-NEXT: pop {r7, pc}
234 ; CHECK-NEXT: .save {r4, lr}
235 ; CHECK-NEXT: push {r4, lr}
236 ; CHECK-NEXT: .pad #8
237 ; CHECK-NEXT: sub sp, #8
238 ; CHECK-NEXT: add r4, sp, #4
239 ; CHECK-NEXT: mov r0, r4
240 ; CHECK-NEXT: bl func
241 ; CHECK-NEXT: vldrb.u32 q0, [r4, #2]
242 ; CHECK-NEXT: add sp, #8
243 ; CHECK-NEXT: pop {r4, pc}
247244 entry:
248245 %d = alloca [4 x i8], align 2
249246 %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %d, i32 0, i32 0
257254 define arm_aapcs_vfpcc <8 x i8> @vldrb16() {
258255 ; CHECK-LABEL: vldrb16:
259256 ; CHECK: @ %bb.0: @ %entry
260 ; CHECK-NEXT: .save {r7, lr}
261 ; CHECK-NEXT: push {r7, lr}
262 ; CHECK-NEXT: .pad #8
263 ; CHECK-NEXT: sub sp, #8
264 ; CHECK-NEXT: mov r0, sp
265 ; CHECK-NEXT: bl func
266 ; CHECK-NEXT: mov r0, sp
267 ; CHECK-NEXT: vldrb.u16 q0, [r0, #2]
268 ; CHECK-NEXT: add sp, #8
269 ; CHECK-NEXT: pop {r7, pc}
257 ; CHECK-NEXT: .save {r4, lr}
258 ; CHECK-NEXT: push {r4, lr}
259 ; CHECK-NEXT: .pad #8
260 ; CHECK-NEXT: sub sp, #8
261 ; CHECK-NEXT: mov r4, sp
262 ; CHECK-NEXT: mov r0, r4
263 ; CHECK-NEXT: bl func
264 ; CHECK-NEXT: vldrb.u16 q0, [r4, #2]
265 ; CHECK-NEXT: add sp, #8
266 ; CHECK-NEXT: pop {r4, pc}
270267 entry:
271268 %d = alloca [8 x i8], align 2
272269 %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %d, i32 0, i32 0