llvm.org GIT mirror llvm / af1cd03
[AArch64][LoadStoreOptimizer] Form LDPSW when possible.

This patch adds the missing LD[U]RSW variants to the load store optimizer, so
that we generate LDPSW when possible.

<rdar://problem/19583480>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226978 91177308-0d34-0410-b5e6-96231b3b80d8

Quentin Colombet, 5 years ago
2 changed files with 100 additions and 1 deletion.
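What the change accomplishes: LDRSW loads a 32-bit word and sign-extends it
into a 64-bit register, but it only touches 4 bytes of memory, so two such
loads at adjacent 4-byte offsets can be merged into a single LDPSW. A minimal
sketch of the pattern, mirroring the ldp_sext_int test added below (the
function name and the registers in the comments are illustrative, not taken
from the patch):

; Before this patch, the two sign-extending loads stayed separate:
;   ldrsw x8, [x0]
;   ldrsw x9, [x0, #4]
; With the LD[U]RSW variants known to the optimizer, they pair up:
;   ldpsw x8, x9, [x0]
define i64 @sum_two_ints(i32* %p) nounwind {
  %a = load i32* %p, align 4
  %q = getelementptr inbounds i32* %p, i64 1
  %b = load i32* %q, align 4
  %ax = sext i32 %a to i64
  %bx = sext i32 %b to i64
  %s = add i64 %ax, %bx
  ret i64 %s
}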
lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

@@ -133,6 +133,8 @@
   case AArch64::LDURWi:
     return true;
   case AArch64::LDURXi:
+    return true;
+  case AArch64::LDURSWi:
     return true;
   }
 }
@@ -172,6 +174,9 @@
   case AArch64::LDRXui:
   case AArch64::LDURXi:
     return 8;
+  case AArch64::LDRSWui:
+  case AArch64::LDURSWi:
+    return 4;
   }
 }
 
@@ -209,6 +214,9 @@
   case AArch64::LDRXui:
   case AArch64::LDURXi:
     return AArch64::LDPXi;
+  case AArch64::LDRSWui:
+  case AArch64::LDURSWi:
+    return AArch64::LDPSWi;
   }
 }
 
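Both the scaled (LDRSWui) and unscaled (LDURSWi) forms map to the same pair
opcode, so sign-extending loads at negative or otherwise unscaled offsets can
also become LDPSW, as the ldur_sext_int test added below exercises. A hedged
sketch of that case (the function name and registers are illustrative):

; Two unscaled sign-extending loads at [x0, #-8] and [x0, #-4]
; can pair into: ldpsw x8, x9, [x0, #-8]
define i64 @sum_below_base(i32* %p) nounwind {
  %p1 = getelementptr inbounds i32* %p, i64 -2
  %a = load i32* %p1, align 4
  %p2 = getelementptr inbounds i32* %p, i64 -1
  %b = load i32* %p2, align 4
  %ax = sext i32 %a to i64
  %bx = sext i32 %b to i64
  %s = add i64 %ax, %bx
  ret i64 %s
}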
@@ -236,6 +244,8 @@
     return AArch64::LDRWpre;
   case AArch64::LDRXui:
     return AArch64::LDRXpre;
+  case AArch64::LDRSWui:
+    return AArch64::LDRSWpre;
   }
 }
 
@@ -263,6 +273,8 @@
     return AArch64::LDRWpost;
   case AArch64::LDRXui:
     return AArch64::LDRXpost;
+  case AArch64::LDRSWui:
+    return AArch64::LDRSWpost;
   }
 }
 
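These two hooks let the pass fold a later update of the base register into
the load itself. For reference, the resulting pre- and post-indexed forms,
with illustrative operands not taken from the patch, look like:

; Illustrative AArch64 syntax for the writeback forms:
;   ldrsw x1, [x0, #8]!   ; pre-index:  x0 is advanced to x0+8, then loaded from
;   ldrsw x1, [x0], #8    ; post-index: load from x0, then x0 becomes x0+8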
@@ -779,6 +791,7 @@
   case AArch64::LDRQui:
   case AArch64::LDRXui:
   case AArch64::LDRWui:
+  case AArch64::LDRSWui:
   // do the unscaled versions as well
   case AArch64::STURSi:
   case AArch64::STURDi:
@@ -789,7 +802,8 @@
   case AArch64::LDURDi:
   case AArch64::LDURQi:
   case AArch64::LDURWi:
-  case AArch64::LDURXi: {
+  case AArch64::LDURXi:
+  case AArch64::LDURSWi: {
     // If this is a volatile load/store, don't mess with it.
     if (MI->hasOrderedMemoryRef()) {
       ++MBBI;
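The hasOrderedMemoryRef() guard above means volatile (or otherwise ordered)
accesses are never merged, for the new LDRSW cases just as for the existing
ones. A sketch of a case the pass must leave as two separate loads (this
function is illustrative, not part of the patch's tests):

; Volatile loads keep their individual ldrsw instructions.
define i64 @sum_volatile(i32* %p) nounwind {
  %a = load volatile i32* %p, align 4
  %q = getelementptr inbounds i32* %p, i64 1
  %b = load volatile i32* %q, align 4
  %ax = sext i32 %a to i64
  %bx = sext i32 %b to i64
  %s = add i64 %ax, %bx
  ret i64 %s
}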
test/CodeGen/AArch64/arm64-ldp.ll

@@ -11,6 +11,18 @@
   ret i32 %add
 }
 
+; CHECK: ldp_sext_int
+; CHECK: ldpsw
+define i64 @ldp_sext_int(i32* %p) nounwind {
+  %tmp = load i32* %p, align 4
+  %add.ptr = getelementptr inbounds i32* %p, i64 1
+  %tmp1 = load i32* %add.ptr, align 4
+  %sexttmp = sext i32 %tmp to i64
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %add = add nsw i64 %sexttmp1, %sexttmp
+  ret i64 %add
+}
+
 ; CHECK: ldp_long
 ; CHECK: ldp
 define i64 @ldp_long(i64* %p) nounwind {
@@ -55,6 +67,21 @@
   ret i32 %tmp3
 }
 
+define i64 @ldur_sext_int(i32* %a) nounwind {
+; LDUR_CHK: ldur_sext_int
+; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i32 -1
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i32 -2
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
+
 define i64 @ldur_long(i64* %a) nounwind ssp {
 ; LDUR_CHK: ldur_long
 ; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
@@ -109,6 +136,22 @@
   ret i64 %tmp3
 }
 
+define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyInSext
+; LDUR_CHK-NOT: ldur
+; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -63
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i64 -64
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
+
 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
 ; LDUR_CHK: pairUpBarelyOut
 ; LDUR_CHK-NOT: ldp
@@ -121,6 +164,23 @@
   %p2 = getelementptr inbounds i64* %a, i64 -33
   %tmp2 = load i64* %p2, align 2
   %tmp3 = add i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyOutSext
+; LDUR_CHK-NOT: ldp
+; Don't be fragile about which loads or manipulations of the base register
+; are used---just check that there isn't an ldp before the add
+; LDUR_CHK: add
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -64
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i64 -65
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
   ret i64 %tmp3
 }
 
@@ -146,3 +206,28 @@
   %tmp3 = add i64 %tmp1, %tmp2
   ret i64 %tmp3
 }
+
+define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpNotAlignedSext
+; LDUR_CHK-NOT: ldp
+; LDUR_CHK: ldursw
+; LDUR_CHK-NEXT: ldursw
+; LDUR_CHK-NEXT: add
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -18
+  %bp1 = bitcast i32* %p1 to i8*
+  %bp1p1 = getelementptr inbounds i8* %bp1, i64 1
+  %dp1 = bitcast i8* %bp1p1 to i32*
+  %tmp1 = load i32* %dp1, align 1
+
+  %p2 = getelementptr inbounds i32* %a, i64 -17
+  %bp2 = bitcast i32* %p2 to i8*
+  %bp2p1 = getelementptr inbounds i8* %bp2, i64 1
+  %dp2 = bitcast i8* %bp2p1 to i32*
+  %tmp2 = load i32* %dp2, align 1
+
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}