llvm.org GIT mirror llvm / 4a05921
Merging r341642: ------------------------------------------------------------------------ r341642 | tnorthover | 2018-09-07 11:21:25 +0200 (Fri, 07 Sep 2018) | 8 lines ARM: fix Thumb2 CodeGen for ldrex with folded frame-index. Because t2LDREX (& t2STREX) were marked as AddrModeNone, but did allow a FrameIndex operand, rewriteT2FrameIndex asserted. This gives them a proper addressing-mode and tells the rewriter about it so that encodable offsets are exploited and others are rejected. Should fix PR38828. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_70@341783 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 2 years ago
7 changed file(s) with 133 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
15131513 break;
15141514 case ARMII::AddrMode5:
15151515 case ARMII::AddrModeT2_i8s4:
1516 case ARMII::AddrModeT2_ldrex:
15161517 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
15171518 break;
15181519 case ARMII::AddrModeT2_i12:
108108 def AddrModeT2_i8s4 : AddrMode<15>;
109109 def AddrMode_i12 : AddrMode<16>;
110110 def AddrMode5FP16 : AddrMode<17>;
111 def AddrModeT2_ldrex : AddrMode<18>;
111112
112113 // Load / store index mode.
113114 class IndexMode<bits<2> val> {
32663266 [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>,
32673267 Requires<[IsThumb, HasV8MBaseline]>;
32683268 def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
3269 AddrModeNone, 4, NoItinerary,
3269 AddrModeT2_ldrex, 4, NoItinerary,
32703270 "ldrex", "\t$Rt, $addr", "",
32713271 [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>,
32723272 Requires<[IsThumb, HasV8MBaseline]> {
33453345
33463346 def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
33473347 t2addrmode_imm0_1020s4:$addr),
3348 AddrModeNone, 4, NoItinerary,
3348 AddrModeT2_ldrex, 4, NoItinerary,
33493349 "strex", "\t$Rd, $Rt, $addr", "",
33503350 [(set rGPR:$Rd,
33513351 (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>,
200200 AddrModeT2_pc = 14, // +/- i12 for pc relative data
201201 AddrModeT2_i8s4 = 15, // i8 * 4
202202 AddrMode_i12 = 16,
203 AddrMode5FP16 = 17 // i8 * 2
203 AddrMode5FP16 = 17, // i8 * 2
204 AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst
204205 };
205206
206207 inline static const char *AddrModeToString(AddrMode addrmode) {
223224 case AddrModeT2_pc: return "AddrModeT2_pc";
224225 case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
225226 case AddrMode_i12: return "AddrMode_i12";
227 case AddrModeT2_ldrex:return "AddrModeT2_ldrex";
226228 }
227229 }
228230
620620 // MCInst operand expects already scaled value.
621621 Scale = 1;
622622 assert((Offset & 3) == 0 && "Can't encode this offset!");
623 } else if (AddrMode == ARMII::AddrModeT2_ldrex) {
624 Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
625 NumBits = 8; // 8 bits scaled by 4
626 Scale = 4;
627 assert((Offset & 3) == 0 && "Can't encode this offset!");
623628 } else {
624629 llvm_unreachable("Unsupported addressing mode!");
625630 }
0 ; RUN: llc -mtriple=thumbv7-linux-gnueabi -o - %s | FileCheck %s
1
2 ; This alloca is just large enough that FrameLowering decides it needs a frame
3 ; to guarantee access, based on the range of ldrex.
4
5 ; The actual alloca size is a bit of black magic, unfortunately: the real
6 ; maximum accessible is 1020, but FrameLowering adds 16 bytes to its estimated
7 ; stack size just because, so the alloca is not actually what the limit gets
8 ; compared to. The important point is that we don't go up to ~4096, which is the
9 ; default with no strange instructions.
10 define void @test_large_frame() {
11 ; CHECK-LABEL: test_large_frame:
12 ; CHECK: push
13 ; CHECK: sub.w sp, sp, #1004
14
15 %ptr = alloca i32, i32 251
16
17 %addr = getelementptr i32, i32* %ptr, i32 1
18 call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
19 ret void
20 }
21
22 ; This alloca is just the other side of the limit, so no frame is needed.
23 define void @test_small_frame() {
24 ; CHECK-LABEL: test_small_frame:
25 ; CHECK-NOT: push
26 ; CHECK: sub.w sp, sp, #1000
27
28 %ptr = alloca i32, i32 250
29
30 %addr = getelementptr i32, i32* %ptr, i32 1
31 call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
32 ret void
33 }
34
35 declare i32 @llvm.arm.ldrex.p0i32(i32*)
140140
141141 ret void
142142 }
143
144 define void @test_excl_addrmode_folded() {
145 ; CHECK-LABEL: test_excl_addrmode_folded:
146 %local = alloca i8, i32 4096
147
148 %local.0 = getelementptr i8, i8* %local, i32 4
149 %local32.0 = bitcast i8* %local.0 to i32*
150 call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
151 call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
152 ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #4]
153 ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #4]
154
155 %local.1 = getelementptr i8, i8* %local, i32 1020
156 %local32.1 = bitcast i8* %local.1 to i32*
157 call i32 @llvm.arm.ldrex.p0i32(i32* %local32.1)
158 call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.1)
159 ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #1020]
160 ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #1020]
161
162 ret void
163 }
164
165 define void @test_excl_addrmode_range() {
166 ; CHECK-LABEL: test_excl_addrmode_range:
167 %local = alloca i8, i32 4096
168
169 %local.0 = getelementptr i8, i8* %local, i32 1024
170 %local32.0 = bitcast i8* %local.0 to i32*
171 call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
172 call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
173 ; CHECK-T2ADDRMODE: mov r[[TMP:[0-9]+]], sp
174 ; CHECK-T2ADDRMODE: add.w r[[ADDR:[0-9]+]], r[[TMP]], #1024
175 ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
176 ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
177
178 ret void
179 }
180
181 define void @test_excl_addrmode_align() {
182 ; CHECK-LABEL: test_excl_addrmode_align:
183 %local = alloca i8, i32 4096
184
185 %local.0 = getelementptr i8, i8* %local, i32 2
186 %local32.0 = bitcast i8* %local.0 to i32*
187 call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
188 call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
189 ; CHECK-T2ADDRMODE: mov r[[ADDR:[0-9]+]], sp
190 ; CHECK-T2ADDRMODE: adds r[[ADDR:[0-9]+]], #2
191 ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
192 ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
193
194 ret void
195 }
196
197 define void @test_excl_addrmode_sign() {
198 ; CHECK-LABEL: test_excl_addrmode_sign:
199 %local = alloca i8, i32 4096
200
201 %local.0 = getelementptr i8, i8* %local, i32 -4
202 %local32.0 = bitcast i8* %local.0 to i32*
203 call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
204 call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
205 ; CHECK-T2ADDRMODE: mov r[[ADDR:[0-9]+]], sp
206 ; CHECK-T2ADDRMODE: subs r[[ADDR:[0-9]+]], #4
207 ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
208 ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
209
210 ret void
211 }
212
213 define void @test_excl_addrmode_combination() {
214 ; CHECK-LABEL: test_excl_addrmode_combination:
215 %local = alloca i8, i32 4096
216 %unused = alloca i8, i32 64
217
218 %local.0 = getelementptr i8, i8* %local, i32 4
219 %local32.0 = bitcast i8* %local.0 to i32*
220 call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
221 call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
222 ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #68]
223 ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #68]
224
225 ret void
226 }
227
143228
144229 ; LLVM should know, even across basic blocks, that ldrex is setting the high
145230 ; bits of its i32 to 0. There should be no zero-extend operation.