llvm.org GIT mirror llvm / c9d62b3
[SelectionDAG] Add tests for LKK algorithm Added some tests testing urem and srem operations with a constant divisor. Patch by TG908 (Tim Gymnich) Differential Revision: https://reviews.llvm.org/D68421 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373830 91177308-0d34-0410-b5e6-96231b3b80d8 David Bolvansky 4 months ago
16 changed file(s) with 9357 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
2
; Test: scalar i32 signed remainder by the positive odd constant 95.
; The CHECK lines pin the expected AArch64 output: a smull by a constant built
; with mov/movk, shift/add steps that form the quotient, and a final msub
; (x - quotient * 95) that yields the remainder. No divide instruction is expected.
3 define i32 @fold_srem_positive_odd(i32 %x) {
4 ; CHECK-LABEL: fold_srem_positive_odd:
5 ; CHECK: // %bb.0:
6 ; CHECK-NEXT: mov w8, #37253
7 ; CHECK-NEXT: movk w8, #44150, lsl #16
8 ; CHECK-NEXT: smull x8, w0, w8
9 ; CHECK-NEXT: lsr x8, x8, #32
10 ; CHECK-NEXT: add w8, w8, w0
11 ; CHECK-NEXT: asr w9, w8, #6
12 ; CHECK-NEXT: add w8, w9, w8, lsr #31
13 ; CHECK-NEXT: mov w9, #95
14 ; CHECK-NEXT: msub w0, w8, w9, w0
15 ; CHECK-NEXT: ret
16 %1 = srem i32 %x, 95
17 ret i32 %1
18 }
19
20
; Test: scalar i32 signed remainder by the positive even constant 1060.
; Expected AArch64 output: smull by a mov/movk constant, the sign bit (lsr #63)
; added to the arithmetic shift of the product (asr #40) to form the quotient,
; then msub (x - quotient * 1060) for the remainder.
21 define i32 @fold_srem_positive_even(i32 %x) {
22 ; CHECK-LABEL: fold_srem_positive_even:
23 ; CHECK: // %bb.0:
24 ; CHECK-NEXT: mov w8, #36849
25 ; CHECK-NEXT: movk w8, #15827, lsl #16
26 ; CHECK-NEXT: smull x8, w0, w8
27 ; CHECK-NEXT: lsr x9, x8, #63
28 ; CHECK-NEXT: asr x8, x8, #40
29 ; CHECK-NEXT: add w8, w8, w9
30 ; CHECK-NEXT: mov w9, #1060
31 ; CHECK-NEXT: msub w0, w8, w9, w0
32 ; CHECK-NEXT: ret
33 %1 = srem i32 %x, 1060
34 ret i32 %1
35 }
36
37
; Test: scalar i32 signed remainder by the negative odd constant -723.
; Expected AArch64 output: smull by a mov/movk constant, sign bit (lsr #63) plus
; arithmetic shift (asr #40) to form the quotient, then msub against #-723.
38 define i32 @fold_srem_negative_odd(i32 %x) {
39 ; CHECK-LABEL: fold_srem_negative_odd:
40 ; CHECK: // %bb.0:
41 ; CHECK-NEXT: mov w8, #65445
42 ; CHECK-NEXT: movk w8, #42330, lsl #16
43 ; CHECK-NEXT: smull x8, w0, w8
44 ; CHECK-NEXT: lsr x9, x8, #63
45 ; CHECK-NEXT: asr x8, x8, #40
46 ; CHECK-NEXT: add w8, w8, w9
47 ; CHECK-NEXT: mov w9, #-723
48 ; CHECK-NEXT: msub w0, w8, w9, w0
49 ; CHECK-NEXT: ret
50 %1 = srem i32 %x, -723
51 ret i32 %1
52 }
53
54
; Test: scalar i32 signed remainder by the negative constant -22981.
; Expected AArch64 output: smull by a mov/movk constant, sign bit (lsr #63) plus
; arithmetic shift (asr #40) to form the quotient, then msub against #-22981.
; NOTE(review): the function name says "even", but the divisor -22981 is odd, so
; no negative even divisor is actually exercised here - confirm the intended constant.
55 define i32 @fold_srem_negative_even(i32 %x) {
56 ; CHECK-LABEL: fold_srem_negative_even:
57 ; CHECK: // %bb.0:
58 ; CHECK-NEXT: mov w8, #62439
59 ; CHECK-NEXT: movk w8, #64805, lsl #16
60 ; CHECK-NEXT: smull x8, w0, w8
61 ; CHECK-NEXT: lsr x9, x8, #63
62 ; CHECK-NEXT: asr x8, x8, #40
63 ; CHECK-NEXT: add w8, w8, w9
64 ; CHECK-NEXT: mov w9, #-22981
65 ; CHECK-NEXT: msub w0, w8, w9, w0
66 ; CHECK-NEXT: ret
67 %1 = srem i32 %x, -22981
68 ret i32 %1
69 }
70
71
72 ; Don't fold if we can combine srem with sdiv.
; Test: srem and sdiv of the same i32 value by 95, with the two results added.
; The CHECK lines expect a single multiply-based quotient sequence: the quotient
; in w8 feeds both the msub that forms the remainder and the final add.
73 define i32 @combine_srem_sdiv(i32 %x) {
74 ; CHECK-LABEL: combine_srem_sdiv:
75 ; CHECK: // %bb.0:
76 ; CHECK-NEXT: mov w8, #37253
77 ; CHECK-NEXT: movk w8, #44150, lsl #16
78 ; CHECK-NEXT: smull x8, w0, w8
79 ; CHECK-NEXT: lsr x8, x8, #32
80 ; CHECK-NEXT: add w8, w8, w0
81 ; CHECK-NEXT: asr w9, w8, #6
82 ; CHECK-NEXT: add w8, w9, w8, lsr #31
83 ; CHECK-NEXT: mov w9, #95
84 ; CHECK-NEXT: msub w9, w8, w9, w0
85 ; CHECK-NEXT: add w0, w9, w8
86 ; CHECK-NEXT: ret
87 %1 = srem i32 %x, 95
88 %2 = sdiv i32 %x, 95
89 %3 = add i32 %1, %2
90 ret i32 %3
91 }
92
93 ; Don't fold for divisors that are a power of two.
; Test: scalar i32 signed remainder by 64 (a power of two).
; The CHECK lines expect no multiply: x is biased by 63 only when negative
; (add/cmp/csel lt), masked with 0xffffffc0, and the result subtracted from x.
94 define i32 @dont_fold_srem_power_of_two(i32 %x) {
95 ; CHECK-LABEL: dont_fold_srem_power_of_two:
96 ; CHECK: // %bb.0:
97 ; CHECK-NEXT: add w8, w0, #63 // =63
98 ; CHECK-NEXT: cmp w0, #0 // =0
99 ; CHECK-NEXT: csel w8, w8, w0, lt
100 ; CHECK-NEXT: and w8, w8, #0xffffffc0
101 ; CHECK-NEXT: sub w0, w0, w8
102 ; CHECK-NEXT: ret
103 %1 = srem i32 %x, 64
104 ret i32 %1
105 }
106
107 ; Don't fold if the divisor is one.
; Test: scalar i32 signed remainder by 1.
; The CHECK lines expect the function to return constant zero (mov w0, wzr)
; with no remainder computation at all.
108 define i32 @dont_fold_srem_one(i32 %x) {
109 ; CHECK-LABEL: dont_fold_srem_one:
110 ; CHECK: // %bb.0:
111 ; CHECK-NEXT: mov w0, wzr
112 ; CHECK-NEXT: ret
113 %1 = srem i32 %x, 1
114 ret i32 %1
115 }
116
117 ; Don't fold if the divisor is 2^31.
; Test: scalar i32 signed remainder by the literal 2147483648 (2^31).
; The CHECK lines expect no multiply: x is biased by 2147483647 only when
; negative (add/cmp/csel lt), masked with 0x80000000, and the result added to x.
; NOTE(review): the name says "smax", but the signed i32 maximum is 2147483647;
; the literal used here is 2^31, whose 32-bit pattern is 0x80000000 - confirm
; which divisor the test is meant to cover.
118 define i32 @dont_fold_srem_i32_smax(i32 %x) {
119 ; CHECK-LABEL: dont_fold_srem_i32_smax:
120 ; CHECK: // %bb.0:
121 ; CHECK-NEXT: mov w8, #2147483647
122 ; CHECK-NEXT: add w8, w0, w8
123 ; CHECK-NEXT: cmp w0, #0 // =0
124 ; CHECK-NEXT: csel w8, w8, w0, lt
125 ; CHECK-NEXT: and w8, w8, #0x80000000
126 ; CHECK-NEXT: add w0, w0, w8
127 ; CHECK-NEXT: ret
128 %1 = srem i32 %x, 2147483648
129 ret i32 %1
130 }
131
132 ; Don't fold i64 srem
; Test: scalar i64 signed remainder by 98.
; The CHECK lines expect a 64-bit constant built with mov/movk, a smulh
; (high half of the signed product), shift/add steps forming the quotient,
; and msub (x - quotient * 98) for the remainder.
133 define i64 @dont_fold_srem_i64(i64 %x) {
134 ; CHECK-LABEL: dont_fold_srem_i64:
135 ; CHECK: // %bb.0:
136 ; CHECK-NEXT: mov x8, #58849
137 ; CHECK-NEXT: movk x8, #48148, lsl #16
138 ; CHECK-NEXT: movk x8, #33436, lsl #32
139 ; CHECK-NEXT: movk x8, #21399, lsl #48
140 ; CHECK-NEXT: smulh x8, x0, x8
141 ; CHECK-NEXT: asr x9, x8, #5
142 ; CHECK-NEXT: add x8, x9, x8, lsr #63
143 ; CHECK-NEXT: mov w9, #98
144 ; CHECK-NEXT: msub x0, x8, x9, x0
145 ; CHECK-NEXT: ret
146 %1 = srem i64 %x, 98
147 ret i64 %1
148 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
2
; Test: <4 x i16> signed remainder by a vector of four different constants.
; Per-lane divisors, as the CHECK lines show them applied with msub:
;   lane 0 -> 95, lane 1 -> -124, lane 2 -> 98, lane 3 -> -1003.
; Each lane is extracted with smov, reduced with a multiply-based sequence,
; and inserted back into v0.
3 define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
4 ; CHECK-LABEL: fold_srem_vec_1:
5 ; CHECK: // %bb.0:
6 ; CHECK-NEXT: mov w9, #63421
7 ; CHECK-NEXT: mov w12, #33437
8 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
9 ; CHECK-NEXT: smov w8, v0.h[1]
10 ; CHECK-NEXT: movk w9, #31710, lsl #16
11 ; CHECK-NEXT: smov w11, v0.h[2]
12 ; CHECK-NEXT: movk w12, #21399, lsl #16
13 ; CHECK-NEXT: smull x12, w11, w12
14 ; CHECK-NEXT: smull x9, w8, w9
15 ; CHECK-NEXT: lsr x13, x12, #63
16 ; CHECK-NEXT: asr x12, x12, #37
17 ; CHECK-NEXT: lsr x9, x9, #32
18 ; CHECK-NEXT: add w12, w12, w13
19 ; CHECK-NEXT: mov w13, #98
20 ; CHECK-NEXT: sub w9, w9, w8
21 ; CHECK-NEXT: msub w11, w12, w13, w11
22 ; CHECK-NEXT: asr w13, w9, #6
23 ; CHECK-NEXT: add w9, w13, w9, lsr #31
24 ; CHECK-NEXT: mov w13, #37253
25 ; CHECK-NEXT: mov w10, #-124
26 ; CHECK-NEXT: smov w12, v0.h[0]
27 ; CHECK-NEXT: movk w13, #44150, lsl #16
28 ; CHECK-NEXT: msub w8, w9, w10, w8
29 ; CHECK-NEXT: smull x10, w12, w13
30 ; CHECK-NEXT: lsr x10, x10, #32
31 ; CHECK-NEXT: add w10, w10, w12
32 ; CHECK-NEXT: asr w13, w10, #6
33 ; CHECK-NEXT: mov w9, #95
34 ; CHECK-NEXT: add w10, w13, w10, lsr #31
35 ; CHECK-NEXT: msub w9, w10, w9, w12
36 ; CHECK-NEXT: mov w10, #63249
37 ; CHECK-NEXT: smov w13, v0.h[3]
38 ; CHECK-NEXT: movk w10, #48808, lsl #16
39 ; CHECK-NEXT: smull x10, w13, w10
40 ; CHECK-NEXT: lsr x12, x10, #63
41 ; CHECK-NEXT: asr x10, x10, #40
42 ; CHECK-NEXT: fmov s0, w9
43 ; CHECK-NEXT: add w10, w10, w12
44 ; CHECK-NEXT: mov v0.h[1], w8
45 ; CHECK-NEXT: mov w8, #-1003
46 ; CHECK-NEXT: mov v0.h[2], w11
47 ; CHECK-NEXT: msub w8, w10, w8, w13
48 ; CHECK-NEXT: mov v0.h[3], w8
49 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
50 ; CHECK-NEXT: ret
51 %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
52 ret <4 x i16> %1
53 }
54
; Test: <4 x i16> signed remainder where every lane uses the same divisor, 95.
; The CHECK lines expect one shared multiplier constant (w9) used by all four
; smull instructions and one shared #95 (w16) used by all four msub instructions.
55 define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
56 ; CHECK-LABEL: fold_srem_vec_2:
57 ; CHECK: // %bb.0:
58 ; CHECK-NEXT: mov w9, #37253
59 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
60 ; CHECK-NEXT: smov w8, v0.h[1]
61 ; CHECK-NEXT: movk w9, #44150, lsl #16
62 ; CHECK-NEXT: smov w10, v0.h[0]
63 ; CHECK-NEXT: smull x13, w8, w9
64 ; CHECK-NEXT: smov w11, v0.h[2]
65 ; CHECK-NEXT: smull x14, w10, w9
66 ; CHECK-NEXT: lsr x13, x13, #32
67 ; CHECK-NEXT: smov w12, v0.h[3]
68 ; CHECK-NEXT: smull x15, w11, w9
69 ; CHECK-NEXT: lsr x14, x14, #32
70 ; CHECK-NEXT: add w13, w13, w8
71 ; CHECK-NEXT: smull x9, w12, w9
72 ; CHECK-NEXT: lsr x15, x15, #32
73 ; CHECK-NEXT: add w14, w14, w10
74 ; CHECK-NEXT: asr w16, w13, #6
75 ; CHECK-NEXT: lsr x9, x9, #32
76 ; CHECK-NEXT: add w15, w15, w11
77 ; CHECK-NEXT: add w13, w16, w13, lsr #31
78 ; CHECK-NEXT: asr w16, w14, #6
79 ; CHECK-NEXT: add w9, w9, w12
80 ; CHECK-NEXT: add w14, w16, w14, lsr #31
81 ; CHECK-NEXT: asr w16, w15, #6
82 ; CHECK-NEXT: add w15, w16, w15, lsr #31
83 ; CHECK-NEXT: asr w16, w9, #6
84 ; CHECK-NEXT: add w9, w16, w9, lsr #31
85 ; CHECK-NEXT: mov w16, #95
86 ; CHECK-NEXT: msub w10, w14, w16, w10
87 ; CHECK-NEXT: msub w8, w13, w16, w8
88 ; CHECK-NEXT: fmov s0, w10
89 ; CHECK-NEXT: msub w11, w15, w16, w11
90 ; CHECK-NEXT: mov v0.h[1], w8
91 ; CHECK-NEXT: mov v0.h[2], w11
92 ; CHECK-NEXT: msub w8, w9, w16, w12
93 ; CHECK-NEXT: mov v0.h[3], w8
94 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
95 ; CHECK-NEXT: ret
96 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
97 ret <4 x i16> %1
98 }
99
100
101 ; Don't fold if we can combine srem with sdiv.
; Test: <4 x i16> srem and sdiv of the same value by a splat of 95, with the
; two results added. The CHECK lines expect each lane's quotient to be computed
; once: it is inserted into v0 (the sdiv result) and also feeds the msub whose
; result is inserted into v1 (the srem result), followed by one vector add.
102 define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
103 ; CHECK-LABEL: combine_srem_sdiv:
104 ; CHECK: // %bb.0:
105 ; CHECK-NEXT: mov w8, #37253
106 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
107 ; CHECK-NEXT: movk w8, #44150, lsl #16
108 ; CHECK-NEXT: smov w9, v0.h[1]
109 ; CHECK-NEXT: smov w10, v0.h[0]
110 ; CHECK-NEXT: smull x13, w9, w8
111 ; CHECK-NEXT: smov w11, v0.h[2]
112 ; CHECK-NEXT: smull x14, w10, w8
113 ; CHECK-NEXT: lsr x13, x13, #32
114 ; CHECK-NEXT: smov w12, v0.h[3]
115 ; CHECK-NEXT: smull x15, w11, w8
116 ; CHECK-NEXT: lsr x14, x14, #32
117 ; CHECK-NEXT: add w13, w13, w9
118 ; CHECK-NEXT: smull x8, w12, w8
119 ; CHECK-NEXT: lsr x15, x15, #32
120 ; CHECK-NEXT: add w14, w14, w10
121 ; CHECK-NEXT: asr w16, w13, #6
122 ; CHECK-NEXT: lsr x8, x8, #32
123 ; CHECK-NEXT: add w15, w15, w11
124 ; CHECK-NEXT: add w13, w16, w13, lsr #31
125 ; CHECK-NEXT: asr w16, w14, #6
126 ; CHECK-NEXT: add w8, w8, w12
127 ; CHECK-NEXT: add w14, w16, w14, lsr #31
128 ; CHECK-NEXT: asr w16, w15, #6
129 ; CHECK-NEXT: add w15, w16, w15, lsr #31
130 ; CHECK-NEXT: asr w16, w8, #6
131 ; CHECK-NEXT: add w8, w16, w8, lsr #31
132 ; CHECK-NEXT: mov w16, #95
133 ; CHECK-NEXT: msub w10, w14, w16, w10
134 ; CHECK-NEXT: msub w9, w13, w16, w9
135 ; CHECK-NEXT: fmov s0, w14
136 ; CHECK-NEXT: fmov s1, w10
137 ; CHECK-NEXT: msub w11, w15, w16, w11
138 ; CHECK-NEXT: mov v0.h[1], w13
139 ; CHECK-NEXT: mov v1.h[1], w9
140 ; CHECK-NEXT: msub w12, w8, w16, w12
141 ; CHECK-NEXT: mov v0.h[2], w15
142 ; CHECK-NEXT: mov v1.h[2], w11
143 ; CHECK-NEXT: mov v1.h[3], w12
144 ; CHECK-NEXT: mov v0.h[3], w8
145 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
146 ; CHECK-NEXT: ret
147 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
148 %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
149 %3 = add <4 x i16> %1, %2
150 ret <4 x i16> %3
151 }
152
153 ; Don't fold for divisors that are a power of two.
; Test: <4 x i16> signed remainder where three lanes use power-of-two divisors.
; Per-lane divisors, as reflected in the CHECK lines:
;   lane 0 -> 64 (bias 63, mask 0xffffffc0), lane 1 -> 32 (bias 31, mask 0xffffffe0),
;   lane 2 -> 8 (bias 7, mask 0xfffffff8), lane 3 -> 95 (multiply sequence + msub).
; The power-of-two lanes are expected to use add/cmp/csel/and/sub, not a multiply.
154 define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
155 ; CHECK-LABEL: dont_fold_srem_power_of_two:
156 ; CHECK: // %bb.0:
157 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
158 ; CHECK-NEXT: smov w8, v0.h[1]
159 ; CHECK-NEXT: add w12, w8, #31 // =31
160 ; CHECK-NEXT: cmp w8, #0 // =0
161 ; CHECK-NEXT: mov w11, #37253
162 ; CHECK-NEXT: csel w12, w12, w8, lt
163 ; CHECK-NEXT: smov w9, v0.h[0]
164 ; CHECK-NEXT: smov w10, v0.h[3]
165 ; CHECK-NEXT: movk w11, #44150, lsl #16
166 ; CHECK-NEXT: and w12, w12, #0xffffffe0
167 ; CHECK-NEXT: sub w8, w8, w12
168 ; CHECK-NEXT: add w12, w9, #63 // =63
169 ; CHECK-NEXT: smull x11, w10, w11
170 ; CHECK-NEXT: cmp w9, #0 // =0
171 ; CHECK-NEXT: lsr x11, x11, #32
172 ; CHECK-NEXT: csel w12, w12, w9, lt
173 ; CHECK-NEXT: add w11, w11, w10
174 ; CHECK-NEXT: and w12, w12, #0xffffffc0
175 ; CHECK-NEXT: sub w9, w9, w12
176 ; CHECK-NEXT: asr w12, w11, #6
177 ; CHECK-NEXT: add w11, w12, w11, lsr #31
178 ; CHECK-NEXT: smov w12, v0.h[2]
179 ; CHECK-NEXT: fmov s0, w9
180 ; CHECK-NEXT: add w9, w12, #7 // =7
181 ; CHECK-NEXT: cmp w12, #0 // =0
182 ; CHECK-NEXT: csel w9, w9, w12, lt
183 ; CHECK-NEXT: and w9, w9, #0xfffffff8
184 ; CHECK-NEXT: sub w9, w12, w9
185 ; CHECK-NEXT: mov v0.h[1], w8
186 ; CHECK-NEXT: mov w8, #95
187 ; CHECK-NEXT: mov v0.h[2], w9
188 ; CHECK-NEXT: msub w8, w11, w8, w10
189 ; CHECK-NEXT: mov v0.h[3], w8
190 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
191 ; CHECK-NEXT: ret
192 %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
193 ret <4 x i16> %1
194 }
195
196 ; Don't fold if the divisor is one.
; Test: <4 x i16> signed remainder where lane 0 is divided by 1.
; Per-lane divisors, as reflected in the CHECK lines:
;   lane 0 -> 1 (v0 starts as all-zero via movi and lane 0 is never written),
;   lane 1 -> 654, lane 2 -> 23, lane 3 -> 5423 (multiply sequence + msub each).
197 define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
198 ; CHECK-LABEL: dont_fold_srem_one:
199 ; CHECK: // %bb.0:
200 ; CHECK-NEXT: mov w9, #17097
201 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
202 ; CHECK-NEXT: smov w8, v0.h[2]
203 ; CHECK-NEXT: movk w9, #45590, lsl #16
204 ; CHECK-NEXT: smull x9, w8, w9
205 ; CHECK-NEXT: lsr x9, x9, #32
206 ; CHECK-NEXT: add w9, w9, w8
207 ; CHECK-NEXT: asr w12, w9, #4
208 ; CHECK-NEXT: add w9, w12, w9, lsr #31
209 ; CHECK-NEXT: mov w12, #30865
210 ; CHECK-NEXT: mov w10, #23
211 ; CHECK-NEXT: smov w11, v0.h[1]
212 ; CHECK-NEXT: movk w12, #51306, lsl #16
213 ; CHECK-NEXT: msub w8, w9, w10, w8
214 ; CHECK-NEXT: smull x10, w11, w12
215 ; CHECK-NEXT: lsr x10, x10, #32
216 ; CHECK-NEXT: add w10, w10, w11
217 ; CHECK-NEXT: asr w12, w10, #9
218 ; CHECK-NEXT: mov w9, #654
219 ; CHECK-NEXT: add w10, w12, w10, lsr #31
220 ; CHECK-NEXT: msub w9, w10, w9, w11
221 ; CHECK-NEXT: mov w10, #47143
222 ; CHECK-NEXT: smov w12, v0.h[3]
223 ; CHECK-NEXT: movk w10, #24749, lsl #16
224 ; CHECK-NEXT: smull x10, w12, w10
225 ; CHECK-NEXT: lsr x11, x10, #63
226 ; CHECK-NEXT: asr x10, x10, #43
227 ; CHECK-NEXT: movi d0, #0000000000000000
228 ; CHECK-NEXT: add w10, w10, w11
229 ; CHECK-NEXT: mov v0.h[1], w9
230 ; CHECK-NEXT: mov w9, #5423
231 ; CHECK-NEXT: mov v0.h[2], w8
232 ; CHECK-NEXT: msub w8, w10, w9, w12
233 ; CHECK-NEXT: mov v0.h[3], w8
234 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
235 ; CHECK-NEXT: ret
236 %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
237 ret <4 x i16> %1
238 }
239
240 ; Don't fold if the divisor is 2^15.
; Test: <4 x i16> signed remainder where lane 1 is divided by the literal 32768 (2^15).
; Per-lane divisors, as reflected in the CHECK lines:
;   lane 0 -> 1 (the CHECK output only shows a zero lane, which divisor 1 produces),
;   lane 1 -> 32768 (bias 32767 when negative, mask 0xffff8000, subtract),
;   lane 2 -> 23, lane 3 -> 5423 (multiply sequence + msub each).
; NOTE(review): the name says "smax", but the signed i16 maximum is 32767; the
; literal 2^15 has the 16-bit pattern 0x8000 - confirm which divisor is intended.
241 define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) {
242 ; CHECK-LABEL: dont_fold_srem_i16_smax:
243 ; CHECK: // %bb.0:
244 ; CHECK-NEXT: mov w10, #17097
245 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
246 ; CHECK-NEXT: smov w9, v0.h[2]
247 ; CHECK-NEXT: movk w10, #45590, lsl #16
248 ; CHECK-NEXT: smull x10, w9, w10
249 ; CHECK-NEXT: lsr x10, x10, #32
250 ; CHECK-NEXT: add w10, w10, w9
251 ; CHECK-NEXT: asr w12, w10, #4
252 ; CHECK-NEXT: mov w11, #23
253 ; CHECK-NEXT: add w10, w12, w10, lsr #31
254 ; CHECK-NEXT: msub w9, w10, w11, w9
255 ; CHECK-NEXT: mov w10, #47143
256 ; CHECK-NEXT: smov w12, v0.h[3]
257 ; CHECK-NEXT: movk w10, #24749, lsl #16
258 ; CHECK-NEXT: smull x10, w12, w10
259 ; CHECK-NEXT: lsr x11, x10, #63
260 ; CHECK-NEXT: asr x10, x10, #43
261 ; CHECK-NEXT: smov w8, v0.h[1]
262 ; CHECK-NEXT: add w10, w10, w11
263 ; CHECK-NEXT: mov w11, #32767
264 ; CHECK-NEXT: add w11, w8, w11
265 ; CHECK-NEXT: cmp w8, #0 // =0
266 ; CHECK-NEXT: csel w11, w11, w8, lt
267 ; CHECK-NEXT: and w11, w11, #0xffff8000
268 ; CHECK-NEXT: sub w8, w8, w11
269 ; CHECK-NEXT: movi d0, #0000000000000000
270 ; CHECK-NEXT: mov v0.h[1], w8
271 ; CHECK-NEXT: mov w8, #5423
272 ; CHECK-NEXT: mov v0.h[2], w9
273 ; CHECK-NEXT: msub w8, w10, w8, w12
274 ; CHECK-NEXT: mov v0.h[3], w8
275 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
276 ; CHECK-NEXT: ret
277 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
278 ret <4 x i16> %1
279 }
280
281 ; Don't fold i64 srem.
; Test: <4 x i64> signed remainder by a constant vector.
; The argument arrives in v0 (elements 0-1) and v1 (elements 2-3). Per-element
; divisors, as reflected in the CHECK lines:
;   element 0 -> 1 (the CHECK output only shows a zero lane, which divisor 1 produces),
;   element 1 -> 654, element 2 -> 23, element 3 -> 5423.
; Each non-trivial element uses a 64-bit mov/movk constant, smulh, shifts and msub.
282 define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
283 ; CHECK-LABEL: dont_fold_srem_i64:
284 ; CHECK: // %bb.0:
285 ; CHECK-NEXT: mov x9, #6055
286 ; CHECK-NEXT: movk x9, #58853, lsl #16
287 ; CHECK-NEXT: movk x9, #47142, lsl #32
288 ; CHECK-NEXT: mov x8, v1.d[1]
289 ; CHECK-NEXT: movk x9, #24749, lsl #48
290 ; CHECK-NEXT: smulh x9, x8, x9
291 ; CHECK-NEXT: asr x12, x9, #11
292 ; CHECK-NEXT: mov w10, #5423
293 ; CHECK-NEXT: add x9, x12, x9, lsr #63
294 ; CHECK-NEXT: msub x8, x9, x10, x8
295 ; CHECK-NEXT: mov x9, #21445
296 ; CHECK-NEXT: movk x9, #1603, lsl #16
297 ; CHECK-NEXT: movk x9, #15432, lsl #32
298 ; CHECK-NEXT: mov x12, v0.d[1]
299 ; CHECK-NEXT: movk x9, #25653, lsl #48
300 ; CHECK-NEXT: smulh x9, x12, x9
301 ; CHECK-NEXT: asr x10, x9, #8
302 ; CHECK-NEXT: add x9, x10, x9, lsr #63
303 ; CHECK-NEXT: mov w10, #654
304 ; CHECK-NEXT: msub x9, x9, x10, x12
305 ; CHECK-NEXT: mov x10, #8549
306 ; CHECK-NEXT: movk x10, #22795, lsl #16
307 ; CHECK-NEXT: movk x10, #17096, lsl #32
308 ; CHECK-NEXT: fmov x11, d1
309 ; CHECK-NEXT: movk x10, #45590, lsl #48
310 ; CHECK-NEXT: smulh x10, x11, x10
311 ; CHECK-NEXT: add x10, x10, x11
312 ; CHECK-NEXT: asr x12, x10, #4
313 ; CHECK-NEXT: add x10, x12, x10, lsr #63
314 ; CHECK-NEXT: mov w12, #23
315 ; CHECK-NEXT: msub x10, x10, x12, x11
316 ; CHECK-NEXT: movi v0.2d, #0000000000000000
317 ; CHECK-NEXT: fmov d1, x10
318 ; CHECK-NEXT: mov v1.d[1], x8
319 ; CHECK-NEXT: mov v0.d[1], x9
320 ; CHECK-NEXT: ret
321 %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
322 ret <4 x i64> %1
323 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
2
; Test: scalar i32 unsigned remainder by the odd constant 95.
; Expected AArch64 output: umull by a mov/movk constant, the high half combined
; with (x - high) >> 1 and shifted right by 6 to form the quotient, then msub
; (x - quotient * 95) for the remainder. No divide instruction is expected.
3 define i32 @fold_urem_positive_odd(i32 %x) {
4 ; CHECK-LABEL: fold_urem_positive_odd:
5 ; CHECK: // %bb.0:
6 ; CHECK-NEXT: mov w8, #8969
7 ; CHECK-NEXT: movk w8, #22765, lsl #16
8 ; CHECK-NEXT: umull x8, w0, w8
9 ; CHECK-NEXT: lsr x8, x8, #32
10 ; CHECK-NEXT: sub w9, w0, w8
11 ; CHECK-NEXT: add w8, w8, w9, lsr #1
12 ; CHECK-NEXT: lsr w8, w8, #6
13 ; CHECK-NEXT: mov w9, #95
14 ; CHECK-NEXT: msub w0, w8, w9, w0
15 ; CHECK-NEXT: ret
16 %1 = urem i32 %x, 95
17 ret i32 %1
18 }
19
20
; Test: scalar i32 unsigned remainder by the even constant 1060.
; Expected AArch64 output: umull by a mov/movk constant, a single right shift
; by 42 to form the quotient, then msub (x - quotient * 1060) for the remainder.
21 define i32 @fold_urem_positive_even(i32 %x) {
22 ; CHECK-LABEL: fold_urem_positive_even:
23 ; CHECK: // %bb.0:
24 ; CHECK-NEXT: mov w8, #16323
25 ; CHECK-NEXT: movk w8, #63310, lsl #16
26 ; CHECK-NEXT: umull x8, w0, w8
27 ; CHECK-NEXT: lsr x8, x8, #42
28 ; CHECK-NEXT: mov w9, #1060
29 ; CHECK-NEXT: msub w0, w8, w9, w0
30 ; CHECK-NEXT: ret
31 %1 = urem i32 %x, 1060
32 ret i32 %1
33 }
34
35
36 ; Don't fold if we can combine urem with udiv.
; Test: urem and udiv of the same i32 value by 95, with the two results added.
; The CHECK lines expect a single multiply-based quotient sequence: the quotient
; in w8 feeds both the msub that forms the remainder and the final add.
37 define i32 @combine_urem_udiv(i32 %x) {
38 ; CHECK-LABEL: combine_urem_udiv:
39 ; CHECK: // %bb.0:
40 ; CHECK-NEXT: mov w8, #8969
41 ; CHECK-NEXT: movk w8, #22765, lsl #16
42 ; CHECK-NEXT: umull x8, w0, w8
43 ; CHECK-NEXT: lsr x8, x8, #32
44 ; CHECK-NEXT: sub w9, w0, w8
45 ; CHECK-NEXT: add w8, w8, w9, lsr #1
46 ; CHECK-NEXT: lsr w8, w8, #6
47 ; CHECK-NEXT: mov w9, #95
48 ; CHECK-NEXT: msub w9, w8, w9, w0
49 ; CHECK-NEXT: add w0, w9, w8
50 ; CHECK-NEXT: ret
51 %1 = urem i32 %x, 95
52 %2 = udiv i32 %x, 95
53 %3 = add i32 %1, %2
54 ret i32 %3
55 }
56
57 ; Don't fold for divisors that are a power of two.
; Test: scalar i32 unsigned remainder by 64 (a power of two).
; The CHECK lines expect a single bit mask (and with 0x3f) and nothing else.
58 define i32 @dont_fold_urem_power_of_two(i32 %x) {
59 ; CHECK-LABEL: dont_fold_urem_power_of_two:
60 ; CHECK: // %bb.0:
61 ; CHECK-NEXT: and w0, w0, #0x3f
62 ; CHECK-NEXT: ret
63 %1 = urem i32 %x, 64
64 ret i32 %1
65 }
66
67 ; Don't fold if the divisor is one.
; Test: scalar i32 unsigned remainder by 1.
; The CHECK lines expect the function to return constant zero (mov w0, wzr)
; with no remainder computation at all.
68 define i32 @dont_fold_urem_one(i32 %x) {
69 ; CHECK-LABEL: dont_fold_urem_one:
70 ; CHECK: // %bb.0:
71 ; CHECK-NEXT: mov w0, wzr
72 ; CHECK-NEXT: ret
73 %1 = urem i32 %x, 1
74 ret i32 %1
75 }
76
77 ; Don't fold if the divisor is 2^32.
; Test: scalar i32 unsigned remainder by the literal 4294967296 (2^32).
; The CHECK lines expect an empty body: just `ret`, with w0 left untouched.
; NOTE(review): 2^32 needs 33 bits, so it is not a representable i32 value, and
; the unsigned i32 maximum that the name refers to would be 4294967295. The bare
; `ret` presumably results from the literal wrapping to 0 (remainder by zero) -
; confirm whether this test covers the divisor it was meant to.
78 define i32 @dont_fold_urem_i32_umax(i32 %x) {
79 ; CHECK-LABEL: dont_fold_urem_i32_umax:
80 ; CHECK: // %bb.0:
81 ; CHECK-NEXT: ret
82 %1 = urem i32 %x, 4294967296
83 ret i32 %1
84 }
85
86 ; Don't fold i64 urem
; Test: scalar i64 unsigned remainder by 98.
; The CHECK lines expect x to be pre-shifted right by 1, multiplied with umulh
; by a 64-bit mov/movk constant, shifted right by 4 to form the quotient, and
; then msub (x - quotient * 98) for the remainder.
87 define i64 @dont_fold_urem_i64(i64 %x) {
88 ; CHECK-LABEL: dont_fold_urem_i64:
89 ; CHECK: // %bb.0:
90 ; CHECK-NEXT: mov x9, #58849
91 ; CHECK-NEXT: movk x9, #48148, lsl #16
92 ; CHECK-NEXT: movk x9, #33436, lsl #32
93 ; CHECK-NEXT: lsr x8, x0, #1
94 ; CHECK-NEXT: movk x9, #21399, lsl #48
95 ; CHECK-NEXT: umulh x8, x8, x9
96 ; CHECK-NEXT: lsr x8, x8, #4
97 ; CHECK-NEXT: mov w9, #98
98 ; CHECK-NEXT: msub x0, x8, x9, x0
99 ; CHECK-NEXT: ret
100 %1 = urem i64 %x, 98
101 ret i64 %1
102 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
2
; Test: <4 x i16> unsigned remainder by a vector of four different constants.
; Per-lane divisors, as the CHECK lines show them applied with msub:
;   lane 0 -> 95, lane 1 -> 124, lane 2 -> 98, lane 3 -> 1003.
; Each lane is extracted with umov, reduced with a umull-based sequence,
; and inserted back into v0.
3 define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
4 ; CHECK-LABEL: fold_urem_vec_1:
5 ; CHECK: // %bb.0:
6 ; CHECK-NEXT: mov w11, #33437
7 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
8 ; CHECK-NEXT: umov w10, v0.h[2]
9 ; CHECK-NEXT: movk w11, #21399, lsl #16
10 ; CHECK-NEXT: umull x11, w10, w11
11 ; CHECK-NEXT: umov w8, v0.h[1]
12 ; CHECK-NEXT: mov w9, #16913
13 ; CHECK-NEXT: mov w12, #98
14 ; CHECK-NEXT: lsr x11, x11, #37
15 ; CHECK-NEXT: movk w9, #8456, lsl #16
16 ; CHECK-NEXT: msub w10, w11, w12, w10
17 ; CHECK-NEXT: ubfx w12, w8, #2, #14
18 ; CHECK-NEXT: umull x9, w12, w9
19 ; CHECK-NEXT: mov w11, #124
20 ; CHECK-NEXT: lsr x9, x9, #34
21 ; CHECK-NEXT: msub w8, w9, w11, w8
22 ; CHECK-NEXT: mov w9, #8969
23 ; CHECK-NEXT: umov w12, v0.h[0]
24 ; CHECK-NEXT: movk w9, #22765, lsl #16
25 ; CHECK-NEXT: umull x9, w12, w9
26 ; CHECK-NEXT: lsr x9, x9, #32
27 ; CHECK-NEXT: sub w11, w12, w9
28 ; CHECK-NEXT: add w9, w9, w11, lsr #1
29 ; CHECK-NEXT: mov w11, #95
30 ; CHECK-NEXT: lsr w9, w9, #6
31 ; CHECK-NEXT: msub w9, w9, w11, w12
32 ; CHECK-NEXT: umov w11, v0.h[3]
33 ; CHECK-NEXT: fmov s0, w9
34 ; CHECK-NEXT: mov w9, #2287
35 ; CHECK-NEXT: movk w9, #16727, lsl #16
36 ; CHECK-NEXT: umull x9, w11, w9
37 ; CHECK-NEXT: mov v0.h[1], w8
38 ; CHECK-NEXT: mov w8, #1003
39 ; CHECK-NEXT: lsr x9, x9, #40
40 ; CHECK-NEXT: mov v0.h[2], w10
41 ; CHECK-NEXT: msub w8, w9, w8, w11
42 ; CHECK-NEXT: mov v0.h[3], w8
43 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
44 ; CHECK-NEXT: ret
45 %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
46 ret <4 x i16> %1
47 }
48
; Test: <4 x i16> unsigned remainder where every lane uses the same divisor, 95.
; The CHECK lines expect one shared multiplier constant (w9) used by all four
; umull instructions and one shared #95 (w16) used by all four msub instructions.
49 define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
50 ; CHECK-LABEL: fold_urem_vec_2:
51 ; CHECK: // %bb.0:
52 ; CHECK-NEXT: mov w9, #8969
53 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
54 ; CHECK-NEXT: umov w8, v0.h[1]
55 ; CHECK-NEXT: movk w9, #22765, lsl #16
56 ; CHECK-NEXT: umov w10, v0.h[0]
57 ; CHECK-NEXT: umull x13, w8, w9
58 ; CHECK-NEXT: umov w11, v0.h[2]
59 ; CHECK-NEXT: umull x14, w10, w9
60 ; CHECK-NEXT: lsr x13, x13, #32
61 ; CHECK-NEXT: umov w12, v0.h[3]
62 ; CHECK-NEXT: umull x15, w11, w9
63 ; CHECK-NEXT: lsr x14, x14, #32
64 ; CHECK-NEXT: sub w16, w8, w13
65 ; CHECK-NEXT: umull x9, w12, w9
66 ; CHECK-NEXT: lsr x15, x15, #32
67 ; CHECK-NEXT: add w13, w13, w16, lsr #1
68 ; CHECK-NEXT: sub w16, w10, w14
69 ; CHECK-NEXT: lsr x9, x9, #32
70 ; CHECK-NEXT: add w14, w14, w16, lsr #1
71 ; CHECK-NEXT: sub w16, w11, w15
72 ; CHECK-NEXT: add w15, w15, w16, lsr #1
73 ; CHECK-NEXT: sub w16, w12, w9
74 ; CHECK-NEXT: add w9, w9, w16, lsr #1
75 ; CHECK-NEXT: mov w16, #95
76 ; CHECK-NEXT: lsr w13, w13, #6
77 ; CHECK-NEXT: msub w8, w13, w16, w8
78 ; CHECK-NEXT: lsr w13, w14, #6
79 ; CHECK-NEXT: msub w10, w13, w16, w10
80 ; CHECK-NEXT: lsr w13, w15, #6
81 ; CHECK-NEXT: fmov s0, w10
82 ; CHECK-NEXT: msub w11, w13, w16, w11
83 ; CHECK-NEXT: lsr w9, w9, #6
84 ; CHECK-NEXT: mov v0.h[1], w8
85 ; CHECK-NEXT: mov v0.h[2], w11
86 ; CHECK-NEXT: msub w8, w9, w16, w12
87 ; CHECK-NEXT: mov v0.h[3], w8
88 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
89 ; CHECK-NEXT: ret
90 %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
91 ret <4 x i16> %1
92 }
93
94
95 ; Don't fold if we can combine urem with udiv.
; Test: <4 x i16> urem and udiv of the same value by a splat of 95, with the
; two results added. The CHECK lines expect each lane's quotient to be computed
; once: it is inserted into v0 (the udiv result) and also feeds the msub whose
; result is inserted into v1 (the urem result), followed by one vector add.
96 define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
97 ; CHECK-LABEL: combine_urem_udiv:
98 ; CHECK: // %bb.0:
99 ; CHECK-NEXT: mov w8, #8969
100 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
101 ; CHECK-NEXT: movk w8, #22765, lsl #16
102 ; CHECK-NEXT: umov w9, v0.h[1]
103 ; CHECK-NEXT: umov w10, v0.h[0]
104 ; CHECK-NEXT: umull x13, w9, w8
105 ; CHECK-NEXT: umov w11, v0.h[2]
106 ; CHECK-NEXT: umull x14, w10, w8
107 ; CHECK-NEXT: lsr x13, x13, #32
108 ; CHECK-NEXT: umov w12, v0.h[3]
109 ; CHECK-NEXT: umull x15, w11, w8
110 ; CHECK-NEXT: lsr x14, x14, #32
111 ; CHECK-NEXT: sub w16, w9, w13
112 ; CHECK-NEXT: umull x8, w12, w8
113 ; CHECK-NEXT: lsr x15, x15, #32
114 ; CHECK-NEXT: add w13, w13, w16, lsr #1
115 ; CHECK-NEXT: sub w16, w10, w14
116 ; CHECK-NEXT: lsr x8, x8, #32
117 ; CHECK-NEXT: add w14, w14, w16, lsr #1
118 ; CHECK-NEXT: sub w16, w11, w15
119 ; CHECK-NEXT: add w15, w15, w16, lsr #1
120 ; CHECK-NEXT: sub w16, w12, w8
121 ; CHECK-NEXT: add w8, w8, w16, lsr #1
122 ; CHECK-NEXT: mov w16, #95
123 ; CHECK-NEXT: lsr w14, w14, #6
124 ; CHECK-NEXT: lsr w13, w13, #6
125 ; CHECK-NEXT: msub w10, w14, w16, w10
126 ; CHECK-NEXT: lsr w15, w15, #6
127 ; CHECK-NEXT: msub w9, w13, w16, w9
128 ; CHECK-NEXT: fmov s0, w14
129 ; CHECK-NEXT: fmov s1, w10
130 ; CHECK-NEXT: lsr w8, w8, #6
131 ; CHECK-NEXT: msub w11, w15, w16, w11
132 ; CHECK-NEXT: mov v0.h[1], w13
133 ; CHECK-NEXT: mov v1.h[1], w9
134 ; CHECK-NEXT: msub w12, w8, w16, w12
135 ; CHECK-NEXT: mov v0.h[2], w15
136 ; CHECK-NEXT: mov v1.h[2], w11
137 ; CHECK-NEXT: mov v1.h[3], w12
138 ; CHECK-NEXT: mov v0.h[3], w8
139 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
140 ; CHECK-NEXT: ret
141 %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
142 %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
143 %3 = add <4 x i16> %1, %2
144 ret <4 x i16> %3
145 }
146
147
148 ; Don't fold for divisors that are a power of two.
; Test: <4 x i16> unsigned remainder where three lanes use power-of-two divisors.
; Per-lane divisors, as reflected in the CHECK lines:
;   lane 0 -> 64 (and 0x3f), lane 1 -> 32 (and 0x1f), lane 2 -> 8 (and 0x7),
;   lane 3 -> 95 (umull sequence + msub).
; The power-of-two lanes are expected to be a single bit mask each.
149 define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
150 ; CHECK-LABEL: dont_fold_urem_power_of_two:
151 ; CHECK: // %bb.0:
152 ; CHECK-NEXT: mov w9, #8969
153 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
154 ; CHECK-NEXT: umov w8, v0.h[3]
155 ; CHECK-NEXT: movk w9, #22765, lsl #16
156 ; CHECK-NEXT: umull x9, w8, w9
157 ; CHECK-NEXT: lsr x9, x9, #32
158 ; CHECK-NEXT: sub w10, w8, w9
159 ; CHECK-NEXT: add w9, w9, w10, lsr #1
160 ; CHECK-NEXT: mov w10, #95
161 ; CHECK-NEXT: lsr w9, w9, #6
162 ; CHECK-NEXT: msub w8, w9, w10, w8
163 ; CHECK-NEXT: umov w9, v0.h[0]
164 ; CHECK-NEXT: and w9, w9, #0x3f
165 ; CHECK-NEXT: umov w10, v0.h[1]
166 ; CHECK-NEXT: fmov s1, w9
167 ; CHECK-NEXT: umov w9, v0.h[2]
168 ; CHECK-NEXT: and w10, w10, #0x1f
169 ; CHECK-NEXT: and w9, w9, #0x7
170 ; CHECK-NEXT: mov v1.h[1], w10
171 ; CHECK-NEXT: mov v1.h[2], w9
172 ; CHECK-NEXT: mov v1.h[3], w8
173 ; CHECK-NEXT: mov v0.16b, v1.16b
174 ; CHECK-NEXT: ret
175 %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
176 ret <4 x i16> %1
177 }
178
179 ; Don't fold if the divisor is one.
; Test: <4 x i16> unsigned remainder where lane 0 is divided by 1.
; Per-lane divisors, as reflected in the CHECK lines:
;   lane 0 -> 1 (v1 starts as all-zero via movi and lane 0 is never written),
;   lane 1 -> 654, lane 2 -> 23, lane 3 -> 5423 (umull sequence + msub each).
; NOTE(review): the function is named "srem" but its body is a urem and the
; CHECK lines use unsigned instructions (umov/umull); the name looks like a
; copy-paste leftover. It is kept unchanged here so the CHECK-LABEL still matches.
180 define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
181 ; CHECK-LABEL: dont_fold_srem_one:
182 ; CHECK: // %bb.0:
183 ; CHECK-NEXT: mov w9, #17097
184 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
185 ; CHECK-NEXT: umov w8, v0.h[2]
186 ; CHECK-NEXT: movk w9, #45590, lsl #16
187 ; CHECK-NEXT: umull x9, w8, w9
188 ; CHECK-NEXT: mov w10, #23
189 ; CHECK-NEXT: lsr x9, x9, #36
190 ; CHECK-NEXT: umov w11, v0.h[1]
191 ; CHECK-NEXT: msub w8, w9, w10, w8
192 ; CHECK-NEXT: mov w9, #30865
193 ; CHECK-NEXT: movk w9, #51306, lsl #16
194 ; CHECK-NEXT: ubfx w10, w11, #1, #15
195 ; CHECK-NEXT: umull x9, w10, w9
196 ; CHECK-NEXT: mov w10, #654
197 ; CHECK-NEXT: lsr x9, x9, #40
198 ; CHECK-NEXT: msub w9, w9, w10, w11
199 ; CHECK-NEXT: mov w11, #47143
200 ; CHECK-NEXT: umov w10, v0.h[3]
201 ; CHECK-NEXT: movk w11, #24749, lsl #16
202 ; CHECK-NEXT: movi d1, #0000000000000000
203 ; CHECK-NEXT: umull x11, w10, w11
204 ; CHECK-NEXT: mov v1.h[1], w9
205 ; CHECK-NEXT: mov w9, #5423
206 ; CHECK-NEXT: lsr x11, x11, #43
207 ; CHECK-NEXT: mov v1.h[2], w8
208 ; CHECK-NEXT: msub w8, w11, w9, w10
209 ; CHECK-NEXT: mov v1.h[3], w8
210 ; CHECK-NEXT: mov v0.16b, v1.16b
211 ; CHECK-NEXT: ret
212 %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
213 ret <4 x i16> %1
214 }
215
216 ; Don't fold if the divisor is 2^16.
; Test: <4 x i16> unsigned remainder by a constant vector; the CHECK lines expect
; an empty body (just `ret`, v0 untouched).
; NOTE(review): the urem line below ends at the comma - its constant vector operand
; is missing from this copy of the file, and it cannot be recovered from the CHECK
; lines because no per-lane code is expected. Restore it from the original test.
; NOTE(review): the comment above the function mentions 2^16, which does not fit in
; an i16 lane, and the name says "smax" although this is an unsigned test - confirm
; the intended divisor.
217 define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
218 ; CHECK-LABEL: dont_fold_urem_i16_smax:
219 ; CHECK: // %bb.0:
220 ; CHECK-NEXT: ret
221 %1 = urem <4 x i16> %x,
222 ret <4 x i16> %1
223 }
224
225 ; Don't fold i64 urem.
; Test: <4 x i64> unsigned remainder by a constant vector.
; The argument arrives in v0 (elements 0-1) and v1 (elements 2-3). Per-element
; divisors, as reflected in the CHECK lines:
;   element 0 -> 1 (v0 is zeroed with movi and only v0.d[1] is written),
;   element 1 -> 654, element 2 -> 23, element 3 -> 5423.
; Each non-trivial element uses a 64-bit mov/movk constant, umulh, shifts and msub.
226 define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) {
227 ; CHECK-LABEL: dont_fold_urem_i64:
228 ; CHECK: // %bb.0:
229 ; CHECK-NEXT: mov x10, #12109
230 ; CHECK-NEXT: movk x10, #52170, lsl #16
231 ; CHECK-NEXT: movk x10, #28749, lsl #32
232 ; CHECK-NEXT: mov x8, v1.d[1]
233 ; CHECK-NEXT: movk x10, #49499, lsl #48
234 ; CHECK-NEXT: umulh x10, x8, x10
235 ; CHECK-NEXT: mov w11, #5423
236 ; CHECK-NEXT: lsr x10, x10, #12
237 ; CHECK-NEXT: msub x8, x10, x11, x8
238 ; CHECK-NEXT: mov x10, #21445
239 ; CHECK-NEXT: movk x10, #1603, lsl #16
240 ; CHECK-NEXT: mov x12, v0.d[1]
241 ; CHECK-NEXT: movk x10, #15432, lsl #32
242 ; CHECK-NEXT: movk x10, #25653, lsl #48
243 ; CHECK-NEXT: lsr x11, x12, #1
244 ; CHECK-NEXT: umulh x10, x11, x10
245 ; CHECK-NEXT: mov w11, #654
246 ; CHECK-NEXT: lsr x10, x10, #7
247 ; CHECK-NEXT: msub x10, x10, x11, x12
248 ; CHECK-NEXT: mov x11, #17097
249 ; CHECK-NEXT: movk x11, #45590, lsl #16
250 ; CHECK-NEXT: movk x11, #34192, lsl #32
251 ; CHECK-NEXT: fmov x9, d1
252 ; CHECK-NEXT: movk x11, #25644, lsl #48
253 ; CHECK-NEXT: umulh x11, x9, x11
254 ; CHECK-NEXT: sub x12, x9, x11
255 ; CHECK-NEXT: add x11, x11, x12, lsr #1
256 ; CHECK-NEXT: mov w12, #23
257 ; CHECK-NEXT: lsr x11, x11, #4
258 ; CHECK-NEXT: msub x9, x11, x12, x9
259 ; CHECK-NEXT: movi v0.2d, #0000000000000000
260 ; CHECK-NEXT: fmov d1, x9
261 ; CHECK-NEXT: mov v1.d[1], x8
262 ; CHECK-NEXT: mov v0.d[1], x10
263 ; CHECK-NEXT: ret
264 %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
265 ret <4 x i64> %1
266 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc64 < %s | FileCheck -check-prefixes=CHECK,CHECK64 %s
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc < %s | FileCheck -check-prefixes=CHECK,CHECK32 %s
3
; Test: scalar i32 signed remainder by the positive odd constant 95 on PowerPC.
; The shared CHECK prefix applies to both RUN configurations. Expected output:
; a lis/ori constant, mulhw (high word of the product), add of x, sign bit
; (srwi 31) plus arithmetic shift (srawi 6) to form the quotient, then
; mulli by 95 and subf to produce the remainder.
4 define i32 @fold_srem_positive_odd(i32 %x) {
5 ; CHECK-LABEL: fold_srem_positive_odd:
6 ; CHECK: # %bb.0:
7 ; CHECK-NEXT: lis 4, -21386
8 ; CHECK-NEXT: ori 4, 4, 37253
9 ; CHECK-NEXT: mulhw 4, 3, 4
10 ; CHECK-NEXT: add 4, 4, 3
11 ; CHECK-NEXT: srwi 5, 4, 31
12 ; CHECK-NEXT: srawi 4, 4, 6
13 ; CHECK-NEXT: add 4, 4, 5
14 ; CHECK-NEXT: mulli 4, 4, 95
15 ; CHECK-NEXT: subf 3, 4, 3
16 ; CHECK-NEXT: blr
17 %1 = srem i32 %x, 95
18 ret i32 %1
19 }
20
21
; Test: scalar i32 signed remainder by the positive even constant 1060 on PowerPC.
; Expected output: lis/ori constant, mulhw, sign bit (srwi 31) plus arithmetic
; shift (srawi 8) to form the quotient, then mulli by 1060 and subf.
22 define i32 @fold_srem_positive_even(i32 %x) {
23 ; CHECK-LABEL: fold_srem_positive_even:
24 ; CHECK: # %bb.0:
25 ; CHECK-NEXT: lis 4, 15827
26 ; CHECK-NEXT: ori 4, 4, 36849
27 ; CHECK-NEXT: mulhw 4, 3, 4
28 ; CHECK-NEXT: srwi 5, 4, 31
29 ; CHECK-NEXT: srawi 4, 4, 8
30 ; CHECK-NEXT: add 4, 4, 5
31 ; CHECK-NEXT: mulli 4, 4, 1060
32 ; CHECK-NEXT: subf 3, 4, 3
33 ; CHECK-NEXT: blr
34 %1 = srem i32 %x, 1060
35 ret i32 %1
36 }
37
38
; Test: scalar i32 signed remainder by the negative odd constant -723 on PowerPC.
; Expected output: lis/ori constant, mulhw, sign bit (srwi 31) plus arithmetic
; shift (srawi 8) to form the quotient, then mulli by -723 and subf.
39 define i32 @fold_srem_negative_odd(i32 %x) {
40 ; CHECK-LABEL: fold_srem_negative_odd:
41 ; CHECK: # %bb.0:
42 ; CHECK-NEXT: lis 4, -23206
43 ; CHECK-NEXT: ori 4, 4, 65445
44 ; CHECK-NEXT: mulhw 4, 3, 4
45 ; CHECK-NEXT: srwi 5, 4, 31
46 ; CHECK-NEXT: srawi 4, 4, 8
47 ; CHECK-NEXT: add 4, 4, 5
48 ; CHECK-NEXT: mulli 4, 4, -723
49 ; CHECK-NEXT: subf 3, 4, 3
50 ; CHECK-NEXT: blr
51 %1 = srem i32 %x, -723
52 ret i32 %1
53 }
54
55
; Test: scalar i32 signed remainder by the negative constant -22981 on PowerPC.
; Expected output: lis/ori constant, mulhw, sign bit (srwi 31) plus arithmetic
; shift (srawi 8) to form the quotient, then mulli by -22981 and subf.
; NOTE(review): the function name says "even", but the divisor -22981 is odd, so
; no negative even divisor is actually exercised here - confirm the intended constant.
56 define i32 @fold_srem_negative_even(i32 %x) {
57 ; CHECK-LABEL: fold_srem_negative_even:
58 ; CHECK: # %bb.0:
59 ; CHECK-NEXT: lis 4, -731
60 ; CHECK-NEXT: ori 4, 4, 62439
61 ; CHECK-NEXT: mulhw 4, 3, 4
62 ; CHECK-NEXT: srwi 5, 4, 31
63 ; CHECK-NEXT: srawi 4, 4, 8
64 ; CHECK-NEXT: add 4, 4, 5
65 ; CHECK-NEXT: mulli 4, 4, -22981
66 ; CHECK-NEXT: subf 3, 4, 3
67 ; CHECK-NEXT: blr
68 %1 = srem i32 %x, -22981
69 ret i32 %1
70 }
71
72
73 ; Don't fold if we can combine srem with sdiv.
; Test: srem and sdiv of the same i32 value by 95 on PowerPC, results added.
; The CHECK lines expect a single multiply-based quotient sequence: the quotient
; in register 4 feeds both the mulli/subf pair that forms the remainder and the
; final add.
74 define i32 @combine_srem_sdiv(i32 %x) {
75 ; CHECK-LABEL: combine_srem_sdiv:
76 ; CHECK: # %bb.0:
77 ; CHECK-NEXT: lis 4, -21386
78 ; CHECK-NEXT: ori 4, 4, 37253
79 ; CHECK-NEXT: mulhw 4, 3, 4
80 ; CHECK-NEXT: add 4, 4, 3
81 ; CHECK-NEXT: srwi 5, 4, 31
82 ; CHECK-NEXT: srawi 4, 4, 6
83 ; CHECK-NEXT: add 4, 4, 5
84 ; CHECK-NEXT: mulli 5, 4, 95
85 ; CHECK-NEXT: subf 3, 5, 3
86 ; CHECK-NEXT: add 3, 3, 4
87 ; CHECK-NEXT: blr
88 %1 = srem i32 %x, 95
89 %2 = sdiv i32 %x, 95
90 %3 = add i32 %1, %2
91 ret i32 %3
92 }
93
94 ; Don't fold for divisors that are a power of two.
; Test: scalar i32 signed remainder by 64 (a power of two) on PowerPC.
; The CHECK lines expect no multiply: srawi 6 followed by addze forms the
; quotient, slwi 6 scales it back up, and subf takes it away from x.
95 define i32 @dont_fold_srem_power_of_two(i32 %x) {
96 ; CHECK-LABEL: dont_fold_srem_power_of_two:
97 ; CHECK: # %bb.0:
98 ; CHECK-NEXT: srawi 4, 3, 6
99 ; CHECK-NEXT: addze 4, 4
100 ; CHECK-NEXT: slwi 4, 4, 6
101 ; CHECK-NEXT: subf 3, 4, 3
102 ; CHECK-NEXT: blr
103 %1 = srem i32 %x, 64
104 ret i32 %1
105 }
106
107 ; Don't fold if the divisor is one.
; Test: scalar i32 signed remainder by 1 on PowerPC.
; The CHECK lines expect the function to return constant zero (li 3, 0)
; with no remainder computation at all.
108 define i32 @dont_fold_srem_one(i32 %x) {
109 ; CHECK-LABEL: dont_fold_srem_one:
110 ; CHECK: # %bb.0:
111 ; CHECK-NEXT: li 3, 0
112 ; CHECK-NEXT: blr
113 %1 = srem i32 %x, 1
114 ret i32 %1
115 }
116
117 ; Don't fold if the divisor is 2^31.
; Test: scalar i32 signed remainder by the literal 2147483648 (2^31) on PowerPC.
; The CHECK lines expect no multiply: srawi 31 followed by addze, slwi 31, and
; a final add to x.
; NOTE(review): the name says "smax", but the signed i32 maximum is 2147483647;
; the literal used here is 2^31, whose 32-bit pattern is 0x80000000 - confirm
; which divisor the test is meant to cover.
118 define i32 @dont_fold_srem_i32_smax(i32 %x) {
119 ; CHECK-LABEL: dont_fold_srem_i32_smax:
120 ; CHECK: # %bb.0:
121 ; CHECK-NEXT: srawi 4, 3, 31
122 ; CHECK-NEXT: addze 4, 4
123 ; CHECK-NEXT: slwi 4, 4, 31
124 ; CHECK-NEXT: add 3, 3, 4
125 ; CHECK-NEXT: blr
126 %1 = srem i32 %x, 2147483648
127 ret i32 %1
128 }
129
130 ; Don't fold i64 srem
; The CHECK lines expect a call to __moddi3 instead of an inline sequence,
; wrapped in a 16-byte stack frame that saves and restores the link register.
; "li 5, 0" / "li 6, 98" presumably materialize the 64-bit divisor 98 as a
; high/low register pair for the call -- confirm against the target ABI.
131 define i64 @dont_fold_srem_i64(i64 %x) {
132 ; CHECK-LABEL: dont_fold_srem_i64:
133 ; CHECK: # %bb.0:
134 ; CHECK-NEXT: mflr 0
135 ; CHECK-NEXT: stw 0, 4(1)
136 ; CHECK-NEXT: stwu 1, -16(1)
137 ; CHECK-NEXT: .cfi_def_cfa_offset 16
138 ; CHECK-NEXT: .cfi_offset lr, 4
139 ; CHECK-NEXT: li 5, 0
140 ; CHECK-NEXT: li 6, 98
141 ; CHECK-NEXT: bl __moddi3@PLT
142 ; CHECK-NEXT: lwz 0, 20(1)
143 ; CHECK-NEXT: addi 1, 1, 16
144 ; CHECK-NEXT: mtlr 0
145 ; CHECK-NEXT: blr
146 %1 = srem i64 %x, 98
147 ret i64 %1
148 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
2 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE
3 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
4 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE
5 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
6 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE
7 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
8 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE
9
; Vector signed remainder in which every lane has a different constant
; divisor (95, -124, 98, -1003). For each of the four prefixes (P9LE, P9BE,
; P8LE, P8BE) the CHECK lines expect the lanes to be processed one at a time:
; sign-extend the lane, mulld by a per-divisor constant, shift/sign-correct to
; get the quotient, mulli by the divisor and subf to get the remainder, then
; vector merges to rebuild the <4 x i16> result.
10 define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
11 ; P9LE-LABEL: fold_srem_vec_1:
12 ; P9LE: # %bb.0:
13 ; P9LE-NEXT: li r3, 0
14 ; P9LE-NEXT: vextuhrx r3, r3, v2
15 ; P9LE-NEXT: extsh r4, r3
16 ; P9LE-NEXT: lis r5, -21386
17 ; P9LE-NEXT: ori r5, r5, 37253
18 ; P9LE-NEXT: extsw r4, r4
19 ; P9LE-NEXT: mulld r5, r4, r5
20 ; P9LE-NEXT: rldicl r5, r5, 32, 32
21 ; P9LE-NEXT: add r4, r5, r4
22 ; P9LE-NEXT: srwi r5, r4, 31
23 ; P9LE-NEXT: srawi r4, r4, 6
24 ; P9LE-NEXT: add r4, r4, r5
25 ; P9LE-NEXT: lis r5, 31710
26 ; P9LE-NEXT: mulli r4, r4, 95
27 ; P9LE-NEXT: subf r3, r4, r3
28 ; P9LE-NEXT: mtvsrd f0, r3
29 ; P9LE-NEXT: li r3, 2
30 ; P9LE-NEXT: vextuhrx r3, r3, v2
31 ; P9LE-NEXT: extsh r4, r3
32 ; P9LE-NEXT: extsw r4, r4
33 ; P9LE-NEXT: ori r5, r5, 63421
34 ; P9LE-NEXT: mulld r5, r4, r5
35 ; P9LE-NEXT: rldicl r5, r5, 32, 32
36 ; P9LE-NEXT: subf r4, r4, r5
37 ; P9LE-NEXT: srwi r5, r4, 31
38 ; P9LE-NEXT: srawi r4, r4, 6
39 ; P9LE-NEXT: add r4, r4, r5
40 ; P9LE-NEXT: lis r5, 21399
41 ; P9LE-NEXT: mulli r4, r4, -124
42 ; P9LE-NEXT: subf r3, r4, r3
43 ; P9LE-NEXT: xxswapd v3, vs0
44 ; P9LE-NEXT: mtvsrd f0, r3
45 ; P9LE-NEXT: li r3, 4
46 ; P9LE-NEXT: vextuhrx r3, r3, v2
47 ; P9LE-NEXT: extsh r4, r3
48 ; P9LE-NEXT: extsw r4, r4
49 ; P9LE-NEXT: ori r5, r5, 33437
50 ; P9LE-NEXT: mulld r4, r4, r5
51 ; P9LE-NEXT: rldicl r5, r4, 1, 63
52 ; P9LE-NEXT: rldicl r4, r4, 32, 32
53 ; P9LE-NEXT: srawi r4, r4, 5
54 ; P9LE-NEXT: add r4, r4, r5
55 ; P9LE-NEXT: lis r5, -16728
56 ; P9LE-NEXT: mulli r4, r4, 98
57 ; P9LE-NEXT: subf r3, r4, r3
58 ; P9LE-NEXT: xxswapd v4, vs0
59 ; P9LE-NEXT: mtvsrd f0, r3
60 ; P9LE-NEXT: li r3, 6
61 ; P9LE-NEXT: vextuhrx r3, r3, v2
62 ; P9LE-NEXT: extsh r4, r3
63 ; P9LE-NEXT: extsw r4, r4
64 ; P9LE-NEXT: ori r5, r5, 63249
65 ; P9LE-NEXT: mulld r4, r4, r5
66 ; P9LE-NEXT: rldicl r5, r4, 1, 63
67 ; P9LE-NEXT: rldicl r4, r4, 32, 32
68 ; P9LE-NEXT: srawi r4, r4, 8
69 ; P9LE-NEXT: add r4, r4, r5
70 ; P9LE-NEXT: mulli r4, r4, -1003
71 ; P9LE-NEXT: subf r3, r4, r3
72 ; P9LE-NEXT: vmrglh v3, v4, v3
73 ; P9LE-NEXT: xxswapd v4, vs0
74 ; P9LE-NEXT: mtvsrd f0, r3
75 ; P9LE-NEXT: xxswapd v2, vs0
76 ; P9LE-NEXT: vmrglh v2, v2, v4
77 ; P9LE-NEXT: vmrglw v2, v2, v3
78 ; P9LE-NEXT: blr
79 ;
80 ; P9BE-LABEL: fold_srem_vec_1:
81 ; P9BE: # %bb.0:
82 ; P9BE-NEXT: li r3, 2
83 ; P9BE-NEXT: vextuhlx r3, r3, v2
84 ; P9BE-NEXT: extsh r3, r3
85 ; P9BE-NEXT: lis r4, 31710
86 ; P9BE-NEXT: ori r4, r4, 63421
87 ; P9BE-NEXT: extsw r3, r3
88 ; P9BE-NEXT: mulld r4, r3, r4
89 ; P9BE-NEXT: rldicl r4, r4, 32, 32
90 ; P9BE-NEXT: subf r4, r3, r4
91 ; P9BE-NEXT: srwi r5, r4, 31
92 ; P9BE-NEXT: srawi r4, r4, 6
93 ; P9BE-NEXT: add r4, r4, r5
94 ; P9BE-NEXT: mulli r4, r4, -124
95 ; P9BE-NEXT: subf r3, r4, r3
96 ; P9BE-NEXT: lis r4, -21386
97 ; P9BE-NEXT: sldi r3, r3, 48
98 ; P9BE-NEXT: mtvsrd v3, r3
99 ; P9BE-NEXT: li r3, 0
100 ; P9BE-NEXT: vextuhlx r3, r3, v2
101 ; P9BE-NEXT: extsh r3, r3
102 ; P9BE-NEXT: extsw r3, r3
103 ; P9BE-NEXT: ori r4, r4, 37253
104 ; P9BE-NEXT: mulld r4, r3, r4
105 ; P9BE-NEXT: rldicl r4, r4, 32, 32
106 ; P9BE-NEXT: add r4, r4, r3
107 ; P9BE-NEXT: srwi r5, r4, 31
108 ; P9BE-NEXT: srawi r4, r4, 6
109 ; P9BE-NEXT: add r4, r4, r5
110 ; P9BE-NEXT: mulli r4, r4, 95
111 ; P9BE-NEXT: subf r3, r4, r3
112 ; P9BE-NEXT: lis r4, -16728
113 ; P9BE-NEXT: sldi r3, r3, 48
114 ; P9BE-NEXT: mtvsrd v4, r3
115 ; P9BE-NEXT: li r3, 6
116 ; P9BE-NEXT: vextuhlx r3, r3, v2
117 ; P9BE-NEXT: extsh r3, r3
118 ; P9BE-NEXT: extsw r3, r3
119 ; P9BE-NEXT: ori r4, r4, 63249
120 ; P9BE-NEXT: mulld r4, r3, r4
121 ; P9BE-NEXT: rldicl r5, r4, 1, 63
122 ; P9BE-NEXT: rldicl r4, r4, 32, 32
123 ; P9BE-NEXT: srawi r4, r4, 8
124 ; P9BE-NEXT: add r4, r4, r5
125 ; P9BE-NEXT: mulli r4, r4, -1003
126 ; P9BE-NEXT: subf r3, r4, r3
127 ; P9BE-NEXT: lis r4, 21399
128 ; P9BE-NEXT: sldi r3, r3, 48
129 ; P9BE-NEXT: vmrghh v3, v4, v3
130 ; P9BE-NEXT: mtvsrd v4, r3
131 ; P9BE-NEXT: li r3, 4
132 ; P9BE-NEXT: vextuhlx r3, r3, v2
133 ; P9BE-NEXT: extsh r3, r3
134 ; P9BE-NEXT: extsw r3, r3
135 ; P9BE-NEXT: ori r4, r4, 33437
136 ; P9BE-NEXT: mulld r4, r3, r4
137 ; P9BE-NEXT: rldicl r5, r4, 1, 63
138 ; P9BE-NEXT: rldicl r4, r4, 32, 32
139 ; P9BE-NEXT: srawi r4, r4, 5
140 ; P9BE-NEXT: add r4, r4, r5
141 ; P9BE-NEXT: mulli r4, r4, 98
142 ; P9BE-NEXT: subf r3, r4, r3
143 ; P9BE-NEXT: sldi r3, r3, 48
144 ; P9BE-NEXT: mtvsrd v2, r3
145 ; P9BE-NEXT: vmrghh v2, v2, v4
146 ; P9BE-NEXT: vmrghw v2, v3, v2
147 ; P9BE-NEXT: blr
148 ;
149 ; P8LE-LABEL: fold_srem_vec_1:
150 ; P8LE: # %bb.0:
151 ; P8LE-NEXT: xxswapd vs0, v2
152 ; P8LE-NEXT: lis r4, 21399
153 ; P8LE-NEXT: lis r9, -16728
154 ; P8LE-NEXT: lis r11, -21386
155 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
156 ; P8LE-NEXT: ori r4, r4, 33437
157 ; P8LE-NEXT: ori r9, r9, 63249
158 ; P8LE-NEXT: ori r11, r11, 37253
159 ; P8LE-NEXT: mfvsrd r5, f0
160 ; P8LE-NEXT: rldicl r3, r5, 32, 48
161 ; P8LE-NEXT: rldicl r6, r5, 16, 48
162 ; P8LE-NEXT: clrldi r7, r5, 48
163 ; P8LE-NEXT: extsh r8, r3
164 ; P8LE-NEXT: extsh r10, r6
165 ; P8LE-NEXT: rldicl r5, r5, 48, 48
166 ; P8LE-NEXT: extsw r8, r8
167 ; P8LE-NEXT: extsh r12, r7
168 ; P8LE-NEXT: extsw r10, r10
169 ; P8LE-NEXT: mulld r4, r8, r4
170 ; P8LE-NEXT: lis r8, 31710
171 ; P8LE-NEXT: extsh r0, r5
172 ; P8LE-NEXT: extsw r12, r12
173 ; P8LE-NEXT: mulld r9, r10, r9
174 ; P8LE-NEXT: ori r8, r8, 63421
175 ; P8LE-NEXT: extsw r10, r0
176 ; P8LE-NEXT: mulld r11, r12, r11
177 ; P8LE-NEXT: mulld r8, r10, r8
178 ; P8LE-NEXT: rldicl r0, r4, 1, 63
179 ; P8LE-NEXT: rldicl r4, r4, 32, 32
180 ; P8LE-NEXT: rldicl r30, r9, 1, 63
181 ; P8LE-NEXT: rldicl r9, r9, 32, 32
182 ; P8LE-NEXT: rldicl r11, r11, 32, 32
183 ; P8LE-NEXT: rldicl r8, r8, 32, 32
184 ; P8LE-NEXT: add r11, r11, r12
185 ; P8LE-NEXT: srawi r4, r4, 5
186 ; P8LE-NEXT: subf r8, r10, r8
187 ; P8LE-NEXT: srawi r9, r9, 8
188 ; P8LE-NEXT: srwi r10, r11, 31
189 ; P8LE-NEXT: add r4, r4, r0
190 ; P8LE-NEXT: srawi r11, r11, 6
191 ; P8LE-NEXT: add r9, r9, r30
192 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
193 ; P8LE-NEXT: add r10, r11, r10
194 ; P8LE-NEXT: srwi r11, r8, 31
195 ; P8LE-NEXT: srawi r8, r8, 6
196 ; P8LE-NEXT: mulli r4, r4, 98
197 ; P8LE-NEXT: mulli r9, r9, -1003
198 ; P8LE-NEXT: add r8, r8, r11
199 ; P8LE-NEXT: mulli r10, r10, 95
200 ; P8LE-NEXT: mulli r8, r8, -124
201 ; P8LE-NEXT: subf r3, r4, r3
202 ; P8LE-NEXT: subf r4, r9, r6
203 ; P8LE-NEXT: mtvsrd f0, r3
204 ; P8LE-NEXT: subf r3, r10, r7
205 ; P8LE-NEXT: mtvsrd f1, r4
206 ; P8LE-NEXT: subf r4, r8, r5
207 ; P8LE-NEXT: mtvsrd f2, r3
208 ; P8LE-NEXT: xxswapd v2, vs0
209 ; P8LE-NEXT: mtvsrd f3, r4
210 ; P8LE-NEXT: xxswapd v3, vs1
211 ; P8LE-NEXT: xxswapd v4, vs2
212 ; P8LE-NEXT: xxswapd v5, vs3
213 ; P8LE-NEXT: vmrglh v2, v3, v2
214 ; P8LE-NEXT: vmrglh v3, v5, v4
215 ; P8LE-NEXT: vmrglw v2, v2, v3
216 ; P8LE-NEXT: blr
217 ;
218 ; P8BE-LABEL: fold_srem_vec_1:
219 ; P8BE: # %bb.0:
220 ; P8BE-NEXT: mfvsrd r4, v2
221 ; P8BE-NEXT: lis r3, -16728
222 ; P8BE-NEXT: lis r9, 31710
223 ; P8BE-NEXT: lis r8, 21399
224 ; P8BE-NEXT: lis r10, -21386
225 ; P8BE-NEXT: ori r3, r3, 63249
226 ; P8BE-NEXT: ori r9, r9, 63421
227 ; P8BE-NEXT: ori r8, r8, 33437
228 ; P8BE-NEXT: ori r10, r10, 37253
229 ; P8BE-NEXT: clrldi r5, r4, 48
230 ; P8BE-NEXT: rldicl r7, r4, 32, 48
231 ; P8BE-NEXT: rldicl r6, r4, 48, 48
232 ; P8BE-NEXT: rldicl r4, r4, 16, 48
233 ; P8BE-NEXT: extsh r5, r5
234 ; P8BE-NEXT: extsh r7, r7
235 ; P8BE-NEXT: extsh r6, r6
236 ; P8BE-NEXT: extsw r5, r5
237 ; P8BE-NEXT: extsh r4, r4
238 ; P8BE-NEXT: extsw r7, r7
239 ; P8BE-NEXT: extsw r6, r6
240 ; P8BE-NEXT: mulld r3, r5, r3
241 ; P8BE-NEXT: extsw r4, r4
242 ; P8BE-NEXT: mulld r9, r7, r9
243 ; P8BE-NEXT: mulld r8, r6, r8
244 ; P8BE-NEXT: mulld r10, r4, r10
245 ; P8BE-NEXT: rldicl r11, r3, 1, 63
246 ; P8BE-NEXT: rldicl r3, r3, 32, 32
247 ; P8BE-NEXT: rldicl r9, r9, 32, 32
248 ; P8BE-NEXT: rldicl r12, r8, 1, 63
249 ; P8BE-NEXT: rldicl r8, r8, 32, 32
250 ; P8BE-NEXT: rldicl r10, r10, 32, 32
251 ; P8BE-NEXT: subf r9, r7, r9
252 ; P8BE-NEXT: srawi r3, r3, 8
253 ; P8BE-NEXT: srawi r8, r8, 5
254 ; P8BE-NEXT: add r10, r10, r4
255 ; P8BE-NEXT: add r3, r3, r11
256 ; P8BE-NEXT: srwi r11, r9, 31
257 ; P8BE-NEXT: add r8, r8, r12
258 ; P8BE-NEXT: srawi r9, r9, 6
259 ; P8BE-NEXT: mulli r3, r3, -1003
260 ; P8BE-NEXT: add r9, r9, r11
261 ; P8BE-NEXT: srwi r11, r10, 31
262 ; P8BE-NEXT: srawi r10, r10, 6
263 ; P8BE-NEXT: mulli r8, r8, 98
264 ; P8BE-NEXT: add r10, r10, r11
265 ; P8BE-NEXT: mulli r9, r9, -124
266 ; P8BE-NEXT: mulli r10, r10, 95
267 ; P8BE-NEXT: subf r3, r3, r5
268 ; P8BE-NEXT: sldi r3, r3, 48
269 ; P8BE-NEXT: subf r5, r8, r6
270 ; P8BE-NEXT: mtvsrd v2, r3
271 ; P8BE-NEXT: subf r6, r9, r7
272 ; P8BE-NEXT: sldi r3, r5, 48
273 ; P8BE-NEXT: subf r4, r10, r4
274 ; P8BE-NEXT: mtvsrd v3, r3
275 ; P8BE-NEXT: sldi r3, r6, 48
276 ; P8BE-NEXT: sldi r4, r4, 48
277 ; P8BE-NEXT: mtvsrd v4, r3
278 ; P8BE-NEXT: mtvsrd v5, r4
279 ; P8BE-NEXT: vmrghh v2, v3, v2
280 ; P8BE-NEXT: vmrghh v3, v5, v4
281 ; P8BE-NEXT: vmrghw v2, v3, v2
282 ; P8BE-NEXT: blr
283 %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
284 ret <4 x i16> %1
285 }
286
; Vector signed remainder in which all four lanes share the divisor 95. For
; each prefix (P9LE, P9BE, P8LE, P8BE) the CHECK lines expect the constant
; built by lis -21386 / ori 37253 to be materialized once and reused by the
; mulld of every lane, followed by the same shift/sign-correct, mulli by 95
; and subf sequence per lane, and vector merges to rebuild the result.
287 define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
288 ; P9LE-LABEL: fold_srem_vec_2:
289 ; P9LE: # %bb.0:
290 ; P9LE-NEXT: li r3, 0
291 ; P9LE-NEXT: vextuhrx r3, r3, v2
292 ; P9LE-NEXT: extsh r4, r3
293 ; P9LE-NEXT: lis r5, -21386
294 ; P9LE-NEXT: ori r5, r5, 37253
295 ; P9LE-NEXT: extsw r4, r4
296 ; P9LE-NEXT: mulld r6, r4, r5
297 ; P9LE-NEXT: rldicl r6, r6, 32, 32
298 ; P9LE-NEXT: add r4, r6, r4
299 ; P9LE-NEXT: srwi r6, r4, 31
300 ; P9LE-NEXT: srawi r4, r4, 6
301 ; P9LE-NEXT: add r4, r4, r6
302 ; P9LE-NEXT: mulli r4, r4, 95
303 ; P9LE-NEXT: subf r3, r4, r3
304 ; P9LE-NEXT: mtvsrd f0, r3
305 ; P9LE-NEXT: li r3, 2
306 ; P9LE-NEXT: vextuhrx r3, r3, v2
307 ; P9LE-NEXT: extsh r4, r3
308 ; P9LE-NEXT: extsw r4, r4
309 ; P9LE-NEXT: mulld r6, r4, r5
310 ; P9LE-NEXT: rldicl r6, r6, 32, 32
311 ; P9LE-NEXT: add r4, r6, r4
312 ; P9LE-NEXT: srwi r6, r4, 31
313 ; P9LE-NEXT: srawi r4, r4, 6
314 ; P9LE-NEXT: add r4, r4, r6
315 ; P9LE-NEXT: mulli r4, r4, 95
316 ; P9LE-NEXT: subf r3, r4, r3
317 ; P9LE-NEXT: xxswapd v3, vs0
318 ; P9LE-NEXT: mtvsrd f0, r3
319 ; P9LE-NEXT: li r3, 4
320 ; P9LE-NEXT: vextuhrx r3, r3, v2
321 ; P9LE-NEXT: extsh r4, r3
322 ; P9LE-NEXT: extsw r4, r4
323 ; P9LE-NEXT: mulld r6, r4, r5
324 ; P9LE-NEXT: rldicl r6, r6, 32, 32
325 ; P9LE-NEXT: add r4, r6, r4
326 ; P9LE-NEXT: srwi r6, r4, 31
327 ; P9LE-NEXT: srawi r4, r4, 6
328 ; P9LE-NEXT: add r4, r4, r6
329 ; P9LE-NEXT: mulli r4, r4, 95
330 ; P9LE-NEXT: subf r3, r4, r3
331 ; P9LE-NEXT: xxswapd v4, vs0
332 ; P9LE-NEXT: mtvsrd f0, r3
333 ; P9LE-NEXT: li r3, 6
334 ; P9LE-NEXT: vextuhrx r3, r3, v2
335 ; P9LE-NEXT: extsh r4, r3
336 ; P9LE-NEXT: extsw r4, r4
337 ; P9LE-NEXT: mulld r5, r4, r5
338 ; P9LE-NEXT: rldicl r5, r5, 32, 32
339 ; P9LE-NEXT: add r4, r5, r4
340 ; P9LE-NEXT: srwi r5, r4, 31
341 ; P9LE-NEXT: srawi r4, r4, 6
342 ; P9LE-NEXT: add r4, r4, r5
343 ; P9LE-NEXT: mulli r4, r4, 95
344 ; P9LE-NEXT: subf r3, r4, r3
345 ; P9LE-NEXT: vmrglh v3, v4, v3
346 ; P9LE-NEXT: xxswapd v4, vs0
347 ; P9LE-NEXT: mtvsrd f0, r3
348 ; P9LE-NEXT: xxswapd v2, vs0
349 ; P9LE-NEXT: vmrglh v2, v2, v4
350 ; P9LE-NEXT: vmrglw v2, v2, v3
351 ; P9LE-NEXT: blr
352 ;
353 ; P9BE-LABEL: fold_srem_vec_2:
354 ; P9BE: # %bb.0:
355 ; P9BE-NEXT: li r3, 6
356 ; P9BE-NEXT: vextuhlx r3, r3, v2
357 ; P9BE-NEXT: extsh r3, r3
358 ; P9BE-NEXT: lis r4, -21386
359 ; P9BE-NEXT: ori r4, r4, 37253
360 ; P9BE-NEXT: extsw r3, r3
361 ; P9BE-NEXT: mulld r5, r3, r4
362 ; P9BE-NEXT: rldicl r5, r5, 32, 32
363 ; P9BE-NEXT: add r5, r5, r3
364 ; P9BE-NEXT: srwi r6, r5, 31
365 ; P9BE-NEXT: srawi r5, r5, 6
366 ; P9BE-NEXT: add r5, r5, r6
367 ; P9BE-NEXT: mulli r5, r5, 95
368 ; P9BE-NEXT: subf r3, r5, r3
369 ; P9BE-NEXT: sldi r3, r3, 48
370 ; P9BE-NEXT: mtvsrd v3, r3
371 ; P9BE-NEXT: li r3, 4
372 ; P9BE-NEXT: vextuhlx r3, r3, v2
373 ; P9BE-NEXT: extsh r3, r3
374 ; P9BE-NEXT: extsw r3, r3
375 ; P9BE-NEXT: mulld r5, r3, r4
376 ; P9BE-NEXT: rldicl r5, r5, 32, 32
377 ; P9BE-NEXT: add r5, r5, r3
378 ; P9BE-NEXT: srwi r6, r5, 31
379 ; P9BE-NEXT: srawi r5, r5, 6
380 ; P9BE-NEXT: add r5, r5, r6
381 ; P9BE-NEXT: mulli r5, r5, 95
382 ; P9BE-NEXT: subf r3, r5, r3
383 ; P9BE-NEXT: sldi r3, r3, 48
384 ; P9BE-NEXT: mtvsrd v4, r3
385 ; P9BE-NEXT: li r3, 2
386 ; P9BE-NEXT: vextuhlx r3, r3, v2
387 ; P9BE-NEXT: extsh r3, r3
388 ; P9BE-NEXT: extsw r3, r3
389 ; P9BE-NEXT: mulld r5, r3, r4
390 ; P9BE-NEXT: rldicl r5, r5, 32, 32
391 ; P9BE-NEXT: add r5, r5, r3
392 ; P9BE-NEXT: srwi r6, r5, 31
393 ; P9BE-NEXT: srawi r5, r5, 6
394 ; P9BE-NEXT: add r5, r5, r6
395 ; P9BE-NEXT: mulli r5, r5, 95
396 ; P9BE-NEXT: subf r3, r5, r3
397 ; P9BE-NEXT: sldi r3, r3, 48
398 ; P9BE-NEXT: vmrghh v3, v4, v3
399 ; P9BE-NEXT: mtvsrd v4, r3
400 ; P9BE-NEXT: li r3, 0
401 ; P9BE-NEXT: vextuhlx r3, r3, v2
402 ; P9BE-NEXT: extsh r3, r3
403 ; P9BE-NEXT: extsw r3, r3
404 ; P9BE-NEXT: mulld r4, r3, r4
405 ; P9BE-NEXT: rldicl r4, r4, 32, 32
406 ; P9BE-NEXT: add r4, r4, r3
407 ; P9BE-NEXT: srwi r5, r4, 31
408 ; P9BE-NEXT: srawi r4, r4, 6
409 ; P9BE-NEXT: add r4, r4, r5
410 ; P9BE-NEXT: mulli r4, r4, 95
411 ; P9BE-NEXT: subf r3, r4, r3
412 ; P9BE-NEXT: sldi r3, r3, 48
413 ; P9BE-NEXT: mtvsrd v2, r3
414 ; P9BE-NEXT: vmrghh v2, v2, v4
415 ; P9BE-NEXT: vmrghw v2, v2, v3
416 ; P9BE-NEXT: blr
417 ;
418 ; P8LE-LABEL: fold_srem_vec_2:
419 ; P8LE: # %bb.0:
420 ; P8LE-NEXT: xxswapd vs0, v2
421 ; P8LE-NEXT: lis r4, -21386
422 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
423 ; P8LE-NEXT: ori r4, r4, 37253
424 ; P8LE-NEXT: mfvsrd r5, f0
425 ; P8LE-NEXT: clrldi r3, r5, 48
426 ; P8LE-NEXT: rldicl r7, r5, 32, 48
427 ; P8LE-NEXT: extsh r8, r3
428 ; P8LE-NEXT: rldicl r6, r5, 48, 48
429 ; P8LE-NEXT: extsh r10, r7
430 ; P8LE-NEXT: rldicl r5, r5, 16, 48
431 ; P8LE-NEXT: extsw r8, r8
432 ; P8LE-NEXT: extsh r9, r6
433 ; P8LE-NEXT: extsw r10, r10
434 ; P8LE-NEXT: extsh r11, r5
435 ; P8LE-NEXT: mulld r12, r8, r4
436 ; P8LE-NEXT: extsw r9, r9
437 ; P8LE-NEXT: extsw r11, r11
438 ; P8LE-NEXT: mulld r30, r10, r4
439 ; P8LE-NEXT: mulld r0, r9, r4
440 ; P8LE-NEXT: mulld r4, r11, r4
441 ; P8LE-NEXT: rldicl r12, r12, 32, 32
442 ; P8LE-NEXT: add r8, r12, r8
443 ; P8LE-NEXT: rldicl r12, r30, 32, 32
444 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
445 ; P8LE-NEXT: rldicl r0, r0, 32, 32
446 ; P8LE-NEXT: rldicl r4, r4, 32, 32
447 ; P8LE-NEXT: add r10, r12, r10
448 ; P8LE-NEXT: add r9, r0, r9
449 ; P8LE-NEXT: srwi r0, r8, 31
450 ; P8LE-NEXT: add r4, r4, r11
451 ; P8LE-NEXT: srwi r11, r10, 31
452 ; P8LE-NEXT: srawi r8, r8, 6
453 ; P8LE-NEXT: srawi r10, r10, 6
454 ; P8LE-NEXT: srwi r12, r9, 31
455 ; P8LE-NEXT: add r8, r8, r0
456 ; P8LE-NEXT: srawi r9, r9, 6
457 ; P8LE-NEXT: add r10, r10, r11
458 ; P8LE-NEXT: srwi r11, r4, 31
459 ; P8LE-NEXT: srawi r4, r4, 6
460 ; P8LE-NEXT: add r9, r9, r12
461 ; P8LE-NEXT: mulli r8, r8, 95
462 ; P8LE-NEXT: add r4, r4, r11
463 ; P8LE-NEXT: mulli r9, r9, 95
464 ; P8LE-NEXT: mulli r10, r10, 95
465 ; P8LE-NEXT: mulli r4, r4, 95
466 ; P8LE-NEXT: subf r3, r8, r3
467 ; P8LE-NEXT: subf r6, r9, r6
468 ; P8LE-NEXT: mtvsrd f0, r3
469 ; P8LE-NEXT: subf r3, r10, r7
470 ; P8LE-NEXT: subf r4, r4, r5
471 ; P8LE-NEXT: mtvsrd f1, r6
472 ; P8LE-NEXT: mtvsrd f2, r3
473 ; P8LE-NEXT: xxswapd v2, vs0
474 ; P8LE-NEXT: mtvsrd f3, r4
475 ; P8LE-NEXT: xxswapd v3, vs1
476 ; P8LE-NEXT: xxswapd v4, vs2
477 ; P8LE-NEXT: xxswapd v5, vs3
478 ; P8LE-NEXT: vmrglh v2, v3, v2
479 ; P8LE-NEXT: vmrglh v3, v5, v4
480 ; P8LE-NEXT: vmrglw v2, v3, v2
481 ; P8LE-NEXT: blr
482 ;
483 ; P8BE-LABEL: fold_srem_vec_2:
484 ; P8BE: # %bb.0:
485 ; P8BE-NEXT: mfvsrd r4, v2
486 ; P8BE-NEXT: lis r3, -21386
487 ; P8BE-NEXT: ori r3, r3, 37253
488 ; P8BE-NEXT: clrldi r5, r4, 48
489 ; P8BE-NEXT: rldicl r6, r4, 48, 48
490 ; P8BE-NEXT: extsh r5, r5
491 ; P8BE-NEXT: rldicl r7, r4, 32, 48
492 ; P8BE-NEXT: extsh r6, r6
493 ; P8BE-NEXT: extsw r5, r5
494 ; P8BE-NEXT: rldicl r4, r4, 16, 48
495 ; P8BE-NEXT: extsh r7, r7
496 ; P8BE-NEXT: extsw r6, r6
497 ; P8BE-NEXT: mulld r8, r5, r3
498 ; P8BE-NEXT: extsh r4, r4
499 ; P8BE-NEXT: extsw r7, r7
500 ; P8BE-NEXT: mulld r9, r6, r3
501 ; P8BE-NEXT: extsw r4, r4
502 ; P8BE-NEXT: mulld r10, r7, r3
503 ; P8BE-NEXT: mulld r3, r4, r3
504 ; P8BE-NEXT: rldicl r8, r8, 32, 32
505 ; P8BE-NEXT: rldicl r9, r9, 32, 32
506 ; P8BE-NEXT: add r8, r8, r5
507 ; P8BE-NEXT: rldicl r10, r10, 32, 32
508 ; P8BE-NEXT: add r9, r9, r6
509 ; P8BE-NEXT: srwi r11, r8, 31
510 ; P8BE-NEXT: srawi r8, r8, 6
511 ; P8BE-NEXT: rldicl r3, r3, 32, 32
512 ; P8BE-NEXT: add r10, r10, r7
513 ; P8BE-NEXT: add r8, r8, r11
514 ; P8BE-NEXT: srwi r11, r9, 31
515 ; P8BE-NEXT: add r3, r3, r4
516 ; P8BE-NEXT: srawi r9, r9, 6
517 ; P8BE-NEXT: mulli r8, r8, 95
518 ; P8BE-NEXT: add r9, r9, r11
519 ; P8BE-NEXT: srwi r11, r10, 31
520 ; P8BE-NEXT: srawi r10, r10, 6
521 ; P8BE-NEXT: mulli r9, r9, 95
522 ; P8BE-NEXT: add r10, r10, r11
523 ; P8BE-NEXT: srwi r11, r3, 31
524 ; P8BE-NEXT: srawi r3, r3, 6
525 ; P8BE-NEXT: mulli r10, r10, 95
526 ; P8BE-NEXT: subf r5, r8, r5
527 ; P8BE-NEXT: add r3, r3, r11
528 ; P8BE-NEXT: sldi r5, r5, 48
529 ; P8BE-NEXT: mulli r3, r3, 95
530 ; P8BE-NEXT: subf r6, r9, r6
531 ; P8BE-NEXT: mtvsrd v2, r5
532 ; P8BE-NEXT: sldi r6, r6, 48
533 ; P8BE-NEXT: subf r7, r10, r7
534 ; P8BE-NEXT: mtvsrd v3, r6
535 ; P8BE-NEXT: subf r3, r3, r4
536 ; P8BE-NEXT: sldi r4, r7, 48
537 ; P8BE-NEXT: vmrghh v2, v3, v2
538 ; P8BE-NEXT: sldi r3, r3, 48
539 ; P8BE-NEXT: mtvsrd v4, r4
540 ; P8BE-NEXT: mtvsrd v5, r3
541 ; P8BE-NEXT: vmrghh v3, v5, v4
542 ; P8BE-NEXT: vmrghw v2, v3, v2
543 ; P8BE-NEXT: blr
544 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
545 ret <4 x i16> %1
546 }
547
548
549 ; Don't fold if we can combine srem with sdiv.
; The function returns (x srem 95) + (x sdiv 95) lane-wise, with 95 in all four
; lanes. For each prefix the CHECK lines expect every lane's quotient to be
; computed once (mulld by the lis -21386 / ori 37253 constant, then
; shift/sign-correct) and used twice: multiplied by 95 and subtracted to form
; the remainder vector, and also merged into a quotient vector. The two
; vectors are then summed with vadduhm.
550 define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
551 ; P9LE-LABEL: combine_srem_sdiv:
552 ; P9LE: # %bb.0:
553 ; P9LE-NEXT: li r3, 0
554 ; P9LE-NEXT: vextuhrx r3, r3, v2
555 ; P9LE-NEXT: extsh r4, r3
556 ; P9LE-NEXT: lis r5, -21386
557 ; P9LE-NEXT: ori r5, r5, 37253
558 ; P9LE-NEXT: extsw r4, r4
559 ; P9LE-NEXT: mulld r6, r4, r5
560 ; P9LE-NEXT: rldicl r6, r6, 32, 32
561 ; P9LE-NEXT: add r4, r6, r4
562 ; P9LE-NEXT: srwi r6, r4, 31
563 ; P9LE-NEXT: srawi r4, r4, 6
564 ; P9LE-NEXT: add r4, r4, r6
565 ; P9LE-NEXT: mulli r6, r4, 95
566 ; P9LE-NEXT: subf r3, r6, r3
567 ; P9LE-NEXT: mtvsrd f0, r3
568 ; P9LE-NEXT: li r3, 2
569 ; P9LE-NEXT: vextuhrx r3, r3, v2
570 ; P9LE-NEXT: extsh r6, r3
571 ; P9LE-NEXT: extsw r6, r6
572 ; P9LE-NEXT: mulld r7, r6, r5
573 ; P9LE-NEXT: rldicl r7, r7, 32, 32
574 ; P9LE-NEXT: add r6, r7, r6
575 ; P9LE-NEXT: srwi r7, r6, 31
576 ; P9LE-NEXT: srawi r6, r6, 6
577 ; P9LE-NEXT: add r6, r6, r7
578 ; P9LE-NEXT: mulli r7, r6, 95
579 ; P9LE-NEXT: subf r3, r7, r3
580 ; P9LE-NEXT: xxswapd v3, vs0
581 ; P9LE-NEXT: mtvsrd f0, r3
582 ; P9LE-NEXT: li r3, 4
583 ; P9LE-NEXT: vextuhrx r3, r3, v2
584 ; P9LE-NEXT: extsh r7, r3
585 ; P9LE-NEXT: extsw r7, r7
586 ; P9LE-NEXT: mulld r8, r7, r5
587 ; P9LE-NEXT: rldicl r8, r8, 32, 32
588 ; P9LE-NEXT: add r7, r8, r7
589 ; P9LE-NEXT: srwi r8, r7, 31
590 ; P9LE-NEXT: srawi r7, r7, 6
591 ; P9LE-NEXT: add r7, r7, r8
592 ; P9LE-NEXT: mulli r8, r7, 95
593 ; P9LE-NEXT: subf r3, r8, r3
594 ; P9LE-NEXT: xxswapd v4, vs0
595 ; P9LE-NEXT: mtvsrd f0, r3
596 ; P9LE-NEXT: li r3, 6
597 ; P9LE-NEXT: vextuhrx r3, r3, v2
598 ; P9LE-NEXT: extsh r8, r3
599 ; P9LE-NEXT: extsw r8, r8
600 ; P9LE-NEXT: mulld r5, r8, r5
601 ; P9LE-NEXT: rldicl r5, r5, 32, 32
602 ; P9LE-NEXT: add r5, r5, r8
603 ; P9LE-NEXT: srwi r8, r5, 31
604 ; P9LE-NEXT: srawi r5, r5, 6
605 ; P9LE-NEXT: add r5, r5, r8
606 ; P9LE-NEXT: mulli r8, r5, 95
607 ; P9LE-NEXT: subf r3, r8, r3
608 ; P9LE-NEXT: vmrglh v3, v4, v3
609 ; P9LE-NEXT: xxswapd v4, vs0
610 ; P9LE-NEXT: mtvsrd f0, r3
611 ; P9LE-NEXT: xxswapd v2, vs0
612 ; P9LE-NEXT: mtvsrd f0, r4
613 ; P9LE-NEXT: vmrglh v2, v2, v4
614 ; P9LE-NEXT: vmrglw v2, v2, v3
615 ; P9LE-NEXT: xxswapd v3, vs0
616 ; P9LE-NEXT: mtvsrd f0, r6
617 ; P9LE-NEXT: xxswapd v4, vs0
618 ; P9LE-NEXT: mtvsrd f0, r7
619 ; P9LE-NEXT: vmrglh v3, v4, v3
620 ; P9LE-NEXT: xxswapd v4, vs0
621 ; P9LE-NEXT: mtvsrd f0, r5
622 ; P9LE-NEXT: xxswapd v5, vs0
623 ; P9LE-NEXT: vmrglh v4, v5, v4
624 ; P9LE-NEXT: vmrglw v3, v4, v3
625 ; P9LE-NEXT: vadduhm v2, v2, v3
626 ; P9LE-NEXT: blr
627 ;
628 ; P9BE-LABEL: combine_srem_sdiv:
629 ; P9BE: # %bb.0:
630 ; P9BE-NEXT: li r3, 6
631 ; P9BE-NEXT: vextuhlx r3, r3, v2
632 ; P9BE-NEXT: extsh r4, r3
633 ; P9BE-NEXT: lis r5, -21386
634 ; P9BE-NEXT: ori r5, r5, 37253
635 ; P9BE-NEXT: extsw r4, r4
636 ; P9BE-NEXT: mulld r6, r4, r5
637 ; P9BE-NEXT: rldicl r6, r6, 32, 32
638 ; P9BE-NEXT: add r4, r6, r4
639 ; P9BE-NEXT: srwi r6, r4, 31
640 ; P9BE-NEXT: srawi r4, r4, 6
641 ; P9BE-NEXT: add r4, r4, r6
642 ; P9BE-NEXT: mulli r6, r4, 95
643 ; P9BE-NEXT: subf r3, r6, r3
644 ; P9BE-NEXT: sldi r3, r3, 48
645 ; P9BE-NEXT: mtvsrd v3, r3
646 ; P9BE-NEXT: li r3, 4
647 ; P9BE-NEXT: vextuhlx r3, r3, v2
648 ; P9BE-NEXT: extsh r6, r3
649 ; P9BE-NEXT: extsw r6, r6
650 ; P9BE-NEXT: mulld r7, r6, r5
651 ; P9BE-NEXT: rldicl r7, r7, 32, 32
652 ; P9BE-NEXT: add r6, r7, r6
653 ; P9BE-NEXT: srwi r7, r6, 31
654 ; P9BE-NEXT: srawi r6, r6, 6
655 ; P9BE-NEXT: add r6, r6, r7
656 ; P9BE-NEXT: mulli r7, r6, 95
657 ; P9BE-NEXT: subf r3, r7, r3
658 ; P9BE-NEXT: sldi r3, r3, 48
659 ; P9BE-NEXT: mtvsrd v4, r3
660 ; P9BE-NEXT: li r3, 2
661 ; P9BE-NEXT: vextuhlx r3, r3, v2
662 ; P9BE-NEXT: extsh r7, r3
663 ; P9BE-NEXT: extsw r7, r7
664 ; P9BE-NEXT: mulld r8, r7, r5
665 ; P9BE-NEXT: rldicl r8, r8, 32, 32
666 ; P9BE-NEXT: add r7, r8, r7
667 ; P9BE-NEXT: srwi r8, r7, 31
668 ; P9BE-NEXT: srawi r7, r7, 6
669 ; P9BE-NEXT: add r7, r7, r8
670 ; P9BE-NEXT: mulli r8, r7, 95
671 ; P9BE-NEXT: subf r3, r8, r3
672 ; P9BE-NEXT: sldi r3, r3, 48
673 ; P9BE-NEXT: vmrghh v3, v4, v3
674 ; P9BE-NEXT: mtvsrd v4, r3
675 ; P9BE-NEXT: li r3, 0
676 ; P9BE-NEXT: vextuhlx r3, r3, v2
677 ; P9BE-NEXT: extsh r3, r3
678 ; P9BE-NEXT: extsw r3, r3
679 ; P9BE-NEXT: mulld r5, r3, r5
680 ; P9BE-NEXT: rldicl r5, r5, 32, 32
681 ; P9BE-NEXT: add r5, r5, r3
682 ; P9BE-NEXT: srwi r8, r5, 31
683 ; P9BE-NEXT: srawi r5, r5, 6
684 ; P9BE-NEXT: add r5, r5, r8
685 ; P9BE-NEXT: mulli r8, r5, 95
686 ; P9BE-NEXT: subf r3, r8, r3
687 ; P9BE-NEXT: sldi r3, r3, 48
688 ; P9BE-NEXT: mtvsrd v2, r3
689 ; P9BE-NEXT: sldi r3, r4, 48
690 ; P9BE-NEXT: vmrghh v2, v2, v4
691 ; P9BE-NEXT: vmrghw v2, v2, v3
692 ; P9BE-NEXT: mtvsrd v3, r3
693 ; P9BE-NEXT: sldi r3, r6, 48
694 ; P9BE-NEXT: mtvsrd v4, r3
695 ; P9BE-NEXT: sldi r3, r7, 48
696 ; P9BE-NEXT: vmrghh v3, v4, v3
697 ; P9BE-NEXT: mtvsrd v4, r3
698 ; P9BE-NEXT: sldi r3, r5, 48
699 ; P9BE-NEXT: mtvsrd v5, r3
700 ; P9BE-NEXT: vmrghh v4, v5, v4
701 ; P9BE-NEXT: vmrghw v3, v4, v3
702 ; P9BE-NEXT: vadduhm v2, v2, v3
703 ; P9BE-NEXT: blr
704 ;
705 ; P8LE-LABEL: combine_srem_sdiv:
706 ; P8LE: # %bb.0:
707 ; P8LE-NEXT: xxswapd vs0, v2
708 ; P8LE-NEXT: lis r5, -21386
709 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
710 ; P8LE-NEXT: ori r5, r5, 37253
711 ; P8LE-NEXT: mfvsrd r6, f0
712 ; P8LE-NEXT: clrldi r3, r6, 48
713 ; P8LE-NEXT: rldicl r4, r6, 48, 48
714 ; P8LE-NEXT: rldicl r7, r6, 32, 48
715 ; P8LE-NEXT: extsh r8, r3
716 ; P8LE-NEXT: extsh r9, r4
717 ; P8LE-NEXT: rldicl r6, r6, 16, 48
718 ; P8LE-NEXT: extsh r10, r7
719 ; P8LE-NEXT: extsw r8, r8
720 ; P8LE-NEXT: extsw r9, r9
721 ; P8LE-NEXT: extsh r11, r6
722 ; P8LE-NEXT: extsw r10, r10
723 ; P8LE-NEXT: mulld r12, r8, r5
724 ; P8LE-NEXT: extsw r11, r11
725 ; P8LE-NEXT: mulld r0, r9, r5
726 ; P8LE-NEXT: mulld r30, r10, r5
727 ; P8LE-NEXT: mulld r5, r11, r5
728 ; P8LE-NEXT: rldicl r12, r12, 32, 32
729 ; P8LE-NEXT: rldicl r0, r0, 32, 32
730 ; P8LE-NEXT: rldicl r30, r30, 32, 32
731 ; P8LE-NEXT: add r8, r12, r8
732 ; P8LE-NEXT: rldicl r5, r5, 32, 32
733 ; P8LE-NEXT: add r9, r0, r9
734 ; P8LE-NEXT: add r10, r30, r10
735 ; P8LE-NEXT: srwi r12, r8, 31
736 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
737 ; P8LE-NEXT: srawi r8, r8, 6
738 ; P8LE-NEXT: srawi r0, r9, 6
739 ; P8LE-NEXT: srwi r9, r9, 31
740 ; P8LE-NEXT: add r5, r5, r11
741 ; P8LE-NEXT: add r8, r8, r12
742 ; P8LE-NEXT: srawi r12, r10, 6
743 ; P8LE-NEXT: srwi r10, r10, 31
744 ; P8LE-NEXT: add r9, r0, r9
745 ; P8LE-NEXT: mulli r0, r8, 95
746 ; P8LE-NEXT: add r10, r12, r10
747 ; P8LE-NEXT: mtvsrd f0, r8
748 ; P8LE-NEXT: srwi r8, r5, 31
749 ; P8LE-NEXT: srawi r5, r5, 6
750 ; P8LE-NEXT: mulli r11, r9, 95
751 ; P8LE-NEXT: mtvsrd f1, r9
752 ; P8LE-NEXT: mulli r9, r10, 95
753 ; P8LE-NEXT: add r5, r5, r8
754 ; P8LE-NEXT: xxswapd v2, vs0
755 ; P8LE-NEXT: mtvsrd f2, r10
756 ; P8LE-NEXT: mtvsrd f3, r5
757 ; P8LE-NEXT: mulli r5, r5, 95
758 ; P8LE-NEXT: xxswapd v3, vs1
759 ; P8LE-NEXT: subf r3, r0, r3
760 ; P8LE-NEXT: xxswapd v1, vs2
761 ; P8LE-NEXT: mtvsrd f0, r3
762 ; P8LE-NEXT: subf r4, r11, r4
763 ; P8LE-NEXT: xxswapd v6, vs3
764 ; P8LE-NEXT: subf r3, r9, r7
765 ; P8LE-NEXT: mtvsrd f1, r4
766 ; P8LE-NEXT: mtvsrd f4, r3
767 ; P8LE-NEXT: subf r3, r5, r6
768 ; P8LE-NEXT: mtvsrd f5, r3
769 ; P8LE-NEXT: xxswapd v4, vs1
770 ; P8LE-NEXT: vmrglh v2, v3, v2
771 ; P8LE-NEXT: xxswapd v3, vs0
772 ; P8LE-NEXT: xxswapd v5, vs4
773 ; P8LE-NEXT: xxswapd v0, vs5
774 ; P8LE-NEXT: vmrglh v3, v4, v3
775 ; P8LE-NEXT: vmrglh v4, v0, v5
776 ; P8LE-NEXT: vmrglh v5, v6, v1
777 ; P8LE-NEXT: vmrglw v3, v4, v3
778 ; P8LE-NEXT: vmrglw v2, v5, v2
779 ; P8LE-NEXT: vadduhm v2, v3, v2
780 ; P8LE-NEXT: blr
781 ;
782 ; P8BE-LABEL: combine_srem_sdiv:
783 ; P8BE: # %bb.0:
784 ; P8BE-NEXT: mfvsrd r6, v2
785 ; P8BE-NEXT: lis r5, -21386
786 ; P8BE-NEXT: ori r5, r5, 37253
787 ; P8BE-NEXT: clrldi r3, r6, 48
788 ; P8BE-NEXT: rldicl r4, r6, 48, 48
789 ; P8BE-NEXT: extsh r8, r3
790 ; P8BE-NEXT: rldicl r7, r6, 32, 48
791 ; P8BE-NEXT: extsh r9, r4
792 ; P8BE-NEXT: rldicl r6, r6, 16, 48
793 ; P8BE-NEXT: extsw r8, r8
794 ; P8BE-NEXT: extsh r10, r7
795 ; P8BE-NEXT: extsw r9, r9
796 ; P8BE-NEXT: extsh r6, r6
797 ; P8BE-NEXT: mulld r11, r8, r5
798 ; P8BE-NEXT: extsw r10, r10
799 ; P8BE-NEXT: extsw r6, r6
800 ; P8BE-NEXT: mulld r12, r9, r5
801 ; P8BE-NEXT: mulld r0, r10, r5
802 ; P8BE-NEXT: mulld r5, r6, r5
803 ; P8BE-NEXT: rldicl r11, r11, 32, 32
804 ; P8BE-NEXT: rldicl r12, r12, 32, 32
805 ; P8BE-NEXT: add r8, r11, r8
806 ; P8BE-NEXT: rldicl r0, r0, 32, 32
807 ; P8BE-NEXT: rldicl r5, r5, 32, 32
808 ; P8BE-NEXT: add r9, r12, r9
809 ; P8BE-NEXT: srawi r11, r8, 6
810 ; P8BE-NEXT: srwi r8, r8, 31
811 ; P8BE-NEXT: add r10, r0, r10
812 ; P8BE-NEXT: add r5, r5, r6
813 ; P8BE-NEXT: srawi r12, r9, 6
814 ; P8BE-NEXT: srwi r9, r9, 31
815 ; P8BE-NEXT: add r8, r11, r8
816 ; P8BE-NEXT: srawi r0, r10, 6
817 ; P8BE-NEXT: srawi r11, r5, 6
818 ; P8BE-NEXT: srwi r10, r10, 31
819 ; P8BE-NEXT: add r9, r12, r9
820 ; P8BE-NEXT: srwi r5, r5, 31
821 ; P8BE-NEXT: mulli r12, r8, 95
822 ; P8BE-NEXT: add r10, r0, r10
823 ; P8BE-NEXT: add r5, r11, r5
824 ; P8BE-NEXT: mulli r0, r9, 95
825 ; P8BE-NEXT: sldi r9, r9, 48
826 ; P8BE-NEXT: sldi r8, r8, 48
827 ; P8BE-NEXT: mtvsrd v3, r9
828 ; P8BE-NEXT: mulli r9, r5, 95
829 ; P8BE-NEXT: mtvsrd v2, r8
830 ; P8BE-NEXT: mulli r8, r10, 95
831 ; P8BE-NEXT: sldi r10, r10, 48
832 ; P8BE-NEXT: subf r3, r12, r3
833 ; P8BE-NEXT: mtvsrd v4, r10
834 ; P8BE-NEXT: subf r4, r0, r4
835 ; P8BE-NEXT: sldi r3, r3, 48
836 ; P8BE-NEXT: vmrghh v2, v3, v2
837 ; P8BE-NEXT: sldi r4, r4, 48
838 ; P8BE-NEXT: mtvsrd v3, r3
839 ; P8BE-NEXT: subf r3, r9, r6
840 ; P8BE-NEXT: subf r7, r8, r7
841 ; P8BE-NEXT: mtvsrd v5, r4
842 ; P8BE-NEXT: sldi r3, r3, 48
843 ; P8BE-NEXT: sldi r6, r7, 48
844 ; P8BE-NEXT: mtvsrd v1, r3
845 ; P8BE-NEXT: sldi r3, r5, 48
846 ; P8BE-NEXT: mtvsrd v0, r6
847 ; P8BE-NEXT: vmrghh v3, v5, v3
848 ; P8BE-NEXT: mtvsrd v5, r3
849 ; P8BE-NEXT: vmrghh v0, v1, v0
850 ; P8BE-NEXT: vmrghh v4, v5, v4
851 ; P8BE-NEXT: vmrghw v3, v0, v3
852 ; P8BE-NEXT: vmrghw v2, v4, v2
853 ; P8BE-NEXT: vadduhm v2, v3, v2
854 ; P8BE-NEXT: blr
855 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
856 %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
857 %3 = add <4 x i16> %1, %2
858 ret <4 x i16> %3
859 }
860
861 ; Don't fold for divisors that are a power of two.
862 define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
863 ; P9LE-LABEL: dont_fold_srem_power_of_two:
864 ; P9LE: # %bb.0:
865 ; P9LE-NEXT: li r3, 0
866 ; P9LE-NEXT: vextuhrx r3, r3, v2
867 ; P9LE-NEXT: extsh r4, r3
868 ; P9LE-NEXT: srawi r4, r4, 6
869 ; P9LE-NEXT: addze r4, r4
870 ; P9LE-NEXT: slwi r4, r4, 6
871 ; P9LE-NEXT: subf r3, r4, r3
872 ; P9LE-NEXT: mtvsrd f0, r3
873 ; P9LE-NEXT: li r3, 2
874 ; P9LE-NEXT: vextuhrx r3, r3, v2
875 ; P9LE-NEXT: extsh r4, r3
876 ; P9LE-NEXT: srawi r4, r4, 5
877 ; P9LE-NEXT: addze r4, r4
878 ; P9LE-NEXT: slwi r4, r4, 5
879 ; P9LE-NEXT: subf r3, r4, r3
880 ; P9LE-NEXT: xxswapd v3, vs0
881 ; P9LE-NEXT: mtvsrd f0, r3
882 ; P9LE-NEXT: li r3, 6
883 ; P9LE-NEXT: vextuhrx r3, r3, v2
884 ; P9LE-NEXT: extsh r4, r3
885 ; P9LE-NEXT: lis r5, -21386
886 ; P9LE-NEXT: ori r5, r5, 37253
887 ; P9LE-NEXT: xxswapd v4, vs0
888 ; P9LE-NEXT: vmrglh v3, v4, v3
889 ; P9LE-NEXT: extsw r4, r4
890 ; P9LE-NEXT: mulld r5, r4, r5
891 ; P9LE-NEXT: rldicl r5, r5, 32, 32
892 ; P9LE-NEXT: add r4, r5, r4
893 ; P9LE-NEXT: srwi r5, r4, 31
894 ; P9LE-NEXT: srawi r4, r4, 6
895 ; P9LE-NEXT: add r4, r4, r5
896 ; P9LE-NEXT: mulli r4, r4, 95
897 ; P9LE-NEXT: subf r3, r4, r3
898 ; P9LE-NEXT: mtvsrd f0, r3
899 ; P9LE-NEXT: li r3, 4
900 ; P9LE-NEXT: vextuhrx r3, r3, v2
901 ; P9LE-NEXT: extsh r4, r3
902 ; P9LE-NEXT: srawi r4, r4, 3
903 ; P9LE-NEXT: addze r4, r4
904 ; P9LE-NEXT: slwi r4, r4, 3
905 ; P9LE-NEXT: subf r3, r4, r3
906 ; P9LE-NEXT: xxswapd v4, vs0
907 ; P9LE-NEXT: mtvsrd f0, r3
908 ; P9LE-NEXT: xxswapd v2, vs0
909 ; P9LE-NEXT: vmrglh v2, v4, v2
910 ; P9LE-NEXT: vmrglw v2, v2, v3
911 ; P9LE-NEXT: blr
912 ;
913 ; P9BE-LABEL: dont_fold_srem_power_of_two:
914 ; P9BE: # %bb.0:
915 ; P9BE-NEXT: li r3, 2
916 ; P9BE-NEXT: vextuhlx r3, r3, v2
917 ; P9BE-NEXT: extsh r3, r3
918 ; P9BE-NEXT: srawi r4, r3, 5
919 ; P9BE-NEXT: addze r4, r4
920 ; P9BE-NEXT: slwi r4, r4, 5
921 ; P9BE-NEXT: subf r3, r4, r3
922 ; P9BE-NEXT: sldi r3, r3, 48
923 ; P9BE-NEXT: mtvsrd v3, r3
924 ; P9BE-NEXT: li r3, 0
925 ; P9BE-NEXT: vextuhlx r3, r3, v2
926 ; P9BE-NEXT: extsh r3, r3
927 ; P9BE-NEXT: srawi r4, r3, 6
928 ; P9BE-NEXT: addze r4, r4
929 ; P9BE-NEXT: slwi r4, r4, 6
930 ; P9BE-NEXT: subf r3, r4, r3
931 ; P9BE-NEXT: lis r4, -21386
932 ; P9BE-NEXT: sldi r3, r3, 48
933 ; P9BE-NEXT: mtvsrd v4, r3
934 ; P9BE-NEXT: li r3, 6
935 ; P9BE-NEXT: vextuhlx r3, r3, v2
936 ; P9BE-NEXT: extsh r3, r3
937 ; P9BE-NEXT: extsw r3, r3
938 ; P9BE-NEXT: ori r4, r4, 37253
939 ; P9BE-NEXT: mulld r4, r3, r4
940 ; P9BE-NEXT: rldicl r4, r4, 32, 32
941 ; P9BE-NEXT: add r4, r4, r3
942 ; P9BE-NEXT: srwi r5, r4, 31
943 ; P9BE-NEXT: srawi r4, r4, 6
944 ; P9BE-NEXT: add r4, r4, r5
945 ; P9BE-NEXT: mulli r4, r4, 95
946 ; P9BE-NEXT: subf r3, r4, r3
947 ; P9BE-NEXT: sldi r3, r3, 48
948 ; P9BE-NEXT: vmrghh v3, v4, v3
949 ; P9BE-NEXT: mtvsrd v4, r3
950 ; P9BE-NEXT: li r3, 4
951 ; P9BE-NEXT: vextuhlx r3, r3, v2
952 ; P9BE-NEXT: extsh r3, r3
953 ; P9BE-NEXT: srawi r4, r3, 3
954 ; P9BE-NEXT: addze r4, r4
955 ; P9BE-NEXT: slwi r4, r4, 3
956 ; P9BE-NEXT: subf r3, r4, r3
957 ; P9BE-NEXT: sldi r3, r3, 48
958 ; P9BE-NEXT: mtvsrd v2, r3
959 ; P9BE-NEXT: vmrghh v2, v2, v4
960 ; P9BE-NEXT: vmrghw v2, v3, v2
961 ; P9BE-NEXT: blr
962 ;
963 ; P8LE-LABEL: dont_fold_srem_power_of_two:
964 ; P8LE: # %bb.0:
965 ; P8LE-NEXT: xxswapd vs0, v2
966 ; P8LE-NEXT: lis r3, -21386
967 ; P8LE-NEXT: ori r3, r3, 37253
968 ; P8LE-NEXT: mfvsrd r4, f0
969 ; P8LE-NEXT: rldicl r5, r4, 16, 48
970 ; P8LE-NEXT: clrldi r7, r4, 48
971 ; P8LE-NEXT: extsh r6, r5
972 ; P8LE-NEXT: extsh r8, r7
973 ; P8LE-NEXT: extsw r6, r6
974 ; P8LE-NEXT: rldicl r9, r4, 48, 48
975 ; P8LE-NEXT: mulld r3, r6, r3
976 ; P8LE-NEXT: srawi r8, r8, 6
977 ; P8LE-NEXT: extsh r10, r9
978 ; P8LE-NEXT: addze r8, r8
979 ; P8LE-NEXT: rldicl r4, r4, 32, 48
980 ; P8LE-NEXT: srawi r10, r10, 5
981 ; P8LE-NEXT: slwi r8, r8, 6
982 ; P8LE-NEXT: subf r7, r8, r7
983 ; P8LE-NEXT: rldicl r3, r3, 32, 32
984 ; P8LE-NEXT: mtvsrd f0, r7
985 ; P8LE-NEXT: add r3, r3, r6
986 ; P8LE-NEXT: addze r6, r10
987 ; P8LE-NEXT: srwi r10, r3, 31
988 ; P8LE-NEXT: srawi r3, r3, 6
989 ; P8LE-NEXT: slwi r6, r6, 5
990 ; P8LE-NEXT: xxswapd v2, vs0
991 ; P8LE-NEXT: add r3, r3, r10
992 ; P8LE-NEXT: extsh r10, r4
993 ; P8LE-NEXT: subf r6, r6, r9
994 ; P8LE-NEXT: mulli r3, r3, 95
995 ; P8LE-NEXT: srawi r8, r10, 3
996 ; P8LE-NEXT: mtvsrd f1, r6
997 ; P8LE-NEXT: addze r7, r8
998 ; P8LE-NEXT: xxswapd v3, vs1
999 ; P8LE-NEXT: subf r3, r3, r5
1000 ; P8LE-NEXT: slwi r5, r7, 3
1001 ; P8LE-NEXT: subf r4, r5, r4
1002 ; P8LE-NEXT: mtvsrd f2, r3
1003 ; P8LE-NEXT: mtvsrd f3, r4
1004 ; P8LE-NEXT: xxswapd v4, vs2
1005 ; P8LE-NEXT: vmrglh v2, v3, v2
1006 ; P8LE-NEXT: xxswapd v5, vs3
1007 ; P8LE-NEXT: vmrglh v3, v4, v5
1008 ; P8LE-NEXT: vmrglw v2, v3, v2
1009 ; P8LE-NEXT: blr
1010 ;
1011 ; P8BE-LABEL: dont_fold_srem_power_of_two:
1012 ; P8BE: # %bb.0:
1013 ; P8BE-NEXT: mfvsrd r4, v2
1014 ; P8BE-NEXT: lis r3, -21386
1015 ; P8BE-NEXT: ori r3, r3, 37253
1016 ; P8BE-NEXT: clrldi r5, r4, 48
1017 ; P8BE-NEXT: rldicl r6, r4, 32, 48
1018 ; P8BE-NEXT: extsh r5, r5
1019 ; P8BE-NEXT: extsh r6, r6
1020 ; P8BE-NEXT: extsw r5, r5
1021 ; P8BE-NEXT: rldicl r7, r4, 16, 48
1022 ; P8BE-NEXT: mulld r3, r5, r3
1023 ; P8BE-NEXT: srawi r8, r6, 5
1024 ; P8BE-NEXT: extsh r7, r7
1025 ; P8BE-NEXT: addze r8, r8
1026 ; P8BE-NEXT: rldicl r4, r4, 48, 48
1027 ; P8BE-NEXT: srawi r9, r7, 6
1028 ; P8BE-NEXT: extsh r4, r4
1029 ; P8BE-NEXT: slwi r8, r8, 5
1030 ; P8BE-NEXT: addze r9, r9
1031 ; P8BE-NEXT: subf r6, r8, r6
1032 ; P8BE-NEXT: rldicl r3, r3, 32, 32
1033 ; P8BE-NEXT: slwi r8, r9, 6
1034 ; P8BE-NEXT: add r3, r3, r5
1035 ; P8BE-NEXT: subf r7, r8, r7
1036 ; P8BE-NEXT: srwi r10, r3, 31
1037 ; P8BE-NEXT: srawi r3, r3, 6
1038 ; P8BE-NEXT: add r3, r3, r10
1039 ; P8BE-NEXT: srawi r9, r4, 3
1040 ; P8BE-NEXT: mulli r3, r3, 95
1041 ; P8BE-NEXT: sldi r6, r6, 48
1042 ; P8BE-NEXT: addze r8, r9
1043 ; P8BE-NEXT: mtvsrd v2, r6
1044 ; P8BE-NEXT: slwi r6, r8, 3
1045 ; P8BE-NEXT: subf r4, r6, r4
1046 ; P8BE-NEXT: sldi r4, r4, 48
1047 ; P8BE-NEXT: subf r3, r3, r5
1048 ; P8BE-NEXT: sldi r5, r7, 48
1049 ; P8BE-NEXT: mtvsrd v5, r4
1050 ; P8BE-NEXT: sldi r3, r3, 48
1051 ; P8BE-NEXT: mtvsrd v3, r5
1052 ; P8BE-NEXT: mtvsrd v4, r3
1053 ; P8BE-NEXT: vmrghh v2, v3, v2
1054 ; P8BE-NEXT: vmrghh v3, v5, v4
1055 ; P8BE-NEXT: vmrghw v2, v2, v3
1056 ; P8BE-NEXT: blr
1057 %1 = srem <4 x i16> %x,
1058 ret <4 x i16> %1
1059 }
1060
1061 ; Don't fold if the divisor is one: lane 0 (divisor 1) is materialized as zero, while lanes 1-3 (654, 23, 5423) use the multiply-by-magic-constant sequence.
1062 define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
1063 ; P9LE-LABEL: dont_fold_srem_one:
1064 ; P9LE: # %bb.0:
1065 ; P9LE-NEXT: li r3, 2
1066 ; P9LE-NEXT: vextuhrx r3, r3, v2
1067 ; P9LE-NEXT: extsh r4, r3
1068 ; P9LE-NEXT: lis r5, -14230
1069 ; P9LE-NEXT: ori r5, r5, 30865
1070 ; P9LE-NEXT: extsw r4, r4
1071 ; P9LE-NEXT: mulld r5, r4, r5
1072 ; P9LE-NEXT: rldicl r5, r5, 32, 32
1073 ; P9LE-NEXT: xxlxor v4, v4, v4
1074 ; P9LE-NEXT: add r4, r5, r4
1075 ; P9LE-NEXT: srwi r5, r4, 31
1076 ; P9LE-NEXT: srawi r4, r4, 9
1077 ; P9LE-NEXT: add r4, r4, r5
1078 ; P9LE-NEXT: lis r5, -19946
1079 ; P9LE-NEXT: mulli r4, r4, 654
1080 ; P9LE-NEXT: subf r3, r4, r3
1081 ; P9LE-NEXT: mtvsrd f0, r3
1082 ; P9LE-NEXT: li r3, 4
1083 ; P9LE-NEXT: vextuhrx r3, r3, v2
1084 ; P9LE-NEXT: extsh r4, r3
1085 ; P9LE-NEXT: extsw r4, r4
1086 ; P9LE-NEXT: ori r5, r5, 17097
1087 ; P9LE-NEXT: mulld r5, r4, r5
1088 ; P9LE-NEXT: rldicl r5, r5, 32, 32
1089 ; P9LE-NEXT: add r4, r5, r4
1090 ; P9LE-NEXT: srwi r5, r4, 31
1091 ; P9LE-NEXT: srawi r4, r4, 4
1092 ; P9LE-NEXT: add r4, r4, r5
1093 ; P9LE-NEXT: lis r5, 24749
1094 ; P9LE-NEXT: mulli r4, r4, 23
1095 ; P9LE-NEXT: subf r3, r4, r3
1096 ; P9LE-NEXT: xxswapd v3, vs0
1097 ; P9LE-NEXT: mtvsrd f0, r3
1098 ; P9LE-NEXT: li r3, 6
1099 ; P9LE-NEXT: vextuhrx r3, r3, v2
1100 ; P9LE-NEXT: extsh r4, r3
1101 ; P9LE-NEXT: extsw r4, r4
1102 ; P9LE-NEXT: ori r5, r5, 47143
1103 ; P9LE-NEXT: mulld r4, r4, r5
1104 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1105 ; P9LE-NEXT: rldicl r4, r4, 32, 32
1106 ; P9LE-NEXT: srawi r4, r4, 11
1107 ; P9LE-NEXT: add r4, r4, r5
1108 ; P9LE-NEXT: mulli r4, r4, 5423
1109 ; P9LE-NEXT: subf r3, r4, r3
1110 ; P9LE-NEXT: vmrglh v3, v3, v4
1111 ; P9LE-NEXT: xxswapd v4, vs0
1112 ; P9LE-NEXT: mtvsrd f0, r3
1113 ; P9LE-NEXT: xxswapd v2, vs0
1114 ; P9LE-NEXT: vmrglh v2, v2, v4
1115 ; P9LE-NEXT: vmrglw v2, v2, v3
1116 ; P9LE-NEXT: blr
1117 ;
1118 ; P9BE-LABEL: dont_fold_srem_one:
1119 ; P9BE: # %bb.0:
1120 ; P9BE-NEXT: li r3, 4
1121 ; P9BE-NEXT: vextuhlx r3, r3, v2
1122 ; P9BE-NEXT: extsh r3, r3
1123 ; P9BE-NEXT: lis r4, -19946
1124 ; P9BE-NEXT: ori r4, r4, 17097
1125 ; P9BE-NEXT: extsw r3, r3
1126 ; P9BE-NEXT: mulld r4, r3, r4
1127 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1128 ; P9BE-NEXT: add r4, r4, r3
1129 ; P9BE-NEXT: srwi r5, r4, 31
1130 ; P9BE-NEXT: srawi r4, r4, 4
1131 ; P9BE-NEXT: add r4, r4, r5
1132 ; P9BE-NEXT: mulli r4, r4, 23
1133 ; P9BE-NEXT: subf r3, r4, r3
1134 ; P9BE-NEXT: lis r4, 24749
1135 ; P9BE-NEXT: sldi r3, r3, 48
1136 ; P9BE-NEXT: mtvsrd v3, r3
1137 ; P9BE-NEXT: li r3, 6
1138 ; P9BE-NEXT: vextuhlx r3, r3, v2
1139 ; P9BE-NEXT: extsh r3, r3
1140 ; P9BE-NEXT: extsw r3, r3
1141 ; P9BE-NEXT: ori r4, r4, 47143
1142 ; P9BE-NEXT: mulld r4, r3, r4
1143 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1144 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1145 ; P9BE-NEXT: srawi r4, r4, 11
1146 ; P9BE-NEXT: add r4, r4, r5
1147 ; P9BE-NEXT: mulli r4, r4, 5423
1148 ; P9BE-NEXT: subf r3, r4, r3
1149 ; P9BE-NEXT: lis r4, -14230
1150 ; P9BE-NEXT: sldi r3, r3, 48
1151 ; P9BE-NEXT: mtvsrd v4, r3
1152 ; P9BE-NEXT: li r3, 2
1153 ; P9BE-NEXT: vextuhlx r3, r3, v2
1154 ; P9BE-NEXT: extsh r3, r3
1155 ; P9BE-NEXT: extsw r3, r3
1156 ; P9BE-NEXT: ori r4, r4, 30865
1157 ; P9BE-NEXT: mulld r4, r3, r4
1158 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1159 ; P9BE-NEXT: add r4, r4, r3
1160 ; P9BE-NEXT: srwi r5, r4, 31
1161 ; P9BE-NEXT: srawi r4, r4, 9
1162 ; P9BE-NEXT: add r4, r4, r5
1163 ; P9BE-NEXT: mulli r4, r4, 654
1164 ; P9BE-NEXT: subf r3, r4, r3
1165 ; P9BE-NEXT: sldi r3, r3, 48
1166 ; P9BE-NEXT: mtvsrd v2, r3
1167 ; P9BE-NEXT: li r3, 0
1168 ; P9BE-NEXT: sldi r3, r3, 48
1169 ; P9BE-NEXT: vmrghh v3, v3, v4
1170 ; P9BE-NEXT: mtvsrd v4, r3
1171 ; P9BE-NEXT: vmrghh v2, v4, v2
1172 ; P9BE-NEXT: vmrghw v2, v2, v3
1173 ; P9BE-NEXT: blr
1174 ;
1175 ; P8LE-LABEL: dont_fold_srem_one:
1176 ; P8LE: # %bb.0:
1177 ; P8LE-NEXT: xxswapd vs0, v2
1178 ; P8LE-NEXT: lis r3, 24749
1179 ; P8LE-NEXT: lis r8, -19946
1180 ; P8LE-NEXT: lis r10, -14230
1181 ; P8LE-NEXT: xxlxor v5, v5, v5
1182 ; P8LE-NEXT: ori r3, r3, 47143
1183 ; P8LE-NEXT: ori r8, r8, 17097
1184 ; P8LE-NEXT: mfvsrd r4, f0
1185 ; P8LE-NEXT: rldicl r5, r4, 16, 48
1186 ; P8LE-NEXT: rldicl r6, r4, 32, 48
1187 ; P8LE-NEXT: rldicl r4, r4, 48, 48
1188 ; P8LE-NEXT: extsh r7, r5
1189 ; P8LE-NEXT: extsh r9, r6
1190 ; P8LE-NEXT: extsw r7, r7
1191 ; P8LE-NEXT: extsh r11, r4
1192 ; P8LE-NEXT: extsw r9, r9
1193 ; P8LE-NEXT: mulld r3, r7, r3
1194 ; P8LE-NEXT: ori r7, r10, 30865
1195 ; P8LE-NEXT: extsw r10, r11
1196 ; P8LE-NEXT: mulld r8, r9, r8
1197 ; P8LE-NEXT: mulld r7, r10, r7
1198 ; P8LE-NEXT: rldicl r11, r3, 1, 63
1199 ; P8LE-NEXT: rldicl r3, r3, 32, 32
1200 ; P8LE-NEXT: rldicl r8, r8, 32, 32
1201 ; P8LE-NEXT: rldicl r7, r7, 32, 32
1202 ; P8LE-NEXT: add r8, r8, r9
1203 ; P8LE-NEXT: srawi r3, r3, 11
1204 ; P8LE-NEXT: add r7, r7, r10
1205 ; P8LE-NEXT: srwi r9, r8, 31
1206 ; P8LE-NEXT: srawi r8, r8, 4
1207 ; P8LE-NEXT: add r3, r3, r11
1208 ; P8LE-NEXT: add r8, r8, r9
1209 ; P8LE-NEXT: srwi r9, r7, 31
1210 ; P8LE-NEXT: srawi r7, r7, 9
1211 ; P8LE-NEXT: mulli r3, r3, 5423
1212 ; P8LE-NEXT: add r7, r7, r9
1213 ; P8LE-NEXT: mulli r8, r8, 23
1214 ; P8LE-NEXT: mulli r7, r7, 654
1215 ; P8LE-NEXT: subf r3, r3, r5
1216 ; P8LE-NEXT: mtvsrd f0, r3
1217 ; P8LE-NEXT: subf r3, r8, r6
1218 ; P8LE-NEXT: subf r4, r7, r4
1219 ; P8LE-NEXT: mtvsrd f1, r3
1220 ; P8LE-NEXT: mtvsrd f2, r4
1221 ; P8LE-NEXT: xxswapd v2, vs0
1222 ; P8LE-NEXT: xxswapd v3, vs1
1223 ; P8LE-NEXT: xxswapd v4, vs2
1224 ; P8LE-NEXT: vmrglh v2, v2, v3
1225 ; P8LE-NEXT: vmrglh v3, v4, v5
1226 ; P8LE-NEXT: vmrglw v2, v2, v3
1227 ; P8LE-NEXT: blr
1228 ;
1229 ; P8BE-LABEL: dont_fold_srem_one:
1230 ; P8BE: # %bb.0:
1231 ; P8BE-NEXT: mfvsrd r4, v2
1232 ; P8BE-NEXT: lis r3, 24749
1233 ; P8BE-NEXT: lis r7, -19946
1234 ; P8BE-NEXT: lis r8, -14230
1235 ; P8BE-NEXT: ori r3, r3, 47143
1236 ; P8BE-NEXT: ori r7, r7, 17097
1237 ; P8BE-NEXT: ori r8, r8, 30865
1238 ; P8BE-NEXT: clrldi r5, r4, 48
1239 ; P8BE-NEXT: rldicl r6, r4, 48, 48
1240 ; P8BE-NEXT: rldicl r4, r4, 32, 48
1241 ; P8BE-NEXT: extsh r5, r5
1242 ; P8BE-NEXT: extsh r6, r6
1243 ; P8BE-NEXT: extsh r4, r4
1244 ; P8BE-NEXT: extsw r5, r5
1245 ; P8BE-NEXT: extsw r6, r6
1246 ; P8BE-NEXT: extsw r4, r4
1247 ; P8BE-NEXT: mulld r3, r5, r3
1248 ; P8BE-NEXT: mulld r7, r6, r7
1249 ; P8BE-NEXT: mulld r8, r4, r8
1250 ; P8BE-NEXT: rldicl r9, r3, 1, 63
1251 ; P8BE-NEXT: rldicl r3, r3, 32, 32
1252 ; P8BE-NEXT: rldicl r7, r7, 32, 32
1253 ; P8BE-NEXT: rldicl r8, r8, 32, 32
1254 ; P8BE-NEXT: srawi r3, r3, 11
1255 ; P8BE-NEXT: add r7, r7, r6
1256 ; P8BE-NEXT: add r8, r8, r4
1257 ; P8BE-NEXT: add r3, r3, r9
1258 ; P8BE-NEXT: srwi r9, r7, 31
1259 ; P8BE-NEXT: srawi r7, r7, 4
1260 ; P8BE-NEXT: mulli r3, r3, 5423
1261 ; P8BE-NEXT: add r7, r7, r9
1262 ; P8BE-NEXT: srwi r9, r8, 31
1263 ; P8BE-NEXT: srawi r8, r8, 9
1264 ; P8BE-NEXT: mulli r7, r7, 23
1265 ; P8BE-NEXT: add r8, r8, r9
1266 ; P8BE-NEXT: li r9, 0
1267 ; P8BE-NEXT: mulli r8, r8, 654
1268 ; P8BE-NEXT: subf r3, r3, r5
1269 ; P8BE-NEXT: sldi r5, r9, 48
1270 ; P8BE-NEXT: sldi r3, r3, 48
1271 ; P8BE-NEXT: mtvsrd v2, r5
1272 ; P8BE-NEXT: subf r5, r7, r6
1273 ; P8BE-NEXT: mtvsrd v3, r3
1274 ; P8BE-NEXT: sldi r3, r5, 48
1275 ; P8BE-NEXT: subf r4, r8, r4
1276 ; P8BE-NEXT: mtvsrd v4, r3
1277 ; P8BE-NEXT: sldi r4, r4, 48
1278 ; P8BE-NEXT: mtvsrd v5, r4
1279 ; P8BE-NEXT: vmrghh v3, v4, v3
1280 ; P8BE-NEXT: vmrghh v2, v2, v5
1281 ; P8BE-NEXT: vmrghw v2, v2, v3
1282 ; P8BE-NEXT: blr
1283 %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
1284 ret <4 x i16> %1
1285 }
1286
1287 ; Don't fold if the divisor is 2^15 (lane 1, lowered to srawi/addze/slwi by 15); lane 0 divides by one. NOTE: despite 'urem' in the name, the operation under test is srem.
1288 define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
1289 ; P9LE-LABEL: dont_fold_urem_i16_smax:
1290 ; P9LE: # %bb.0:
1291 ; P9LE-NEXT: li r3, 4
1292 ; P9LE-NEXT: vextuhrx r3, r3, v2
1293 ; P9LE-NEXT: extsh r4, r3
1294 ; P9LE-NEXT: lis r5, -19946
1295 ; P9LE-NEXT: ori r5, r5, 17097
1296 ; P9LE-NEXT: extsw r4, r4
1297 ; P9LE-NEXT: mulld r5, r4, r5
1298 ; P9LE-NEXT: rldicl r5, r5, 32, 32
1299 ; P9LE-NEXT: add r4, r5, r4
1300 ; P9LE-NEXT: srwi r5, r4, 31
1301 ; P9LE-NEXT: srawi r4, r4, 4
1302 ; P9LE-NEXT: add r4, r4, r5
1303 ; P9LE-NEXT: lis r5, 24749
1304 ; P9LE-NEXT: mulli r4, r4, 23
1305 ; P9LE-NEXT: subf r3, r4, r3
1306 ; P9LE-NEXT: mtvsrd f0, r3
1307 ; P9LE-NEXT: li r3, 6
1308 ; P9LE-NEXT: vextuhrx r3, r3, v2
1309 ; P9LE-NEXT: extsh r4, r3
1310 ; P9LE-NEXT: extsw r4, r4
1311 ; P9LE-NEXT: ori r5, r5, 47143
1312 ; P9LE-NEXT: mulld r4, r4, r5
1313 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1314 ; P9LE-NEXT: rldicl r4, r4, 32, 32
1315 ; P9LE-NEXT: srawi r4, r4, 11
1316 ; P9LE-NEXT: add r4, r4, r5
1317 ; P9LE-NEXT: mulli r4, r4, 5423
1318 ; P9LE-NEXT: subf r3, r4, r3
1319 ; P9LE-NEXT: xxswapd v3, vs0
1320 ; P9LE-NEXT: mtvsrd f0, r3
1321 ; P9LE-NEXT: li r3, 2
1322 ; P9LE-NEXT: vextuhrx r3, r3, v2
1323 ; P9LE-NEXT: extsh r4, r3
1324 ; P9LE-NEXT: srawi r4, r4, 15
1325 ; P9LE-NEXT: addze r4, r4
1326 ; P9LE-NEXT: slwi r4, r4, 15
1327 ; P9LE-NEXT: subf r3, r4, r3
1328 ; P9LE-NEXT: xxswapd v4, vs0
1329 ; P9LE-NEXT: mtvsrd f0, r3
1330 ; P9LE-NEXT: xxswapd v2, vs0
1331 ; P9LE-NEXT: vmrglh v3, v4, v3
1332 ; P9LE-NEXT: xxlxor v4, v4, v4
1333 ; P9LE-NEXT: vmrglh v2, v2, v4
1334 ; P9LE-NEXT: vmrglw v2, v3, v2
1335 ; P9LE-NEXT: blr
1336 ;
1337 ; P9BE-LABEL: dont_fold_urem_i16_smax:
1338 ; P9BE: # %bb.0:
1339 ; P9BE-NEXT: li r3, 4
1340 ; P9BE-NEXT: vextuhlx r3, r3, v2
1341 ; P9BE-NEXT: extsh r3, r3
1342 ; P9BE-NEXT: lis r4, -19946
1343 ; P9BE-NEXT: ori r4, r4, 17097
1344 ; P9BE-NEXT: extsw r3, r3
1345 ; P9BE-NEXT: mulld r4, r3, r4
1346 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1347 ; P9BE-NEXT: add r4, r4, r3
1348 ; P9BE-NEXT: srwi r5, r4, 31
1349 ; P9BE-NEXT: srawi r4, r4, 4
1350 ; P9BE-NEXT: add r4, r4, r5
1351 ; P9BE-NEXT: mulli r4, r4, 23
1352 ; P9BE-NEXT: subf r3, r4, r3
1353 ; P9BE-NEXT: lis r4, 24749
1354 ; P9BE-NEXT: sldi r3, r3, 48
1355 ; P9BE-NEXT: mtvsrd v3, r3
1356 ; P9BE-NEXT: li r3, 6
1357 ; P9BE-NEXT: vextuhlx r3, r3, v2
1358 ; P9BE-NEXT: extsh r3, r3
1359 ; P9BE-NEXT: extsw r3, r3
1360 ; P9BE-NEXT: ori r4, r4, 47143
1361 ; P9BE-NEXT: mulld r4, r3, r4
1362 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1363 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1364 ; P9BE-NEXT: srawi r4, r4, 11
1365 ; P9BE-NEXT: add r4, r4, r5
1366 ; P9BE-NEXT: mulli r4, r4, 5423
1367 ; P9BE-NEXT: subf r3, r4, r3
1368 ; P9BE-NEXT: sldi r3, r3, 48
1369 ; P9BE-NEXT: mtvsrd v4, r3
1370 ; P9BE-NEXT: li r3, 2
1371 ; P9BE-NEXT: vextuhlx r3, r3, v2
1372 ; P9BE-NEXT: extsh r3, r3
1373 ; P9BE-NEXT: srawi r4, r3, 15
1374 ; P9BE-NEXT: addze r4, r4
1375 ; P9BE-NEXT: slwi r4, r4, 15
1376 ; P9BE-NEXT: subf r3, r4, r3
1377 ; P9BE-NEXT: sldi r3, r3, 48
1378 ; P9BE-NEXT: mtvsrd v2, r3
1379 ; P9BE-NEXT: li r3, 0
1380 ; P9BE-NEXT: sldi r3, r3, 48
1381 ; P9BE-NEXT: vmrghh v3, v3, v4
1382 ; P9BE-NEXT: mtvsrd v4, r3
1383 ; P9BE-NEXT: vmrghh v2, v4, v2
1384 ; P9BE-NEXT: vmrghw v2, v2, v3
1385 ; P9BE-NEXT: blr
1386 ;
1387 ; P8LE-LABEL: dont_fold_urem_i16_smax:
1388 ; P8LE: # %bb.0:
1389 ; P8LE-NEXT: xxswapd vs0, v2
1390 ; P8LE-NEXT: lis r6, 24749
1391 ; P8LE-NEXT: lis r7, -19946
1392 ; P8LE-NEXT: xxlxor v5, v5, v5
1393 ; P8LE-NEXT: ori r6, r6, 47143
1394 ; P8LE-NEXT: ori r7, r7, 17097
1395 ; P8LE-NEXT: mfvsrd r3, f0
1396 ; P8LE-NEXT: rldicl r4, r3, 16, 48
1397 ; P8LE-NEXT: rldicl r5, r3, 32, 48
1398 ; P8LE-NEXT: extsh r8, r4
1399 ; P8LE-NEXT: extsh r9, r5
1400 ; P8LE-NEXT: extsw r8, r8
1401 ; P8LE-NEXT: extsw r9, r9
1402 ; P8LE-NEXT: mulld r6, r8, r6
1403 ; P8LE-NEXT: mulld r7, r9, r7
1404 ; P8LE-NEXT: rldicl r3, r3, 48, 48
1405 ; P8LE-NEXT: rldicl r8, r6, 32, 32
1406 ; P8LE-NEXT: rldicl r7, r7, 32, 32
1407 ; P8LE-NEXT: rldicl r6, r6, 1, 63
1408 ; P8LE-NEXT: srawi r8, r8, 11
1409 ; P8LE-NEXT: add r7, r7, r9
1410 ; P8LE-NEXT: add r6, r8, r6
1411 ; P8LE-NEXT: srwi r8, r7, 31
1412 ; P8LE-NEXT: srawi r7, r7, 4
1413 ; P8LE-NEXT: mulli r6, r6, 5423
1414 ; P8LE-NEXT: add r7, r7, r8
1415 ; P8LE-NEXT: extsh r8, r3
1416 ; P8LE-NEXT: mulli r7, r7, 23
1417 ; P8LE-NEXT: srawi r8, r8, 15
1418 ; P8LE-NEXT: subf r4, r6, r4
1419 ; P8LE-NEXT: addze r6, r8
1420 ; P8LE-NEXT: mtvsrd f0, r4
1421 ; P8LE-NEXT: slwi r4, r6, 15
1422 ; P8LE-NEXT: subf r5, r7, r5
1423 ; P8LE-NEXT: subf r3, r4, r3
1424 ; P8LE-NEXT: mtvsrd f1, r5
1425 ; P8LE-NEXT: xxswapd v2, vs0
1426 ; P8LE-NEXT: mtvsrd f2, r3
1427 ; P8LE-NEXT: xxswapd v3, vs1
1428 ; P8LE-NEXT: xxswapd v4, vs2
1429 ; P8LE-NEXT: vmrglh v2, v2, v3
1430 ; P8LE-NEXT: vmrglh v3, v4, v5
1431 ; P8LE-NEXT: vmrglw v2, v2, v3
1432 ; P8LE-NEXT: blr
1433 ;
1434 ; P8BE-LABEL: dont_fold_urem_i16_smax:
1435 ; P8BE: # %bb.0:
1436 ; P8BE-NEXT: mfvsrd r4, v2
1437 ; P8BE-NEXT: lis r3, 24749
1438 ; P8BE-NEXT: lis r7, -19946
1439 ; P8BE-NEXT: ori r3, r3, 47143
1440 ; P8BE-NEXT: ori r7, r7, 17097
1441 ; P8BE-NEXT: clrldi r5, r4, 48
1442 ; P8BE-NEXT: rldicl r6, r4, 48, 48
1443 ; P8BE-NEXT: extsh r5, r5
1444 ; P8BE-NEXT: extsh r6, r6
1445 ; P8BE-NEXT: extsw r5, r5
1446 ; P8BE-NEXT: extsw r6, r6
1447 ; P8BE-NEXT: mulld r3, r5, r3
1448 ; P8BE-NEXT: mulld r7, r6, r7
1449 ; P8BE-NEXT: rldicl r4, r4, 32, 48
1450 ; P8BE-NEXT: extsh r4, r4
1451 ; P8BE-NEXT: rldicl r8, r3, 1, 63
1452 ; P8BE-NEXT: rldicl r3, r3, 32, 32
1453 ; P8BE-NEXT: rldicl r7, r7, 32, 32
1454 ; P8BE-NEXT: srawi r3, r3, 11
1455 ; P8BE-NEXT: add r7, r7, r6
1456 ; P8BE-NEXT: add r3, r3, r8
1457 ; P8BE-NEXT: srwi r8, r7, 31
1458 ; P8BE-NEXT: srawi r7, r7, 4
1459 ; P8BE-NEXT: mulli r3, r3, 5423
1460 ; P8BE-NEXT: add r7, r7, r8
1461 ; P8BE-NEXT: li r8, 0
1462 ; P8BE-NEXT: mulli r7, r7, 23
1463 ; P8BE-NEXT: srawi r9, r4, 15
1464 ; P8BE-NEXT: subf r3, r3, r5
1465 ; P8BE-NEXT: sldi r5, r8, 48
1466 ; P8BE-NEXT: addze r8, r9
1467 ; P8BE-NEXT: mtvsrd v2, r5
1468 ; P8BE-NEXT: subf r5, r7, r6
1469 ; P8BE-NEXT: slwi r6, r8, 15
1470 ; P8BE-NEXT: sldi r3, r3, 48
1471 ; P8BE-NEXT: subf r4, r6, r4
1472 ; P8BE-NEXT: mtvsrd v3, r3
1473 ; P8BE-NEXT: sldi r3, r5, 48
1474 ; P8BE-NEXT: sldi r4, r4, 48
1475 ; P8BE-NEXT: mtvsrd v4, r3
1476 ; P8BE-NEXT: mtvsrd v5, r4
1477 ; P8BE-NEXT: vmrghh v3, v4, v3
1478 ; P8BE-NEXT: vmrghh v2, v2, v5
1479 ; P8BE-NEXT: vmrghw v2, v2, v3
1480 ; P8BE-NEXT: blr
1481 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
1482 ret <4 x i16> %1
1483 }
1484
1485 ; Don't fold i64 srem: lane 0 (divisor 1) is materialized as zero; lanes 1-3 (654, 23, 5423) are expanded with mulhd by a magic constant.
1486 define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
1487 ; P9LE-LABEL: dont_fold_srem_i64:
1488 ; P9LE: # %bb.0:
1489 ; P9LE-NEXT: lis r4, 24749
1490 ; P9LE-NEXT: ori r4, r4, 47142
1491 ; P9LE-NEXT: sldi r4, r4, 32
1492 ; P9LE-NEXT: oris r4, r4, 58853
1493 ; P9LE-NEXT: mfvsrd r3, v3
1494 ; P9LE-NEXT: ori r4, r4, 6055
1495 ; P9LE-NEXT: mulhd r4, r3, r4
1496 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1497 ; P9LE-NEXT: sradi r4, r4, 11
1498 ; P9LE-NEXT: add r4, r4, r5
1499 ; P9LE-NEXT: lis r5, -19946
1500 ; P9LE-NEXT: mulli r4, r4, 5423
1501 ; P9LE-NEXT: ori r5, r5, 17096
1502 ; P9LE-NEXT: sldi r5, r5, 32
1503 ; P9LE-NEXT: oris r5, r5, 22795
1504 ; P9LE-NEXT: sub r3, r3, r4
1505 ; P9LE-NEXT: mfvsrld r4, v3
1506 ; P9LE-NEXT: ori r5, r5, 8549
1507 ; P9LE-NEXT: mulhd r5, r4, r5
1508 ; P9LE-NEXT: add r5, r5, r4
1509 ; P9LE-NEXT: rldicl r6, r5, 1, 63
1510 ; P9LE-NEXT: sradi r5, r5, 4
1511 ; P9LE-NEXT: add r5, r5, r6
1512 ; P9LE-NEXT: mulli r5, r5, 23
1513 ; P9LE-NEXT: sub r4, r4, r5
1514 ; P9LE-NEXT: mtvsrdd v3, r3, r4
1515 ; P9LE-NEXT: lis r4, 25653
1516 ; P9LE-NEXT: ori r4, r4, 15432
1517 ; P9LE-NEXT: sldi r4, r4, 32
1518 ; P9LE-NEXT: oris r4, r4, 1603
1519 ; P9LE-NEXT: mfvsrd r3, v2
1520 ; P9LE-NEXT: ori r4, r4, 21445
1521 ; P9LE-NEXT: mulhd r4, r3, r4
1522 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1523 ; P9LE-NEXT: sradi r4, r4, 8
1524 ; P9LE-NEXT: add r4, r4, r5
1525 ; P9LE-NEXT: mulli r4, r4, 654
1526 ; P9LE-NEXT: sub r3, r3, r4
1527 ; P9LE-NEXT: li r4, 0
1528 ; P9LE-NEXT: mtvsrdd v2, r3, r4
1529 ; P9LE-NEXT: blr
1530 ;
1531 ; P9BE-LABEL: dont_fold_srem_i64:
1532 ; P9BE: # %bb.0:
1533 ; P9BE-NEXT: lis r4, 24749
1534 ; P9BE-NEXT: ori r4, r4, 47142
1535 ; P9BE-NEXT: sldi r4, r4, 32
1536 ; P9BE-NEXT: oris r4, r4, 58853
1537 ; P9BE-NEXT: mfvsrld r3, v3
1538 ; P9BE-NEXT: ori r4, r4, 6055
1539 ; P9BE-NEXT: mulhd r4, r3, r4
1540 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1541 ; P9BE-NEXT: sradi r4, r4, 11
1542 ; P9BE-NEXT: add r4, r4, r5
1543 ; P9BE-NEXT: lis r5, -19946
1544 ; P9BE-NEXT: ori r5, r5, 17096
1545 ; P9BE-NEXT: mulli r4, r4, 5423
1546 ; P9BE-NEXT: sldi r5, r5, 32
1547 ; P9BE-NEXT: oris r5, r5, 22795
1548 ; P9BE-NEXT: sub r3, r3, r4
1549 ; P9BE-NEXT: mfvsrd r4, v3
1550 ; P9BE-NEXT: ori r5, r5, 8549
1551 ; P9BE-NEXT: mulhd r5, r4, r5
1552 ; P9BE-NEXT: add r5, r5, r4
1553 ; P9BE-NEXT: rldicl r6, r5, 1, 63
1554 ; P9BE-NEXT: sradi r5, r5, 4
1555 ; P9BE-NEXT: add r5, r5, r6
1556 ; P9BE-NEXT: mulli r5, r5, 23
1557 ; P9BE-NEXT: sub r4, r4, r5
1558 ; P9BE-NEXT: mtvsrdd v3, r4, r3
1559 ; P9BE-NEXT: lis r4, 25653
1560 ; P9BE-NEXT: ori r4, r4, 15432
1561 ; P9BE-NEXT: sldi r4, r4, 32
1562 ; P9BE-NEXT: oris r4, r4, 1603
1563 ; P9BE-NEXT: mfvsrld r3, v2
1564 ; P9BE-NEXT: ori r4, r4, 21445
1565 ; P9BE-NEXT: mulhd r4, r3, r4
1566 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1567 ; P9BE-NEXT: sradi r4, r4, 8
1568 ; P9BE-NEXT: add r4, r4, r5
1569 ; P9BE-NEXT: mulli r4, r4, 654
1570 ; P9BE-NEXT: sub r3, r3, r4
1571 ; P9BE-NEXT: mtvsrdd v2, 0, r3
1572 ; P9BE-NEXT: blr
1573 ;
1574 ; P8LE-LABEL: dont_fold_srem_i64:
1575 ; P8LE: # %bb.0:
1576 ; P8LE-NEXT: lis r3, 24749
1577 ; P8LE-NEXT: lis r4, -19946
1578 ; P8LE-NEXT: lis r5, 25653
1579 ; P8LE-NEXT: xxswapd vs0, v3
1580 ; P8LE-NEXT: mfvsrd r6, v3
1581 ; P8LE-NEXT: ori r3, r3, 47142
1582 ; P8LE-NEXT: ori r4, r4, 17096
1583 ; P8LE-NEXT: ori r5, r5, 15432
1584 ; P8LE-NEXT: mfvsrd r7, v2
1585 ; P8LE-NEXT: sldi r3, r3, 32
1586 ; P8LE-NEXT: sldi r4, r4, 32
1587 ; P8LE-NEXT: sldi r5, r5, 32
1588 ; P8LE-NEXT: oris r3, r3, 58853
1589 ; P8LE-NEXT: oris r4, r4, 22795
1590 ; P8LE-NEXT: mfvsrd r8, f0
1591 ; P8LE-NEXT: oris r5, r5, 1603
1592 ; P8LE-NEXT: ori r3, r3, 6055
1593 ; P8LE-NEXT: ori r4, r4, 8549
1594 ; P8LE-NEXT: ori r5, r5, 21445
1595 ; P8LE-NEXT: mulhd r3, r6, r3
1596 ; P8LE-NEXT: mulhd r5, r7, r5
1597 ; P8LE-NEXT: mulhd r4, r8, r4
1598 ; P8LE-NEXT: rldicl r9, r3, 1, 63
1599 ; P8LE-NEXT: sradi r3, r3, 11
1600 ; P8LE-NEXT: add r3, r3, r9
1601 ; P8LE-NEXT: rldicl r9, r5, 1, 63
1602 ; P8LE-NEXT: add r4, r4, r8
1603 ; P8LE-NEXT: sradi r5, r5, 8
1604 ; P8LE-NEXT: mulli r3, r3, 5423
1605 ; P8LE-NEXT: add r5, r5, r9
1606 ; P8LE-NEXT: rldicl r9, r4, 1, 63
1607 ; P8LE-NEXT: sradi r4, r4, 4
1608 ; P8LE-NEXT: mulli r5, r5, 654
1609 ; P8LE-NEXT: add r4, r4, r9
1610 ; P8LE-NEXT: mulli r4, r4, 23
1611 ; P8LE-NEXT: sub r3, r6, r3
1612 ; P8LE-NEXT: mtvsrd f0, r3
1613 ; P8LE-NEXT: sub r5, r7, r5
1614 ; P8LE-NEXT: mtvsrd f1, r5
1615 ; P8LE-NEXT: sub r3, r8, r4
1616 ; P8LE-NEXT: li r4, 0
1617 ; P8LE-NEXT: mtvsrd f2, r3
1618 ; P8LE-NEXT: mtvsrd f3, r4
1619 ; P8LE-NEXT: xxmrghd v3, vs0, vs2
1620 ; P8LE-NEXT: xxmrghd v2, vs1, vs3
1621 ; P8LE-NEXT: blr
1622 ;
1623 ; P8BE-LABEL: dont_fold_srem_i64:
1624 ; P8BE: # %bb.0:
1625 ; P8BE-NEXT: lis r4, -19946
1626 ; P8BE-NEXT: lis r3, 24749
1627 ; P8BE-NEXT: xxswapd vs0, v3
1628 ; P8BE-NEXT: lis r5, 25653
1629 ; P8BE-NEXT: xxswapd vs1, v2
1630 ; P8BE-NEXT: ori r4, r4, 17096
1631 ; P8BE-NEXT: ori r3, r3, 47142
1632 ; P8BE-NEXT: ori r5, r5, 15432
1633 ; P8BE-NEXT: mfvsrd r6, v3
1634 ; P8BE-NEXT: sldi r4, r4, 32
1635 ; P8BE-NEXT: sldi r3, r3, 32
1636 ; P8BE-NEXT: oris r4, r4, 22795
1637 ; P8BE-NEXT: sldi r5, r5, 32
1638 ; P8BE-NEXT: oris r3, r3, 58853
1639 ; P8BE-NEXT: mfvsrd r7, f0
1640 ; P8BE-NEXT: ori r4, r4, 8549
1641 ; P8BE-NEXT: ori r3, r3, 6055
1642 ; P8BE-NEXT: oris r5, r5, 1603
1643 ; P8BE-NEXT: mfvsrd r8, f1
1644 ; P8BE-NEXT: mulhd r4, r6, r4
1645 ; P8BE-NEXT: mulhd r3, r7, r3
1646 ; P8BE-NEXT: ori r5, r5, 21445
1647 ; P8BE-NEXT: mulhd r5, r8, r5
1648 ; P8BE-NEXT: add r4, r4, r6
1649 ; P8BE-NEXT: rldicl r9, r3, 1, 63
1650 ; P8BE-NEXT: sradi r3, r3, 11
1651 ; P8BE-NEXT: rldicl r10, r4, 1, 63
1652 ; P8BE-NEXT: sradi r4, r4, 4
1653 ; P8BE-NEXT: add r3, r3, r9
1654 ; P8BE-NEXT: rldicl r9, r5, 1, 63
1655 ; P8BE-NEXT: add r4, r4, r10
1656 ; P8BE-NEXT: sradi r5, r5, 8
1657 ; P8BE-NEXT: mulli r3, r3, 5423
1658 ; P8BE-NEXT: add r5, r5, r9
1659 ; P8BE-NEXT: mulli r4, r4, 23
1660 ; P8BE-NEXT: mulli r5, r5, 654
1661 ; P8BE-NEXT: sub r3, r7, r3
1662 ; P8BE-NEXT: sub r4, r6, r4
1663 ; P8BE-NEXT: mtvsrd f0, r3
1664 ; P8BE-NEXT: sub r3, r8, r5
1665 ; P8BE-NEXT: mtvsrd f1, r4
1666 ; P8BE-NEXT: li r4, 0
1667 ; P8BE-NEXT: mtvsrd f2, r3
1668 ; P8BE-NEXT: mtvsrd f3, r4
1669 ; P8BE-NEXT: xxmrghd v3, vs1, vs0
1670 ; P8BE-NEXT: xxmrghd v2, vs3, vs2
1671 ; P8BE-NEXT: blr
1672 %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
1673 ret <4 x i64> %1
1674 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc64 < %s | FileCheck -check-prefixes=CHECK,CHECK64 %s
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc < %s | FileCheck -check-prefixes=CHECK,CHECK32 %s
3 ; urem by the odd constant 95: expected lowering is mulhwu by a magic constant, a subf/srwi/add fix-up and srwi, then mulli/subf to recover the remainder.
4 define i32 @fold_urem_positive_odd(i32 %x) {
5 ; CHECK-LABEL: fold_urem_positive_odd:
6 ; CHECK: # %bb.0:
7 ; CHECK-NEXT: lis 4, 22765
8 ; CHECK-NEXT: ori 4, 4, 8969
9 ; CHECK-NEXT: mulhwu 4, 3, 4
10 ; CHECK-NEXT: subf 5, 4, 3
11 ; CHECK-NEXT: srwi 5, 5, 1
12 ; CHECK-NEXT: add 4, 5, 4
13 ; CHECK-NEXT: srwi 4, 4, 6
14 ; CHECK-NEXT: mulli 4, 4, 95
15 ; CHECK-NEXT: subf 3, 4, 3
16 ; CHECK-NEXT: blr
17 %1 = urem i32 %x, 95
18 ret i32 %1
19 }
20
21 ; urem by the even constant 1060: expected lowering is mulhwu by a magic constant and a single srwi, then mulli/subf to recover the remainder.
22 define i32 @fold_urem_positive_even(i32 %x) {
23 ; CHECK-LABEL: fold_urem_positive_even:
24 ; CHECK: # %bb.0:
25 ; CHECK-NEXT: lis 4, -2226
26 ; CHECK-NEXT: ori 4, 4, 16323
27 ; CHECK-NEXT: mulhwu 4, 3, 4
28 ; CHECK-NEXT: srwi 4, 4, 10
29 ; CHECK-NEXT: mulli 4, 4, 1060
30 ; CHECK-NEXT: subf 3, 4, 3
31 ; CHECK-NEXT: blr
32 %1 = urem i32 %x, 1060
33 ret i32 %1
34 }
35
36
37 ; Don't fold if we can combine urem with udiv: the quotient (register 4) is computed once, reused for the remainder via mulli/subf, then added to it.
38 define i32 @combine_urem_udiv(i32 %x) {
39 ; CHECK-LABEL: combine_urem_udiv:
40 ; CHECK: # %bb.0:
41 ; CHECK-NEXT: lis 4, 22765
42 ; CHECK-NEXT: ori 4, 4, 8969
43 ; CHECK-NEXT: mulhwu 4, 3, 4
44 ; CHECK-NEXT: subf 5, 4, 3
45 ; CHECK-NEXT: srwi 5, 5, 1
46 ; CHECK-NEXT: add 4, 5, 4
47 ; CHECK-NEXT: srwi 4, 4, 6
48 ; CHECK-NEXT: mulli 5, 4, 95
49 ; CHECK-NEXT: subf 3, 5, 3
50 ; CHECK-NEXT: add 3, 3, 4
51 ; CHECK-NEXT: blr
52 %1 = urem i32 %x, 95
53 %2 = udiv i32 %x, 95
54 %3 = add i32 %1, %2
55 ret i32 %3
56 }
57
58 ; Don't fold for divisors that are a power of two: urem by 64 is expected to become a single clrlwi that keeps only the low 6 bits.
59 define i32 @dont_fold_urem_power_of_two(i32 %x) {
60 ; CHECK-LABEL: dont_fold_urem_power_of_two:
61 ; CHECK: # %bb.0:
62 ; CHECK-NEXT: clrlwi 3, 3, 26
63 ; CHECK-NEXT: blr
64 %1 = urem i32 %x, 64
65 ret i32 %1
66 }
67
68 ; Don't fold if the divisor is one: the expected output simply returns the constant 0 (li 3, 0).
69 define i32 @dont_fold_urem_one(i32 %x) {
70 ; CHECK-LABEL: dont_fold_urem_one:
71 ; CHECK: # %bb.0:
72 ; CHECK-NEXT: li 3, 0
73 ; CHECK-NEXT: blr
74 %1 = urem i32 %x, 1
75 ret i32 %1
76 }
77
78 ; Don't fold if the divisor is 2^32. NOTE(review): 4294967296 does not fit in i32 and 'umax' would be 4294967295; the expected output is a bare blr, so presumably the literal wraps to 0 - confirm the intended divisor.
79 define i32 @dont_fold_urem_i32_umax(i32 %x) {
80 ; CHECK-LABEL: dont_fold_urem_i32_umax:
81 ; CHECK: # %bb.0:
82 ; CHECK-NEXT: blr
83 %1 = urem i32 %x, 4294967296
84 ret i32 %1
85 }
86
87 ; Don't fold i64 urem: the expected output is a call to __umoddi3 with the divisor 98 set up in registers 5 and 6 (li 5, 0 / li 6, 98).
88 define i64 @dont_fold_urem_i64(i64 %x) {
89 ; CHECK-LABEL: dont_fold_urem_i64:
90 ; CHECK: # %bb.0:
91 ; CHECK-NEXT: mflr 0
92 ; CHECK-NEXT: stw 0, 4(1)
93 ; CHECK-NEXT: stwu 1, -16(1)
94 ; CHECK-NEXT: .cfi_def_cfa_offset 16
95 ; CHECK-NEXT: .cfi_offset lr, 4
96 ; CHECK-NEXT: li 5, 0
97 ; CHECK-NEXT: li 6, 98
98 ; CHECK-NEXT: bl __umoddi3@PLT
99 ; CHECK-NEXT: lwz 0, 20(1)
100 ; CHECK-NEXT: addi 1, 1, 16
101 ; CHECK-NEXT: mtlr 0
102 ; CHECK-NEXT: blr
103 %1 = urem i64 %x, 98
104 ret i64 %1
105 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
2 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE
3 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
4 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE
5 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
6 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE
7 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
8 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE
9
10 define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
11 ; P9LE-LABEL: fold_urem_vec_1:
12 ; P9LE: # %bb.0:
13 ; P9LE-NEXT: li r3, 4
14 ; P9LE-NEXT: vextuhrx r3, r3, v2
15 ; P9LE-NEXT: lis r5, 21399
16 ; P9LE-NEXT: ori r5, r5, 33437
17 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31
18 ; P9LE-NEXT: mulld r4, r4, r5
19 ; P9LE-NEXT: lis r5, 16727
20 ; P9LE-NEXT: ori r5, r5, 2287
21 ; P9LE-NEXT: rldicl r4, r4, 27, 37
22 ; P9LE-NEXT: mulli r4, r4, 98
23 ; P9LE-NEXT: subf r3, r4, r3
24 ; P9LE-NEXT: mtvsrd f0, r3
25 ; P9LE-NEXT: li r3, 6
26 ; P9LE-NEXT: vextuhrx r3, r3, v2
27 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31
28 ; P9LE-NEXT: mulld r4, r4, r5
29 ; P9LE-NEXT: lis r5, 8456
30 ; P9LE-NEXT: ori r5, r5, 16913
31 ; P9LE-NEXT: rldicl r4, r4, 24, 40
32 ; P9LE-NEXT: mulli r4, r4, 1003
33 ; P9LE-NEXT: subf r3, r4, r3
34 ; P9LE-NEXT: xxswapd v3, vs0
35 ; P9LE-NEXT: mtvsrd f0, r3
36 ; P9LE-NEXT: li r3, 2
37 ; P9LE-NEXT: vextuhrx r3, r3, v2
38 ; P9LE-NEXT: rlwinm r4, r3, 30, 18, 31
39 ; P9LE-NEXT: mulld r4, r4, r5
40 ; P9LE-NEXT: rldicl r4, r4, 30, 34
41 ; P9LE-NEXT: mulli r4, r4, 124
42 ; P9LE-NEXT: subf r3, r4, r3
43 ; P9LE-NEXT: xxswapd v4, vs0
44 ; P9LE-NEXT: mtvsrd f0, r3
45 ; P9LE-NEXT: li r3, 0
46 ; P9LE-NEXT: vextuhrx r3, r3, v2
47 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31
48 ; P9LE-NEXT: lis r6, 22765
49 ; P9LE-NEXT: ori r6, r6, 8969
50 ; P9LE-NEXT: vmrglh v3, v4, v3
51 ; P9LE-NEXT: xxswapd v4, vs0
52 ; P9LE-NEXT: clrldi r5, r4, 32
53 ; P9LE-NEXT: mulld r5, r5, r6
54 ; P9LE-NEXT: rldicl r5, r5, 32, 32
55 ; P9LE-NEXT: subf r4, r5, r4
56 ; P9LE-NEXT: srwi r4, r4, 1
57 ; P9LE-NEXT: add r4, r4, r5
58 ; P9LE-NEXT: srwi r4, r4, 6
59 ; P9LE-NEXT: mulli r4, r4, 95
60 ; P9LE-NEXT: subf r3, r4, r3
61 ; P9LE-NEXT: mtvsrd f0, r3
62 ; P9LE-NEXT: xxswapd v2, vs0
63 ; P9LE-NEXT: vmrglh v2, v4, v2
64 ; P9LE-NEXT: vmrglw v2, v3, v2
65 ; P9LE-NEXT: blr
66 ;
67 ; P9BE-LABEL: fold_urem_vec_1:
68 ; P9BE: # %bb.0:
69 ; P9BE-NEXT: li r3, 6
70 ; P9BE-NEXT: vextuhlx r3, r3, v2
71 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
72 ; P9BE-NEXT: lis r5, 16727
73 ; P9BE-NEXT: ori r5, r5, 2287
74 ; P9BE-NEXT: clrldi r4, r3, 32
75 ; P9BE-NEXT: mulld r4, r4, r5
76 ; P9BE-NEXT: lis r5, 21399
77 ; P9BE-NEXT: ori r5, r5, 33437
78 ; P9BE-NEXT: rldicl r4, r4, 24, 40
79 ; P9BE-NEXT: mulli r4, r4, 1003
80 ; P9BE-NEXT: subf r3, r4, r3
81 ; P9BE-NEXT: sldi r3, r3, 48
82 ; P9BE-NEXT: mtvsrd v3, r3
83 ; P9BE-NEXT: li r3, 4
84 ; P9BE-NEXT: vextuhlx r3, r3, v2
85 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
86 ; P9BE-NEXT: clrldi r4, r3, 32
87 ; P9BE-NEXT: mulld r4, r4, r5
88 ; P9BE-NEXT: lis r5, 8456
89 ; P9BE-NEXT: ori r5, r5, 16913
90 ; P9BE-NEXT: rldicl r4, r4, 27, 37
91 ; P9BE-NEXT: mulli r4, r4, 98
92 ; P9BE-NEXT: subf r3, r4, r3
93 ; P9BE-NEXT: sldi r3, r3, 48
94 ; P9BE-NEXT: mtvsrd v4, r3
95 ; P9BE-NEXT: li r3, 2
96 ; P9BE-NEXT: vextuhlx r3, r3, v2
97 ; P9BE-NEXT: clrlwi r4, r3, 16
98 ; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31
99 ; P9BE-NEXT: mulld r3, r3, r5
100 ; P9BE-NEXT: lis r5, 22765
101 ; P9BE-NEXT: ori r5, r5, 8969
102 ; P9BE-NEXT: rldicl r3, r3, 30, 34
103 ; P9BE-NEXT: mulli r3, r3, 124
104 ; P9BE-NEXT: subf r3, r3, r4
105 ; P9BE-NEXT: sldi r3, r3, 48
106 ; P9BE-NEXT: vmrghh v3, v4, v3
107 ; P9BE-NEXT: mtvsrd v4, r3
108 ; P9BE-NEXT: li r3, 0
109 ; P9BE-NEXT: vextuhlx r3, r3, v2
110 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
111 ; P9BE-NEXT: clrldi r4, r3, 32
112 ; P9BE-NEXT: mulld r4, r4, r5
113 ; P9BE-NEXT: rldicl r4, r4, 32, 32
114 ; P9BE-NEXT: subf r5, r4, r3
115 ; P9BE-NEXT: srwi r5, r5, 1
116 ; P9BE-NEXT: add r4, r5, r4
117 ; P9BE-NEXT: srwi r4, r4, 6
118 ; P9BE-NEXT: mulli r4, r4, 95
119 ; P9BE-NEXT: subf r3, r4, r3
120 ; P9BE-NEXT: sldi r3, r3, 48
121 ; P9BE-NEXT: mtvsrd v2, r3
122 ; P9BE-NEXT: vmrghh v2, v2, v4
123 ; P9BE-NEXT: vmrghw v2, v2, v3
124 ; P9BE-NEXT: blr
125 ;
126 ; P8LE-LABEL: fold_urem_vec_1:
127 ; P8LE: # %bb.0:
128 ; P8LE-NEXT: xxswapd vs0, v2
129 ; P8LE-NEXT: lis r3, 22765
130 ; P8LE-NEXT: lis r8, 21399
131 ; P8LE-NEXT: ori r3, r3, 8969
132 ; P8LE-NEXT: ori r8, r8, 33437
133 ; P8LE-NEXT: mfvsrd r4, f0
134 ; P8LE-NEXT: clrldi r5, r4, 48
135 ; P8LE-NEXT: rldicl r9, r4, 32, 48
136 ; P8LE-NEXT: rlwinm r6, r5, 0, 16, 31
137 ; P8LE-NEXT: rldicl r10, r4, 16, 48
138 ; P8LE-NEXT: rlwinm r11, r9, 0, 16, 31
139 ; P8LE-NEXT: clrldi r7, r6, 32
140 ; P8LE-NEXT: rlwinm r12, r10, 0, 16, 31
141 ; P8LE-NEXT: mulld r3, r7, r3
142 ; P8LE-NEXT: lis r7, 16727
143 ; P8LE-NEXT: ori r7, r7, 2287
144 ; P8LE-NEXT: mulld r8, r11, r8
145 ; P8LE-NEXT: lis r11, 8456
146 ; P8LE-NEXT: rldicl r4, r4, 48, 48
147 ; P8LE-NEXT: mulld r7, r12, r7
148 ; P8LE-NEXT: ori r11, r11, 16913
149 ; P8LE-NEXT: rlwinm r12, r4, 30, 18, 31
150 ; P8LE-NEXT: rldicl r3, r3, 32, 32
151 ; P8LE-NEXT: mulld r11, r12, r11
152 ; P8LE-NEXT: subf r6, r3, r6
153 ; P8LE-NEXT: rldicl r8, r8, 27, 37
154 ; P8LE-NEXT: srwi r6, r6, 1
155 ; P8LE-NEXT: add r3, r6, r3
156 ; P8LE-NEXT: rldicl r6, r7, 24, 40
157 ; P8LE-NEXT: mulli r7, r8, 98
158 ; P8LE-NEXT: srwi r3, r3, 6
159 ; P8LE-NEXT: rldicl r8, r11, 30, 34
160 ; P8LE-NEXT: mulli r6, r6, 1003
161 ; P8LE-NEXT: mulli r3, r3, 95
162 ; P8LE-NEXT: mulli r8, r8, 124
163 ; P8LE-NEXT: subf r7, r7, r9
164 ; P8LE-NEXT: subf r6, r6, r10
165 ; P8LE-NEXT: mtvsrd f0, r7
166 ; P8LE-NEXT: subf r3, r3, r5
167 ; P8LE-NEXT: subf r4, r8, r4
168 ; P8LE-NEXT: mtvsrd f1, r6
169 ; P8LE-NEXT: mtvsrd f2, r3
170 ; P8LE-NEXT: xxswapd v2, vs0
171 ; P8LE-NEXT: mtvsrd f3, r4
172 ; P8LE-NEXT: xxswapd v3, vs1
173 ; P8LE-NEXT: xxswapd v4, vs2
174 ; P8LE-NEXT: xxswapd v5, vs3
175 ; P8LE-NEXT: vmrglh v2, v3, v2
176 ; P8LE-NEXT: vmrglh v3, v5, v4
177 ; P8LE-NEXT: vmrglw v2, v2, v3
178 ; P8LE-NEXT: blr
179 ;
180 ; P8BE-LABEL: fold_urem_vec_1:
181 ; P8BE: # %bb.0:
182 ; P8BE-NEXT: mfvsrd r4, v2
183 ; P8BE-NEXT: lis r3, 22765
184 ; P8BE-NEXT: lis r9, 16727
185 ; P8BE-NEXT: ori r3, r3, 8969
186 ; P8BE-NEXT: ori r9, r9, 2287
187 ; P8BE-NEXT: rldicl r5, r4, 16, 48
188 ; P8BE-NEXT: clrldi r6, r4, 48
189 ; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31
190 ; P8BE-NEXT: rldicl r7, r4, 48, 48
191 ; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31
192 ; P8BE-NEXT: clrldi r8, r5, 32
193 ; P8BE-NEXT: rlwinm r7, r7, 0, 16, 31
194 ; P8BE-NEXT: mulld r3, r8, r3
195 ; P8BE-NEXT: lis r8, 21399
196 ; P8BE-NEXT: clrldi r10, r6, 32
197 ; P8BE-NEXT: ori r8, r8, 33437
198 ; P8BE-NEXT: clrldi r11, r7, 32
199 ; P8BE-NEXT: mulld r9, r10, r9
200 ; P8BE-NEXT: lis r10, 8456
201 ; P8BE-NEXT: rldicl r4, r4, 32, 48
202 ; P8BE-NEXT: mulld r8, r11, r8
203 ; P8BE-NEXT: ori r10, r10, 16913
204 ; P8BE-NEXT: rlwinm r11, r4, 30, 18, 31
205 ; P8BE-NEXT: rldicl r3, r3, 32, 32
206 ; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31
207 ; P8BE-NEXT: mulld r10, r11, r10
208 ; P8BE-NEXT: subf r11, r3, r5
209 ; P8BE-NEXT: srwi r11, r11, 1
210 ; P8BE-NEXT: rldicl r9, r9, 24, 40
211 ; P8BE-NEXT: add r3, r11, r3
212 ; P8BE-NEXT: rldicl r8, r8, 27, 37
213 ; P8BE-NEXT: srwi r3, r3, 6
214 ; P8BE-NEXT: mulli r9, r9, 1003
215 ; P8BE-NEXT: rldicl r10, r10, 30, 34
216 ; P8BE-NEXT: mulli r8, r8, 98
217 ; P8BE-NEXT: mulli r3, r3, 95
218 ; P8BE-NEXT: mulli r10, r10, 124
219 ; P8BE-NEXT: subf r6, r9, r6
220 ; P8BE-NEXT: subf r7, r8, r7
221 ; P8BE-NEXT: sldi r6, r6, 48
222 ; P8BE-NEXT: subf r3, r3, r5
223 ; P8BE-NEXT: subf r4, r10, r4
224 ; P8BE-NEXT: mtvsrd v2, r6
225 ; P8BE-NEXT: sldi r5, r7, 48
226 ; P8BE-NEXT: sldi r3, r3, 48
227 ; P8BE-NEXT: sldi r4, r4, 48
228 ; P8BE-NEXT: mtvsrd v3, r5
229 ; P8BE-NEXT: mtvsrd v4, r3
230 ; P8BE-NEXT: mtvsrd v5, r4
231 ; P8BE-NEXT: vmrghh v2, v3, v2
232 ; P8BE-NEXT: vmrghh v3, v4, v5
233 ; P8BE-NEXT: vmrghw v2, v3, v2
234 ; P8BE-NEXT: blr
235 %1 = urem <4 x i16> %x,
236 ret <4 x i16> %1
237 }
238
239 define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
240 ; P9LE-LABEL: fold_urem_vec_2:
241 ; P9LE: # %bb.0:
242 ; P9LE-NEXT: li r3, 0
243 ; P9LE-NEXT: vextuhrx r3, r3, v2
244 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31
245 ; P9LE-NEXT: lis r6, 22765
246 ; P9LE-NEXT: ori r6, r6, 8969
247 ; P9LE-NEXT: clrldi r5, r4, 32
248 ; P9LE-NEXT: mulld r5, r5, r6
249 ; P9LE-NEXT: rldicl r5, r5, 32, 32
250 ; P9LE-NEXT: subf r4, r5, r4
251 ; P9LE-NEXT: srwi r4, r4, 1
252 ; P9LE-NEXT: add r4, r4, r5
253 ; P9LE-NEXT: srwi r4, r4, 6
254 ; P9LE-NEXT: mulli r4, r4, 95
255 ; P9LE-NEXT: subf r3, r4, r3
256 ; P9LE-NEXT: mtvsrd f0, r3
257 ; P9LE-NEXT: li r3, 2
258 ; P9LE-NEXT: vextuhrx r3, r3, v2
259 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31
260 ; P9LE-NEXT: clrldi r5, r4, 32
261 ; P9LE-NEXT: mulld r5, r5, r6
262 ; P9LE-NEXT: rldicl r5, r5, 32, 32
263 ; P9LE-NEXT: subf r4, r5, r4
264 ; P9LE-NEXT: srwi r4, r4, 1
265 ; P9LE-NEXT: add r4, r4, r5
266 ; P9LE-NEXT: srwi r4, r4, 6
267 ; P9LE-NEXT: mulli r4, r4, 95
268 ; P9LE-NEXT: subf r3, r4, r3
269 ; P9LE-NEXT: xxswapd v3, vs0
270 ; P9LE-NEXT: mtvsrd f0, r3
271 ; P9LE-NEXT: li r3, 4
272 ; P9LE-NEXT: vextuhrx r3, r3, v2
273 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31
274 ; P9LE-NEXT: clrldi r5, r4, 32
275 ; P9LE-NEXT: mulld r5, r5, r6
276 ; P9LE-NEXT: rldicl r5, r5, 32, 32
277 ; P9LE-NEXT: subf r4, r5, r4
278 ; P9LE-NEXT: srwi r4, r4, 1
279 ; P9LE-NEXT: add r4, r4, r5
280 ; P9LE-NEXT: srwi r4, r4, 6
281 ; P9LE-NEXT: mulli r4, r4, 95
282 ; P9LE-NEXT: subf r3, r4, r3
283 ; P9LE-NEXT: xxswapd v4, vs0
284 ; P9LE-NEXT: mtvsrd f0, r3
285 ; P9LE-NEXT: li r3, 6
286 ; P9LE-NEXT: vextuhrx r3, r3, v2
287 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31
288 ; P9LE-NEXT: clrldi r5, r4, 32
289 ; P9LE-NEXT: mulld r5, r5, r6
290 ; P9LE-NEXT: rldicl r5, r5, 32, 32
291 ; P9LE-NEXT: subf r4, r5, r4
292 ; P9LE-NEXT: srwi r4, r4, 1
293 ; P9LE-NEXT: add r4, r4, r5
294 ; P9LE-NEXT: srwi r4, r4, 6
295 ; P9LE-NEXT: mulli r4, r4, 95
296 ; P9LE-NEXT: subf r3, r4, r3
297 ; P9LE-NEXT: vmrglh v3, v4, v3
298 ; P9LE-NEXT: xxswapd v4, vs0
299 ; P9LE-NEXT: mtvsrd f0, r3
300 ; P9LE-NEXT: xxswapd v2, vs0
301 ; P9LE-NEXT: vmrglh v2, v2, v4
302 ; P9LE-NEXT: vmrglw v2, v2, v3
303 ; P9LE-NEXT: blr
304 ;
305 ; P9BE-LABEL: fold_urem_vec_2:
306 ; P9BE: # %bb.0:
307 ; P9BE-NEXT: li r3, 6
308 ; P9BE-NEXT: vextuhlx r3, r3, v2
309 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
310 ; P9BE-NEXT: lis r5, 22765
311 ; P9BE-NEXT: ori r5, r5, 8969
312 ; P9BE-NEXT: clrldi r4, r3, 32
313 ; P9BE-NEXT: mulld r4, r4, r5
314 ; P9BE-NEXT: rldicl r4, r4, 32, 32
315 ; P9BE-NEXT: subf r6, r4, r3
316 ; P9BE-NEXT: srwi r6, r6, 1
317 ; P9BE-NEXT: add r4, r6, r4
318 ; P9BE-NEXT: srwi r4, r4, 6
319 ; P9BE-NEXT: mulli r4, r4, 95
320 ; P9BE-NEXT: subf r3, r4, r3
321 ; P9BE-NEXT: sldi r3, r3, 48
322 ; P9BE-NEXT: mtvsrd v3, r3
323 ; P9BE-NEXT: li r3, 4
324 ; P9BE-NEXT: vextuhlx r3, r3, v2
325 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
326 ; P9BE-NEXT: clrldi r4, r3, 32
327 ; P9BE-NEXT: mulld r4, r4, r5
328 ; P9BE-NEXT: rldicl r4, r4, 32, 32
329 ; P9BE-NEXT: subf r6, r4, r3
330 ; P9BE-NEXT: srwi r6, r6, 1
331 ; P9BE-NEXT: add r4, r6, r4
332 ; P9BE-NEXT: srwi r4, r4, 6
333 ; P9BE-NEXT: mulli r4, r4, 95
334 ; P9BE-NEXT: subf r3, r4, r3
335 ; P9BE-NEXT: sldi r3, r3, 48
336 ; P9BE-NEXT: mtvsrd v4, r3
337 ; P9BE-NEXT: li r3, 2
338 ; P9BE-NEXT: vextuhlx r3, r3, v2
339 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
340 ; P9BE-NEXT: clrldi r4, r3, 32
341 ; P9BE-NEXT: mulld r4, r4, r5
342 ; P9BE-NEXT: rldicl r4, r4, 32, 32
343 ; P9BE-NEXT: subf r6, r4, r3
344 ; P9BE-NEXT: srwi r6, r6, 1
345 ; P9BE-NEXT: add r4, r6, r4
346 ; P9BE-NEXT: srwi r4, r4, 6
347 ; P9BE-NEXT: mulli r4, r4, 95
348 ; P9BE-NEXT: subf r3, r4, r3
349 ; P9BE-NEXT: sldi r3, r3, 48
350 ; P9BE-NEXT: vmrghh v3, v4, v3
351 ; P9BE-NEXT: mtvsrd v4, r3
352 ; P9BE-NEXT: li r3, 0
353 ; P9BE-NEXT: vextuhlx r3, r3, v2
354 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
355 ; P9BE-NEXT: clrldi r4, r3, 32
356 ; P9BE-NEXT: mulld r4, r4, r5
357 ; P9BE-NEXT: rldicl r4, r4, 32, 32
358 ; P9BE-NEXT: subf r5, r4, r3
359 ; P9BE-NEXT: srwi r5, r5, 1
360 ; P9BE-NEXT: add r4, r5, r4
361 ; P9BE-NEXT: srwi r4, r4, 6
362 ; P9BE-NEXT: mulli r4, r4, 95
363 ; P9BE-NEXT: subf r3, r4, r3
364 ; P9BE-NEXT: sldi r3, r3, 48
365 ; P9BE-NEXT: mtvsrd v2, r3
366 ; P9BE-NEXT: vmrghh v2, v2, v4
367 ; P9BE-NEXT: vmrghw v2, v2, v3
368 ; P9BE-NEXT: blr
369 ;
370 ; P8LE-LABEL: fold_urem_vec_2:
371 ; P8LE: # %bb.0:
372 ; P8LE-NEXT: xxswapd vs0, v2
373 ; P8LE-NEXT: lis r4, 22765
374 ; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
375 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
376 ; P8LE-NEXT: ori r4, r4, 8969
377 ; P8LE-NEXT: mfvsrd r5, f0
378 ; P8LE-NEXT: clrldi r3, r5, 48
379 ; P8LE-NEXT: rldicl r6, r5, 48, 48
380 ; P8LE-NEXT: rlwinm r8, r3, 0, 16, 31
381 ; P8LE-NEXT: rldicl r7, r5, 32, 48
382 ; P8LE-NEXT: rlwinm r9, r6, 0, 16, 31
383 ; P8LE-NEXT: rldicl r5, r5, 16, 48
384 ; P8LE-NEXT: clrldi r11, r8, 32
385 ; P8LE-NEXT: rlwinm r10, r7, 0, 16, 31
386 ; P8LE-NEXT: rlwinm r12, r5, 0, 16, 31
387 ; P8LE-NEXT: mulld r11, r11, r4
388 ; P8LE-NEXT: clrldi r0, r9, 32
389 ; P8LE-NEXT: clrldi r30, r10, 32
390 ; P8LE-NEXT: clrldi r29, r12, 32
391 ; P8LE-NEXT: mulld r0, r0, r4
392 ; P8LE-NEXT: mulld r30, r30, r4
393 ; P8LE-NEXT: mulld r4, r29, r4
394 ; P8LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
395 ; P8LE-NEXT: rldicl r11, r11, 32, 32
396 ; P8LE-NEXT: subf r8, r11, r8
397 ; P8LE-NEXT: rldicl r0, r0, 32, 32
398 ; P8LE-NEXT: srwi r8, r8, 1
399 ; P8LE-NEXT: rldicl r30, r30, 32, 32
400 ; P8LE-NEXT: rldicl r4, r4, 32, 32
401 ; P8LE-NEXT: subf r9, r0, r9
402 ; P8LE-NEXT: add r8, r8, r11
403 ; P8LE-NEXT: subf r10, r30, r10
404 ; P8LE-NEXT: subf r11, r4, r12
405 ; P8LE-NEXT: srwi r9, r9, 1
406 ; P8LE-NEXT: srwi r8, r8, 6
407 ; P8LE-NEXT: srwi r10, r10, 1
408 ; P8LE-NEXT: srwi r11, r11, 1
409 ; P8LE-NEXT: add r9, r9, r0
410 ; P8LE-NEXT: add r10, r10, r30
411 ; P8LE-NEXT: add r4, r11, r4
412 ; P8LE-NEXT: srwi r9, r9, 6
413 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
414 ; P8LE-NEXT: mulli r8, r8, 95
415 ; P8LE-NEXT: srwi r10, r10, 6
416 ; P8LE-NEXT: srwi r4, r4, 6
417 ; P8LE-NEXT: mulli r9, r9, 95
418 ; P8LE-NEXT: mulli r10, r10, 95
419 ; P8LE-NEXT: mulli r4, r4, 95
420 ; P8LE-NEXT: subf r3, r8, r3
421 ; P8LE-NEXT: subf r6, r9, r6
422 ; P8LE-NEXT: mtvsrd f0, r3
423 ; P8LE-NEXT: subf r3, r10, r7
424 ; P8LE-NEXT: subf r4, r4, r5
425 ; P8LE-NEXT: mtvsrd f1, r6
426 ; P8LE-NEXT: mtvsrd f2, r3
427 ; P8LE-NEXT: xxswapd v2, vs0
428 ; P8LE-NEXT: mtvsrd f3, r4
429 ; P8LE-NEXT: xxswapd v3, vs1
430 ; P8LE-NEXT: xxswapd v4, vs2
431 ; P8LE-NEXT: xxswapd v5, vs3
432 ; P8LE-NEXT: vmrglh v2, v3, v2
433 ; P8LE-NEXT: vmrglh v3, v5, v4
434 ; P8LE-NEXT: vmrglw v2, v3, v2
435 ; P8LE-NEXT: blr
436 ;
437 ; P8BE-LABEL: fold_urem_vec_2:
438 ; P8BE: # %bb.0:
439 ; P8BE-NEXT: mfvsrd r4, v2
440 ; P8BE-NEXT: lis r3, 22765
441 ; P8BE-NEXT: ori r3, r3, 8969
442 ; P8BE-NEXT: clrldi r5, r4, 48
443 ; P8BE-NEXT: rldicl r6, r4, 48, 48
444 ; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31
445 ; P8BE-NEXT: rldicl r7, r4, 32, 48
446 ; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31
447 ; P8BE-NEXT: clrldi r8, r5, 32
448 ; P8BE-NEXT: rldicl r4, r4, 16, 48
449 ; P8BE-NEXT: rlwinm r7, r7, 0, 16, 31
450 ; P8BE-NEXT: clrldi r9, r6, 32
451 ; P8BE-NEXT: mulld r8, r8, r3
452 ; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31
453 ; P8BE-NEXT: clrldi r10, r7, 32
454 ; P8BE-NEXT: mulld r9, r9, r3
455 ; P8BE-NEXT: clrldi r11, r4, 32
456 ; P8BE-NEXT: mulld r10, r10, r3
457 ; P8BE-NEXT: mulld r3, r11, r3
458 ; P8BE-NEXT: rldicl r8, r8, 32, 32
459 ; P8BE-NEXT: rldicl r9, r9, 32, 32
460 ; P8BE-NEXT: subf r11, r8, r5
461 ; P8BE-NEXT: rldicl r10, r10, 32, 32
462 ; P8BE-NEXT: subf r12, r9, r6
463 ; P8BE-NEXT: srwi r11, r11, 1
464 ; P8BE-NEXT: rldicl r3, r3, 32, 32
465 ; P8BE-NEXT: add r8, r11, r8
466 ; P8BE-NEXT: subf r11, r10, r7
467 ; P8BE-NEXT: srwi r12, r12, 1
468 ; P8BE-NEXT: add r9, r12, r9
469 ; P8BE-NEXT: subf r12, r3, r4
470 ; P8BE-NEXT: srwi r11, r11, 1
471 ; P8BE-NEXT: srwi r8, r8, 6
472 ; P8BE-NEXT: add r10, r11, r10
473 ; P8BE-NEXT: srwi r11, r12, 1
474 ; P8BE-NEXT: srwi r9, r9, 6
475 ; P8BE-NEXT: add r3, r11, r3
476 ; P8BE-NEXT: srwi r10, r10, 6
477 ; P8BE-NEXT: srwi r3, r3, 6
478 ; P8BE-NEXT: mulli r8, r8, 95
479 ; P8BE-NEXT: mulli r9, r9, 95
480 ; P8BE-NEXT: mulli r10, r10, 95
481 ; P8BE-NEXT: mulli r3, r3, 95
482 ; P8BE-NEXT: subf r5, r8, r5
483 ; P8BE-NEXT: subf r6, r9, r6
484 ; P8BE-NEXT: subf r7, r10, r7
485 ; P8BE-NEXT: subf r3, r3, r4
486 ; P8BE-NEXT: sldi r5, r5, 48
487 ; P8BE-NEXT: sldi r6, r6, 48
488 ; P8BE-NEXT: sldi r4, r7, 48
489 ; P8BE-NEXT: mtvsrd v2, r5
490 ; P8BE-NEXT: sldi r3, r3, 48
491 ; P8BE-NEXT: mtvsrd v3, r6
492 ; P8BE-NEXT: mtvsrd v4, r4
493 ; P8BE-NEXT: mtvsrd v5, r3
494 ; P8BE-NEXT: vmrghh v2, v3, v2
495 ; P8BE-NEXT: vmrghh v3, v5, v4
496 ; P8BE-NEXT: vmrghw v2, v3, v2
497 ; P8BE-NEXT: blr
498 %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95> ; splat divisor: every lane's expected sequence above multiplies back by 95
499 ret <4 x i16> %1
500 }
501
502
503 ; Don't fold if we can combine urem with udiv.
504 define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
505 ; P9LE-LABEL: combine_urem_udiv:
506 ; P9LE: # %bb.0:
507 ; P9LE-NEXT: li r3, 0
508 ; P9LE-NEXT: vextuhrx r3, r3, v2
509 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31
510 ; P9LE-NEXT: lis r6, 22765
511 ; P9LE-NEXT: ori r6, r6, 8969
512 ; P9LE-NEXT: clrldi r5, r4, 32
513 ; P9LE-NEXT: mulld r5, r5, r6
514 ; P9LE-NEXT: rldicl r5, r5, 32, 32
515 ; P9LE-NEXT: subf r4, r5, r4
516 ; P9LE-NEXT: srwi r4, r4, 1
517 ; P9LE-NEXT: add r4, r4, r5
518 ; P9LE-NEXT: srwi r4, r4, 6
519 ; P9LE-NEXT: mulli r5, r4, 95
520 ; P9LE-NEXT: subf r3, r5, r3
521 ; P9LE-NEXT: mtvsrd f0, r3
522 ; P9LE-NEXT: li r3, 2
523 ; P9LE-NEXT: vextuhrx r3, r3, v2
524 ; P9LE-NEXT: rlwinm r5, r3, 0, 16, 31
525 ; P9LE-NEXT: clrldi r7, r5, 32
526 ; P9LE-NEXT: mulld r7, r7, r6
527 ; P9LE-NEXT: rldicl r7, r7, 32, 32
528 ; P9LE-NEXT: subf r5, r7, r5
529 ; P9LE-NEXT: srwi r5, r5, 1
530 ; P9LE-NEXT: add r5, r5, r7
531 ; P9LE-NEXT: srwi r5, r5, 6
532 ; P9LE-NEXT: mulli r7, r5, 95
533 ; P9LE-NEXT: subf r3, r7, r3
534 ; P9LE-NEXT: xxswapd v3, vs0
535 ; P9LE-NEXT: mtvsrd f0, r3
536 ; P9LE-NEXT: li r3, 4
537 ; P9LE-NEXT: vextuhrx r3, r3, v2
538 ; P9LE-NEXT: rlwinm r7, r3, 0, 16, 31
539 ; P9LE-NEXT: clrldi r8, r7, 32
540 ; P9LE-NEXT: mulld r8, r8, r6
541 ; P9LE-NEXT: rldicl r8, r8, 32, 32
542 ; P9LE-NEXT: subf r7, r8, r7
543 ; P9LE-NEXT: srwi r7, r7, 1
544 ; P9LE-NEXT: add r7, r7, r8
545 ; P9LE-NEXT: srwi r7, r7, 6
546 ; P9LE-NEXT: mulli r8, r7, 95
547 ; P9LE-NEXT: subf r3, r8, r3
548 ; P9LE-NEXT: xxswapd v4, vs0
549 ; P9LE-NEXT: mtvsrd f0, r3
550 ; P9LE-NEXT: li r3, 6
551 ; P9LE-NEXT: vextuhrx r3, r3, v2
552 ; P9LE-NEXT: rlwinm r8, r3, 0, 16, 31
553 ; P9LE-NEXT: clrldi r9, r8, 32
554 ; P9LE-NEXT: mulld r6, r9, r6
555 ; P9LE-NEXT: rldicl r6, r6, 32, 32
556 ; P9LE-NEXT: subf r8, r6, r8
557 ; P9LE-NEXT: srwi r8, r8, 1
558 ; P9LE-NEXT: add r6, r8, r6
559 ; P9LE-NEXT: srwi r6, r6, 6
560 ; P9LE-NEXT: mulli r8, r6, 95
561 ; P9LE-NEXT: subf r3, r8, r3
562 ; P9LE-NEXT: vmrglh v3, v4, v3
563 ; P9LE-NEXT: xxswapd v4, vs0
564 ; P9LE-NEXT: mtvsrd f0, r3
565 ; P9LE-NEXT: xxswapd v2, vs0
566 ; P9LE-NEXT: mtvsrd f0, r4
567 ; P9LE-NEXT: vmrglh v2, v2, v4
568 ; P9LE-NEXT: vmrglw v2, v2, v3
569 ; P9LE-NEXT: xxswapd v3, vs0
570 ; P9LE-NEXT: mtvsrd f0, r5
571 ; P9LE-NEXT: xxswapd v4, vs0
572 ; P9LE-NEXT: mtvsrd f0, r7
573 ; P9LE-NEXT: vmrglh v3, v4, v3
574 ; P9LE-NEXT: xxswapd v4, vs0
575 ; P9LE-NEXT: mtvsrd f0, r6
576 ; P9LE-NEXT: xxswapd v5, vs0
577 ; P9LE-NEXT: vmrglh v4, v5, v4
578 ; P9LE-NEXT: vmrglw v3, v4, v3
579 ; P9LE-NEXT: vadduhm v2, v2, v3
580 ; P9LE-NEXT: blr
581 ;
582 ; P9BE-LABEL: combine_urem_udiv:
583 ; P9BE: # %bb.0:
584 ; P9BE-NEXT: li r3, 6
585 ; P9BE-NEXT: vextuhlx r3, r3, v2
586 ; P9BE-NEXT: rlwinm r4, r3, 0, 16, 31
587 ; P9BE-NEXT: lis r6, 22765
588 ; P9BE-NEXT: ori r6, r6, 8969
589 ; P9BE-NEXT: clrldi r5, r4, 32
590 ; P9BE-NEXT: mulld r5, r5, r6
591 ; P9BE-NEXT: rldicl r5, r5, 32, 32
592 ; P9BE-NEXT: subf r4, r5, r4
593 ; P9BE-NEXT: srwi r4, r4, 1
594 ; P9BE-NEXT: add r4, r4, r5
595 ; P9BE-NEXT: srwi r4, r4, 6
596 ; P9BE-NEXT: mulli r5, r4, 95
597 ; P9BE-NEXT: subf r3, r5, r3
598 ; P9BE-NEXT: sldi r3, r3, 48
599 ; P9BE-NEXT: mtvsrd v3, r3
600 ; P9BE-NEXT: li r3, 4
601 ; P9BE-NEXT: vextuhlx r3, r3, v2
602 ; P9BE-NEXT: rlwinm r5, r3, 0, 16, 31
603 ; P9BE-NEXT: clrldi r7, r5, 32
604 ; P9BE-NEXT: mulld r7, r7, r6
605 ; P9BE-NEXT: rldicl r7, r7, 32, 32
606 ; P9BE-NEXT: subf r5, r7, r5
607 ; P9BE-NEXT: srwi r5, r5, 1
608 ; P9BE-NEXT: add r5, r5, r7
609 ; P9BE-NEXT: srwi r5, r5, 6
610 ; P9BE-NEXT: mulli r7, r5, 95
611 ; P9BE-NEXT: subf r3, r7, r3
612 ; P9BE-NEXT: sldi r3, r3, 48
613 ; P9BE-NEXT: mtvsrd v4, r3
614 ; P9BE-NEXT: li r3, 2
615 ; P9BE-NEXT: vextuhlx r3, r3, v2
616 ; P9BE-NEXT: rlwinm r7, r3, 0, 16, 31
617 ; P9BE-NEXT: clrldi r8, r7, 32
618 ; P9BE-NEXT: mulld r8, r8, r6
619 ; P9BE-NEXT: rldicl r8, r8, 32, 32
620 ; P9BE-NEXT: subf r7, r8, r7
621 ; P9BE-NEXT: srwi r7, r7, 1
622 ; P9BE-NEXT: add r7, r7, r8
623 ; P9BE-NEXT: srwi r7, r7, 6
624 ; P9BE-NEXT: mulli r8, r7, 95
625 ; P9BE-NEXT: subf r3, r8, r3
626 ; P9BE-NEXT: sldi r3, r3, 48
627 ; P9BE-NEXT: vmrghh v3, v4, v3
628 ; P9BE-NEXT: mtvsrd v4, r3
629 ; P9BE-NEXT: li r3, 0
630 ; P9BE-NEXT: vextuhlx r3, r3, v2
631 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
632 ; P9BE-NEXT: clrldi r8, r3, 32
633 ; P9BE-NEXT: mulld r6, r8, r6
634 ; P9BE-NEXT: rldicl r6, r6, 32, 32
635 ; P9BE-NEXT: subf r8, r6, r3
636 ; P9BE-NEXT: srwi r8, r8, 1
637 ; P9BE-NEXT: add r6, r8, r6
638 ; P9BE-NEXT: srwi r6, r6, 6
639 ; P9BE-NEXT: mulli r8, r6, 95
640 ; P9BE-NEXT: subf r3, r8, r3
641 ; P9BE-NEXT: sldi r3, r3, 48
642 ; P9BE-NEXT: mtvsrd v2, r3
643 ; P9BE-NEXT: sldi r3, r4, 48
644 ; P9BE-NEXT: vmrghh v2, v2, v4
645 ; P9BE-NEXT: vmrghw v2, v2, v3
646 ; P9BE-NEXT: mtvsrd v3, r3
647 ; P9BE-NEXT: sldi r3, r5, 48
648 ; P9BE-NEXT: mtvsrd v4, r3
649 ; P9BE-NEXT: sldi r3, r7, 48
650 ; P9BE-NEXT: vmrghh v3, v4, v3
651 ; P9BE-NEXT: mtvsrd v4, r3
652 ; P9BE-NEXT: sldi r3, r6, 48
653 ; P9BE-NEXT: mtvsrd v5, r3
654 ; P9BE-NEXT: vmrghh v4, v5, v4
655 ; P9BE-NEXT: vmrghw v3, v4, v3
656 ; P9BE-NEXT: vadduhm v2, v2, v3
657 ; P9BE-NEXT: blr
658 ;
659 ; P8LE-LABEL: combine_urem_udiv:
660 ; P8LE: # %bb.0:
661 ; P8LE-NEXT: xxswapd vs0, v2
662 ; P8LE-NEXT: lis r5, 22765
663 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
664 ; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
665 ; P8LE-NEXT: ori r5, r5, 8969
666 ; P8LE-NEXT: mfvsrd r6, f0
667 ; P8LE-NEXT: clrldi r3, r6, 48
668 ; P8LE-NEXT: rldicl r4, r6, 48, 48
669 ; P8LE-NEXT: rldicl r7, r6, 32, 48
670 ; P8LE-NEXT: rlwinm r8, r3, 0, 16, 31
671 ; P8LE-NEXT: rlwinm r9, r4, 0, 16, 31
672 ; P8LE-NEXT: rldicl r6, r6, 16, 48
673 ; P8LE-NEXT: rlwinm r10, r7, 0, 16, 31
674 ; P8LE-NEXT: clrldi r11, r8, 32
675 ; P8LE-NEXT: rlwinm r12, r6, 0, 16, 31
676 ; P8LE-NEXT: clrldi r0, r9, 32
677 ; P8LE-NEXT: clrldi r30, r10, 32
678 ; P8LE-NEXT: mulld r11, r11, r5
679 ; P8LE-NEXT: clrldi r29, r12, 32
680 ; P8LE-NEXT: mulld r0, r0, r5
681 ; P8LE-NEXT: mulld r30, r30, r5
682 ; P8LE-NEXT: mulld r5, r29, r5
683 ; P8LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
684 ; P8LE-NEXT: rldicl r11, r11, 32, 32
685 ; P8LE-NEXT: rldicl r0, r0, 32, 32
686 ; P8LE-NEXT: rldicl r30, r30, 32, 32
687 ; P8LE-NEXT: subf r8, r11, r8
688 ; P8LE-NEXT: rldicl r5, r5, 32, 32
689 ; P8LE-NEXT: subf r9, r0, r9
690 ; P8LE-NEXT: srwi r8, r8, 1
691 ; P8LE-NEXT: subf r10, r30, r10
692 ; P8LE-NEXT: add r8, r8, r11
693 ; P8LE-NEXT: srwi r9, r9, 1
694 ; P8LE-NEXT: srwi r10, r10, 1
695 ; P8LE-NEXT: subf r11, r5, r12
696 ; P8LE-NEXT: add r9, r9, r0
697 ; P8LE-NEXT: srwi r8, r8, 6
698 ; P8LE-NEXT: add r10, r10, r30
699 ; P8LE-NEXT: srwi r11, r11, 1
700 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
701 ; P8LE-NEXT: srwi r9, r9, 6
702 ; P8LE-NEXT: mulli r12, r8, 95
703 ; P8LE-NEXT: srwi r10, r10, 6
704 ; P8LE-NEXT: add r5, r11, r5
705 ; P8LE-NEXT: mtvsrd f0, r8
706 ; P8LE-NEXT: mulli r8, r9, 95
707 ; P8LE-NEXT: mtvsrd f1, r9
708 ; P8LE-NEXT: mulli r9, r10, 95
709 ; P8LE-NEXT: srwi r5, r5, 6
710 ; P8LE-NEXT: mtvsrd f3, r5
711 ; P8LE-NEXT: mulli r5, r5, 95
712 ; P8LE-NEXT: xxswapd v2, vs0
713 ; P8LE-NEXT: xxswapd v3, vs1
714 ; P8LE-NEXT: mtvsrd f2, r10
715 ; P8LE-NEXT: subf r3, r12, r3
716 ; P8LE-NEXT: xxswapd v6, vs3
717 ; P8LE-NEXT: mtvsrd f0, r3
718 ; P8LE-NEXT: subf r3, r9, r7
719 ; P8LE-NEXT: subf r4, r8, r4
720 ; P8LE-NEXT: xxswapd v1, vs2
721 ; P8LE-NEXT: mtvsrd f4, r3
722 ; P8LE-NEXT: subf r3, r5, r6
723 ; P8LE-NEXT: mtvsrd f1, r4
724 ; P8LE-NEXT: mtvsrd f5, r3
725 ; P8LE-NEXT: xxswapd v5, vs4
726 ; P8LE-NEXT: vmrglh v2, v3, v2
727 ; P8LE-NEXT: xxswapd v3, vs0
728 ; P8LE-NEXT: xxswapd v4, vs1
729 ; P8LE-NEXT: xxswapd v0, vs5
730 ; P8LE-NEXT: vmrglh v3, v4, v3
731 ; P8LE-NEXT: vmrglh v4, v0, v5
732 ; P8LE-NEXT: vmrglh v5, v6, v1
733 ; P8LE-NEXT: vmrglw v3, v4, v3
734 ; P8LE-NEXT: vmrglw v2, v5, v2
735 ; P8LE-NEXT: vadduhm v2, v3, v2
736 ; P8LE-NEXT: blr
737 ;
738 ; P8BE-LABEL: combine_urem_udiv:
739 ; P8BE: # %bb.0:
740 ; P8BE-NEXT: mfvsrd r6, v2
741 ; P8BE-NEXT: lis r5, 22765
742 ; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
743 ; P8BE-NEXT: ori r5, r5, 8969
744 ; P8BE-NEXT: clrldi r3, r6, 48
745 ; P8BE-NEXT: rldicl r4, r6, 48, 48
746 ; P8BE-NEXT: rlwinm r8, r3, 0, 16, 31
747 ; P8BE-NEXT: rldicl r7, r6, 32, 48
748 ; P8BE-NEXT: rlwinm r9, r4, 0, 16, 31
749 ; P8BE-NEXT: rldicl r6, r6, 16, 48
750 ; P8BE-NEXT: clrldi r11, r8, 32
751 ; P8BE-NEXT: rlwinm r10, r7, 0, 16, 31
752 ; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31
753 ; P8BE-NEXT: clrldi r12, r9, 32
754 ; P8BE-NEXT: mulld r11, r11, r5
755 ; P8BE-NEXT: clrldi r0, r10, 32
756 ; P8BE-NEXT: clrldi r30, r6, 32
757 ; P8BE-NEXT: mulld r12, r12, r5
758 ; P8BE-NEXT: mulld r0, r0, r5
759 ; P8BE-NEXT: mulld r5, r30, r5
760 ; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
761 ; P8BE-NEXT: rldicl r11, r11, 32, 32
762 ; P8BE-NEXT: rldicl r12, r12, 32, 32
763 ; P8BE-NEXT: subf r8, r11