llvm.org GIT mirror llvm / 32893f0
[ARM] Lower sadd_sat to qadd8 and qadd16 Lower the target-independent signed saturating intrinsics to qadd8 and qadd16. This custom lowers them from a sadd_sat, catching the node early before it is promoted. It also adds a QADD8b and QADD16b node to mean the bottom "lane" of a qadd8/qadd16, so that we can run demanded-bits simplification on it to show that it does not use the upper bits. Also handles QSUB8 and QSUB16. Differential Revision: https://reviews.llvm.org/D68974 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375402 91177308-0d34-0410-b5e6-96231b3b80d8 David Green 1 year, 1 month ago
8 changed file(s) with 334 addition(s) and 169 deletion(s). Raw diff Collapse all Expand all
10201020
10211021 setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
10221022 setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
1023 if (Subtarget->hasDSP()) {
1024 setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
1025 setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
1026 setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
1027 setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
1028 }
10231029
10241030 // i64 operation support.
10251031 setOperationAction(ISD::MUL, MVT::i64, Expand);
16211627 case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
16221628 case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
16231629 case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1630 case ARMISD::QADD16b: return "ARMISD::QADD16b";
1631 case ARMISD::QSUB16b: return "ARMISD::QSUB16b";
1632 case ARMISD::QADD8b: return "ARMISD::QADD8b";
1633 case ARMISD::QSUB8b: return "ARMISD::QSUB8b";
16241634 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
16251635 case ARMISD::BFI: return "ARMISD::BFI";
16261636 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
44424452 }
44434453
44444454 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4455 }
4456
/// Custom-lower a scalar ISD::SADDSAT / ISD::SSUBSAT of type i8 or i16 to the
/// matching bottom-lane ARMISD node (QADD8b, QSUB8b, QADD16b or QSUB16b).
///
/// \param Op        The saturating node to lower; its opcode selects the add
///                  or subtract flavour and its value type selects the width.
/// \param DAG       The SelectionDAG used to create the replacement nodes.
/// \param Subtarget Queried for DSP support.
/// \returns A TRUNCATE of the new i32 node back to the original type, or an
///          empty SDValue when the subtarget has no DSP extension, the type
///          is not a simple type, or the type is neither i8 nor i16.
///          NOTE(review): an empty SDValue presumably makes the caller fall
///          back to the generic handling - confirm against the callers.
4457 static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
4458 const ARMSubtarget *Subtarget) {
4459 EVT VT = Op.getValueType();
// Bail out when the ARMISD nodes created below cannot be used.
4460 if (!Subtarget->hasDSP())
4461 return SDValue();
// getSimpleVT() below is only called once the type is known to be simple.
4462 if (!VT.isSimple())
4463 return SDValue();
4464
4465 unsigned NewOpcode;
// Any opcode other than SADDSAT is treated as the subtract form.
4466 bool IsAdd = Op->getOpcode() == ISD::SADDSAT;
4467 switch (VT.getSimpleVT().SimpleTy) {
4468 default:
// Only i8 and i16 are handled here.
4469 return SDValue();
4470 case MVT::i8:
4471 NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b;
4472 break;
4473 case MVT::i16:
4474 NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b;
4475 break;
4476 }
4477
// Build the node on i32: both operands are converted to i32 with
// getSExtOrTrunc, and the i32 result is truncated back to the original type.
4478 SDLoc dl(Op);
4479 SDValue Add =
4480 DAG.getNode(NewOpcode, dl, MVT::i32,
4481 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
4482 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
4483 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
44454484 }
44464485
44474486 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
91209159 case ISD::UADDO:
91219160 case ISD::USUBO:
91229161 return LowerUnsignedALUO(Op, DAG);
9162 case ISD::SADDSAT:
9163 case ISD::SSUBSAT:
9164 return LowerSADDSUBSAT(Op, DAG, Subtarget);
91239165 case ISD::LOAD:
91249166 return LowerPredicateLoad(Op, DAG);
91259167 case ISD::STORE:
92049246 Results.push_back(Res.getValue(0));
92059247 Results.push_back(Res.getValue(1));
92069248 return;
9249 case ISD::SADDSAT:
9250 case ISD::SSUBSAT:
9251 Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
9252 break;
92079253 case ISD::READCYCLECOUNTER:
92089254 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
92099255 return;
1438114427 return SDValue();
1438214428 break;
1438314429 }
14384 case ARMISD::SMLALBB: {
14430 case ARMISD::SMLALBB:
14431 case ARMISD::QADD16b:
14432 case ARMISD::QSUB16b: {
1438514433 unsigned BitWidth = N->getValueType(0).getSizeInBits();
1438614434 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
1438714435 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
1441214460 case ARMISD::SMLALTT: {
1441314461 unsigned BitWidth = N->getValueType(0).getSizeInBits();
1441414462 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
14463 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
14464 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
14465 return SDValue();
14466 break;
14467 }
14468 case ARMISD::QADD8b:
14469 case ARMISD::QSUB8b: {
14470 unsigned BitWidth = N->getValueType(0).getSizeInBits();
14471 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
1441514472 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
1441614473 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
1441714474 return SDValue();
217217 SMLSLDX, // Signed multiply subtract long dual exchange
218218 SMMLAR, // Signed multiply long, round and add
219219 SMMLSR, // Signed multiply long, subtract and round
220
221 // Single-lane QADD8/QSUB8 and QADD16/QSUB16. Only the bottom lane is meant; that's what the b suffix stands for.
222 QADD8b,
223 QSUB8b,
224 QADD16b,
225 QSUB16b,
220226
221227 // Operands of the standard BUILD_VECTOR node are not legalized, which
222228 // is fine if BUILD_VECTORs are always lowered to shuffles or other
236236 def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
237237 def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
238238 def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
239
// SelectionDAG node definitions for the bottom-lane saturating add/subtract
// opcodes ARMISD::QADD8b, QSUB8b, QADD16b and QSUB16b. Each one uses the
// SDT_ARMAnd type profile and an empty node-property list.
240 def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>;
241 def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
242 def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
243 def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
239244
240245 // Vector operations shared between NEON and MVE
241246
37493754 [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>;
37503755 }
37513756
// ARMV6Pat patterns that select the bottom-lane saturating nodes to the
// QADD8, QSUB8, QADD16 and QSUB16 instructions, passing both register
// operands through in the same order.
3757 def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
3758 (QADD8 rGPR:$Rm, rGPR:$Rn)>;
3759 def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
3760 (QSUB8 rGPR:$Rm, rGPR:$Rn)>;
3761 def : ARMV6Pat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn),
3762 (QADD16 rGPR:$Rm, rGPR:$Rn)>;
3763 def : ARMV6Pat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn),
3764 (QSUB16 rGPR:$Rm, rGPR:$Rn)>;
3765
37523766 def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>;
37533767 def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>;
37543768 def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>;
23932393 (t2QDADD rGPR:$Rm, rGPR:$Rn)>;
23942394 def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
23952395 (t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
2396
// Thumb2DSPPat patterns that select the bottom-lane saturating nodes to the
// t2QADD8, t2QSUB8, t2QADD16 and t2QSUB16 instructions, passing both register
// operands through in the same order.
2397 def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
2398 (t2QADD8 rGPR:$Rm, rGPR:$Rn)>;
2399 def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
2400 (t2QSUB8 rGPR:$Rm, rGPR:$Rn)>;
2401 def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn),
2402 (t2QADD16 rGPR:$Rm, rGPR:$Rn)>;
2403 def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn),
2404 (t2QSUB16 rGPR:$Rm, rGPR:$Rn)>;
23962405
23972406 // Signed/Unsigned add/subtract
23982407
232232 ; CHECK-T1-NEXT: .LCPI2_1:
233233 ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000
234234 ;
235 ; CHECK-T2-LABEL: func16:
236 ; CHECK-T2: @ %bb.0:
237 ; CHECK-T2-NEXT: add r0, r1
238 ; CHECK-T2-NEXT: movw r1, #32767
239 ; CHECK-T2-NEXT: cmp r0, r1
240 ; CHECK-T2-NEXT: it lt
241 ; CHECK-T2-NEXT: movlt r1, r0
242 ; CHECK-T2-NEXT: movw r0, #32768
243 ; CHECK-T2-NEXT: cmn.w r1, #32768
244 ; CHECK-T2-NEXT: movt r0, #65535
245 ; CHECK-T2-NEXT: it gt
246 ; CHECK-T2-NEXT: movgt r0, r1
247 ; CHECK-T2-NEXT: bx lr
248 ;
249 ; CHECK-ARM-LABEL: func16:
250 ; CHECK-ARM: @ %bb.0:
251 ; CHECK-ARM-NEXT: add r0, r0, r1
252 ; CHECK-ARM-NEXT: mov r1, #255
253 ; CHECK-ARM-NEXT: orr r1, r1, #32512
254 ; CHECK-ARM-NEXT: cmp r0, r1
255 ; CHECK-ARM-NEXT: movlt r1, r0
256 ; CHECK-ARM-NEXT: ldr r0, .LCPI2_0
257 ; CHECK-ARM-NEXT: cmn r1, #32768
258 ; CHECK-ARM-NEXT: movgt r0, r1
259 ; CHECK-ARM-NEXT: bx lr
260 ; CHECK-ARM-NEXT: .p2align 2
261 ; CHECK-ARM-NEXT: @ %bb.1:
262 ; CHECK-ARM-NEXT: .LCPI2_0:
263 ; CHECK-ARM-NEXT: .long 4294934528 @ 0xffff8000
235 ; CHECK-T2NODSP-LABEL: func16:
236 ; CHECK-T2NODSP: @ %bb.0:
237 ; CHECK-T2NODSP-NEXT: add r0, r1
238 ; CHECK-T2NODSP-NEXT: movw r1, #32767
239 ; CHECK-T2NODSP-NEXT: cmp r0, r1
240 ; CHECK-T2NODSP-NEXT: it lt
241 ; CHECK-T2NODSP-NEXT: movlt r1, r0
242 ; CHECK-T2NODSP-NEXT: movw r0, #32768
243 ; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
244 ; CHECK-T2NODSP-NEXT: movt r0, #65535
245 ; CHECK-T2NODSP-NEXT: it gt
246 ; CHECK-T2NODSP-NEXT: movgt r0, r1
247 ; CHECK-T2NODSP-NEXT: bx lr
248 ;
249 ; CHECK-T2DSP-LABEL: func16:
250 ; CHECK-T2DSP: @ %bb.0:
251 ; CHECK-T2DSP-NEXT: qadd16 r0, r0, r1
252 ; CHECK-T2DSP-NEXT: sxth r0, r0
253 ; CHECK-T2DSP-NEXT: bx lr
254 ;
255 ; CHECK-ARMNODPS-LABEL: func16:
256 ; CHECK-ARMNODPS: @ %bb.0:
257 ; CHECK-ARMNODPS-NEXT: add r0, r0, r1
258 ; CHECK-ARMNODPS-NEXT: mov r1, #255
259 ; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512
260 ; CHECK-ARMNODPS-NEXT: cmp r0, r1
261 ; CHECK-ARMNODPS-NEXT: movlt r1, r0
262 ; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0
263 ; CHECK-ARMNODPS-NEXT: cmn r1, #32768
264 ; CHECK-ARMNODPS-NEXT: movgt r0, r1
265 ; CHECK-ARMNODPS-NEXT: bx lr
266 ; CHECK-ARMNODPS-NEXT: .p2align 2
267 ; CHECK-ARMNODPS-NEXT: @ %bb.1:
268 ; CHECK-ARMNODPS-NEXT: .LCPI2_0:
269 ; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000
270 ;
271 ; CHECK-ARMBASEDSP-LABEL: func16:
272 ; CHECK-ARMBASEDSP: @ %bb.0:
273 ; CHECK-ARMBASEDSP-NEXT: add r0, r0, r1
274 ; CHECK-ARMBASEDSP-NEXT: mov r1, #255
275 ; CHECK-ARMBASEDSP-NEXT: orr r1, r1, #32512
276 ; CHECK-ARMBASEDSP-NEXT: cmp r0, r1
277 ; CHECK-ARMBASEDSP-NEXT: movlt r1, r0
278 ; CHECK-ARMBASEDSP-NEXT: ldr r0, .LCPI2_0
279 ; CHECK-ARMBASEDSP-NEXT: cmn r1, #32768
280 ; CHECK-ARMBASEDSP-NEXT: movgt r0, r1
281 ; CHECK-ARMBASEDSP-NEXT: bx lr
282 ; CHECK-ARMBASEDSP-NEXT: .p2align 2
283 ; CHECK-ARMBASEDSP-NEXT: @ %bb.1:
284 ; CHECK-ARMBASEDSP-NEXT: .LCPI2_0:
285 ; CHECK-ARMBASEDSP-NEXT: .long 4294934528 @ 0xffff8000
286 ;
287 ; CHECK-ARMDSP-LABEL: func16:
288 ; CHECK-ARMDSP: @ %bb.0:
289 ; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r1
290 ; CHECK-ARMDSP-NEXT: sxth r0, r0
291 ; CHECK-ARMDSP-NEXT: bx lr
264292 %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y)
265293 ret i16 %tmp
266294 }
283311 ; CHECK-T1-NEXT: .LBB3_4:
284312 ; CHECK-T1-NEXT: bx lr
285313 ;
286 ; CHECK-T2-LABEL: func8:
287 ; CHECK-T2: @ %bb.0:
288 ; CHECK-T2-NEXT: add r0, r1
289 ; CHECK-T2-NEXT: cmp r0, #127
290 ; CHECK-T2-NEXT: it ge
291 ; CHECK-T2-NEXT: movge r0, #127
292 ; CHECK-T2-NEXT: cmn.w r0, #128
293 ; CHECK-T2-NEXT: it le
294 ; CHECK-T2-NEXT: mvnle r0, #127
295 ; CHECK-T2-NEXT: bx lr
296 ;
297 ; CHECK-ARM-LABEL: func8:
298 ; CHECK-ARM: @ %bb.0:
299 ; CHECK-ARM-NEXT: add r0, r0, r1
300 ; CHECK-ARM-NEXT: cmp r0, #127
301 ; CHECK-ARM-NEXT: movge r0, #127
302 ; CHECK-ARM-NEXT: cmn r0, #128
303 ; CHECK-ARM-NEXT: mvnle r0, #127
304 ; CHECK-ARM-NEXT: bx lr
314 ; CHECK-T2NODSP-LABEL: func8:
315 ; CHECK-T2NODSP: @ %bb.0:
316 ; CHECK-T2NODSP-NEXT: add r0, r1
317 ; CHECK-T2NODSP-NEXT: cmp r0, #127
318 ; CHECK-T2NODSP-NEXT: it ge
319 ; CHECK-T2NODSP-NEXT: movge r0, #127
320 ; CHECK-T2NODSP-NEXT: cmn.w r0, #128
321 ; CHECK-T2NODSP-NEXT: it le
322 ; CHECK-T2NODSP-NEXT: mvnle r0, #127
323 ; CHECK-T2NODSP-NEXT: bx lr
324 ;
325 ; CHECK-T2DSP-LABEL: func8:
326 ; CHECK-T2DSP: @ %bb.0:
327 ; CHECK-T2DSP-NEXT: qadd8 r0, r0, r1
328 ; CHECK-T2DSP-NEXT: sxtb r0, r0
329 ; CHECK-T2DSP-NEXT: bx lr
330 ;
331 ; CHECK-ARMNODPS-LABEL: func8:
332 ; CHECK-ARMNODPS: @ %bb.0:
333 ; CHECK-ARMNODPS-NEXT: add r0, r0, r1
334 ; CHECK-ARMNODPS-NEXT: cmp r0, #127
335 ; CHECK-ARMNODPS-NEXT: movge r0, #127
336 ; CHECK-ARMNODPS-NEXT: cmn r0, #128
337 ; CHECK-ARMNODPS-NEXT: mvnle r0, #127
338 ; CHECK-ARMNODPS-NEXT: bx lr
339 ;
340 ; CHECK-ARMBASEDSP-LABEL: func8:
341 ; CHECK-ARMBASEDSP: @ %bb.0:
342 ; CHECK-ARMBASEDSP-NEXT: add r0, r0, r1
343 ; CHECK-ARMBASEDSP-NEXT: cmp r0, #127
344 ; CHECK-ARMBASEDSP-NEXT: movge r0, #127
345 ; CHECK-ARMBASEDSP-NEXT: cmn r0, #128
346 ; CHECK-ARMBASEDSP-NEXT: mvnle r0, #127
347 ; CHECK-ARMBASEDSP-NEXT: bx lr
348 ;
349 ; CHECK-ARMDSP-LABEL: func8:
350 ; CHECK-ARMDSP: @ %bb.0:
351 ; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r1
352 ; CHECK-ARMDSP-NEXT: sxtb r0, r0
353 ; CHECK-ARMDSP-NEXT: bx lr
305354 %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y)
306355 ret i8 %tmp
307356 }
257257 ; CHECK-T2DSP-LABEL: func16:
258258 ; CHECK-T2DSP: @ %bb.0:
259259 ; CHECK-T2DSP-NEXT: muls r1, r2, r1
260 ; CHECK-T2DSP-NEXT: sxtah r0, r0, r1
261 ; CHECK-T2DSP-NEXT: movw r1, #32767
262 ; CHECK-T2DSP-NEXT: cmp r0, r1
263 ; CHECK-T2DSP-NEXT: it lt
264 ; CHECK-T2DSP-NEXT: movlt r1, r0
265 ; CHECK-T2DSP-NEXT: movw r0, #32768
266 ; CHECK-T2DSP-NEXT: cmn.w r1, #32768
267 ; CHECK-T2DSP-NEXT: movt r0, #65535
268 ; CHECK-T2DSP-NEXT: it gt
269 ; CHECK-T2DSP-NEXT: movgt r0, r1
260 ; CHECK-T2DSP-NEXT: qadd16 r0, r0, r1
261 ; CHECK-T2DSP-NEXT: sxth r0, r0
270262 ; CHECK-T2DSP-NEXT: bx lr
271263 ;
272264 ; CHECK-ARM-LABEL: func16:
273265 ; CHECK-ARM: @ %bb.0:
274266 ; CHECK-ARM-NEXT: smulbb r1, r1, r2
275 ; CHECK-ARM-NEXT: sxtah r0, r0, r1
276 ; CHECK-ARM-NEXT: movw r1, #32767
277 ; CHECK-ARM-NEXT: cmp r0, r1
278 ; CHECK-ARM-NEXT: movlt r1, r0
279 ; CHECK-ARM-NEXT: movw r0, #32768
280 ; CHECK-ARM-NEXT: movt r0, #65535
281 ; CHECK-ARM-NEXT: cmn r1, #32768
282 ; CHECK-ARM-NEXT: movgt r0, r1
267 ; CHECK-ARM-NEXT: qadd16 r0, r0, r1
268 ; CHECK-ARM-NEXT: sxth r0, r0
283269 ; CHECK-ARM-NEXT: bx lr
284270 %a = mul i16 %y, %z
285271 %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
322308 ; CHECK-T2DSP-LABEL: func8:
323309 ; CHECK-T2DSP: @ %bb.0:
324310 ; CHECK-T2DSP-NEXT: muls r1, r2, r1
325 ; CHECK-T2DSP-NEXT: sxtab r0, r0, r1
326 ; CHECK-T2DSP-NEXT: cmp r0, #127
327 ; CHECK-T2DSP-NEXT: it ge
328 ; CHECK-T2DSP-NEXT: movge r0, #127
329 ; CHECK-T2DSP-NEXT: cmn.w r0, #128
330 ; CHECK-T2DSP-NEXT: it le
331 ; CHECK-T2DSP-NEXT: mvnle r0, #127
311 ; CHECK-T2DSP-NEXT: qadd8 r0, r0, r1
312 ; CHECK-T2DSP-NEXT: sxtb r0, r0
332313 ; CHECK-T2DSP-NEXT: bx lr
333314 ;
334315 ; CHECK-ARM-LABEL: func8:
335316 ; CHECK-ARM: @ %bb.0:
336317 ; CHECK-ARM-NEXT: smulbb r1, r1, r2
337 ; CHECK-ARM-NEXT: sxtab r0, r0, r1
338 ; CHECK-ARM-NEXT: cmp r0, #127
339 ; CHECK-ARM-NEXT: movge r0, #127
340 ; CHECK-ARM-NEXT: cmn r0, #128
341 ; CHECK-ARM-NEXT: mvnle r0, #127
318 ; CHECK-ARM-NEXT: qadd8 r0, r0, r1
319 ; CHECK-ARM-NEXT: sxtb r0, r0
342320 ; CHECK-ARM-NEXT: bx lr
343321 %a = mul i8 %y, %z
344322 %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a)
234234 ; CHECK-T1-NEXT: .LCPI2_1:
235235 ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000
236236 ;
237 ; CHECK-T2-LABEL: func16:
238 ; CHECK-T2: @ %bb.0:
239 ; CHECK-T2-NEXT: subs r0, r0, r1
240 ; CHECK-T2-NEXT: movw r1, #32767
241 ; CHECK-T2-NEXT: cmp r0, r1
242 ; CHECK-T2-NEXT: it lt
243 ; CHECK-T2-NEXT: movlt r1, r0
244 ; CHECK-T2-NEXT: movw r0, #32768
245 ; CHECK-T2-NEXT: cmn.w r1, #32768
246 ; CHECK-T2-NEXT: movt r0, #65535
247 ; CHECK-T2-NEXT: it gt
248 ; CHECK-T2-NEXT: movgt r0, r1
249 ; CHECK-T2-NEXT: bx lr
250 ;
251 ; CHECK-ARM-LABEL: func16:
252 ; CHECK-ARM: @ %bb.0:
253 ; CHECK-ARM-NEXT: sub r0, r0, r1
254 ; CHECK-ARM-NEXT: mov r1, #255
255 ; CHECK-ARM-NEXT: orr r1, r1, #32512
256 ; CHECK-ARM-NEXT: cmp r0, r1
257 ; CHECK-ARM-NEXT: movlt r1, r0
258 ; CHECK-ARM-NEXT: ldr r0, .LCPI2_0
259 ; CHECK-ARM-NEXT: cmn r1, #32768
260 ; CHECK-ARM-NEXT: movgt r0, r1
261 ; CHECK-ARM-NEXT: bx lr
262 ; CHECK-ARM-NEXT: .p2align 2
263 ; CHECK-ARM-NEXT: @ %bb.1:
264 ; CHECK-ARM-NEXT: .LCPI2_0:
265 ; CHECK-ARM-NEXT: .long 4294934528 @ 0xffff8000
237 ; CHECK-T2NODSP-LABEL: func16:
238 ; CHECK-T2NODSP: @ %bb.0:
239 ; CHECK-T2NODSP-NEXT: subs r0, r0, r1
240 ; CHECK-T2NODSP-NEXT: movw r1, #32767
241 ; CHECK-T2NODSP-NEXT: cmp r0, r1
242 ; CHECK-T2NODSP-NEXT: it lt
243 ; CHECK-T2NODSP-NEXT: movlt r1, r0
244 ; CHECK-T2NODSP-NEXT: movw r0, #32768
245 ; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
246 ; CHECK-T2NODSP-NEXT: movt r0, #65535
247 ; CHECK-T2NODSP-NEXT: it gt
248 ; CHECK-T2NODSP-NEXT: movgt r0, r1
249 ; CHECK-T2NODSP-NEXT: bx lr
250 ;
251 ; CHECK-T2DSP-LABEL: func16:
252 ; CHECK-T2DSP: @ %bb.0:
253 ; CHECK-T2DSP-NEXT: qsub16 r0, r0, r1
254 ; CHECK-T2DSP-NEXT: sxth r0, r0
255 ; CHECK-T2DSP-NEXT: bx lr
256 ;
257 ; CHECK-ARMNODPS-LABEL: func16:
258 ; CHECK-ARMNODPS: @ %bb.0:
259 ; CHECK-ARMNODPS-NEXT: sub r0, r0, r1
260 ; CHECK-ARMNODPS-NEXT: mov r1, #255
261 ; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512
262 ; CHECK-ARMNODPS-NEXT: cmp r0, r1
263 ; CHECK-ARMNODPS-NEXT: movlt r1, r0
264 ; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0
265 ; CHECK-ARMNODPS-NEXT: cmn r1, #32768
266 ; CHECK-ARMNODPS-NEXT: movgt r0, r1
267 ; CHECK-ARMNODPS-NEXT: bx lr
268 ; CHECK-ARMNODPS-NEXT: .p2align 2
269 ; CHECK-ARMNODPS-NEXT: @ %bb.1:
270 ; CHECK-ARMNODPS-NEXT: .LCPI2_0:
271 ; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000
272 ;
273 ; CHECK-ARMBASEDSP-LABEL: func16:
274 ; CHECK-ARMBASEDSP: @ %bb.0:
275 ; CHECK-ARMBASEDSP-NEXT: sub r0, r0, r1
276 ; CHECK-ARMBASEDSP-NEXT: mov r1, #255
277 ; CHECK-ARMBASEDSP-NEXT: orr r1, r1, #32512
278 ; CHECK-ARMBASEDSP-NEXT: cmp r0, r1
279 ; CHECK-ARMBASEDSP-NEXT: movlt r1, r0
280 ; CHECK-ARMBASEDSP-NEXT: ldr r0, .LCPI2_0
281 ; CHECK-ARMBASEDSP-NEXT: cmn r1, #32768
282 ; CHECK-ARMBASEDSP-NEXT: movgt r0, r1
283 ; CHECK-ARMBASEDSP-NEXT: bx lr
284 ; CHECK-ARMBASEDSP-NEXT: .p2align 2
285 ; CHECK-ARMBASEDSP-NEXT: @ %bb.1:
286 ; CHECK-ARMBASEDSP-NEXT: .LCPI2_0:
287 ; CHECK-ARMBASEDSP-NEXT: .long 4294934528 @ 0xffff8000
288 ;
289 ; CHECK-ARMDSP-LABEL: func16:
290 ; CHECK-ARMDSP: @ %bb.0:
291 ; CHECK-ARMDSP-NEXT: qsub16 r0, r0, r1
292 ; CHECK-ARMDSP-NEXT: sxth r0, r0
293 ; CHECK-ARMDSP-NEXT: bx lr
266294 %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y)
267295 ret i16 %tmp
268296 }
285313 ; CHECK-T1-NEXT: .LBB3_4:
286314 ; CHECK-T1-NEXT: bx lr
287315 ;
288 ; CHECK-T2-LABEL: func8:
289 ; CHECK-T2: @ %bb.0:
290 ; CHECK-T2-NEXT: subs r0, r0, r1
291 ; CHECK-T2-NEXT: cmp r0, #127
292 ; CHECK-T2-NEXT: it ge
293 ; CHECK-T2-NEXT: movge r0, #127
294 ; CHECK-T2-NEXT: cmn.w r0, #128
295 ; CHECK-T2-NEXT: it le
296 ; CHECK-T2-NEXT: mvnle r0, #127
297 ; CHECK-T2-NEXT: bx lr
298 ;
299 ; CHECK-ARM-LABEL: func8:
300 ; CHECK-ARM: @ %bb.0:
301 ; CHECK-ARM-NEXT: sub r0, r0, r1
302 ; CHECK-ARM-NEXT: cmp r0, #127
303 ; CHECK-ARM-NEXT: movge r0, #127
304 ; CHECK-ARM-NEXT: cmn r0, #128
305 ; CHECK-ARM-NEXT: mvnle r0, #127
306 ; CHECK-ARM-NEXT: bx lr
316 ; CHECK-T2NODSP-LABEL: func8:
317 ; CHECK-T2NODSP: @ %bb.0:
318 ; CHECK-T2NODSP-NEXT: subs r0, r0, r1
319 ; CHECK-T2NODSP-NEXT: cmp r0, #127
320 ; CHECK-T2NODSP-NEXT: it ge
321 ; CHECK-T2NODSP-NEXT: movge r0, #127
322 ; CHECK-T2NODSP-NEXT: cmn.w r0, #128
323 ; CHECK-T2NODSP-NEXT: it le
324 ; CHECK-T2NODSP-NEXT: mvnle r0, #127
325 ; CHECK-T2NODSP-NEXT: bx lr
326 ;
327 ; CHECK-T2DSP-LABEL: func8:
328 ; CHECK-T2DSP: @ %bb.0:
329 ; CHECK-T2DSP-NEXT: qsub8 r0, r0, r1
330 ; CHECK-T2DSP-NEXT: sxtb r0, r0
331 ; CHECK-T2DSP-NEXT: bx lr
332 ;
333 ; CHECK-ARMNODPS-LABEL: func8:
334 ; CHECK-ARMNODPS: @ %bb.0:
335 ; CHECK-ARMNODPS-NEXT: sub r0, r0, r1
336 ; CHECK-ARMNODPS-NEXT: cmp r0, #127
337 ; CHECK-ARMNODPS-NEXT: movge r0, #127
338 ; CHECK-ARMNODPS-NEXT: cmn r0, #128
339 ; CHECK-ARMNODPS-NEXT: mvnle r0, #127
340 ; CHECK-ARMNODPS-NEXT: bx lr
341 ;
342 ; CHECK-ARMBASEDSP-LABEL: func8:
343 ; CHECK-ARMBASEDSP: @ %bb.0:
344 ; CHECK-ARMBASEDSP-NEXT: sub r0, r0, r1
345 ; CHECK-ARMBASEDSP-NEXT: cmp r0, #127
346 ; CHECK-ARMBASEDSP-NEXT: movge r0, #127
347 ; CHECK-ARMBASEDSP-NEXT: cmn r0, #128
348 ; CHECK-ARMBASEDSP-NEXT: mvnle r0, #127
349 ; CHECK-ARMBASEDSP-NEXT: bx lr
350 ;
351 ; CHECK-ARMDSP-LABEL: func8:
352 ; CHECK-ARMDSP: @ %bb.0:
353 ; CHECK-ARMDSP-NEXT: qsub8 r0, r0, r1
354 ; CHECK-ARMDSP-NEXT: sxtb r0, r0
355 ; CHECK-ARMDSP-NEXT: bx lr
307356 %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y)
308357 ret i8 %tmp
309358 }
244244 ; CHECK-T1-NEXT: .LCPI2_1:
245245 ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000
246246 ;
247 ; CHECK-T2-LABEL: func16:
248 ; CHECK-T2: @ %bb.0:
249 ; CHECK-T2-NEXT: muls r1, r2, r1
250 ; CHECK-T2-NEXT: sxth r1, r1
251 ; CHECK-T2-NEXT: subs r0, r0, r1
252 ; CHECK-T2-NEXT: movw r1, #32767
253 ; CHECK-T2-NEXT: cmp r0, r1
254 ; CHECK-T2-NEXT: it lt
255 ; CHECK-T2-NEXT: movlt r1, r0
256 ; CHECK-T2-NEXT: movw r0, #32768
257 ; CHECK-T2-NEXT: movt r0, #65535
258 ; CHECK-T2-NEXT: cmn.w r1, #32768
259 ; CHECK-T2-NEXT: it gt
260 ; CHECK-T2-NEXT: movgt r0, r1
261 ; CHECK-T2-NEXT: bx lr
247 ; CHECK-T2NODSP-LABEL: func16:
248 ; CHECK-T2NODSP: @ %bb.0:
249 ; CHECK-T2NODSP-NEXT: muls r1, r2, r1
250 ; CHECK-T2NODSP-NEXT: sxth r1, r1
251 ; CHECK-T2NODSP-NEXT: subs r0, r0, r1
252 ; CHECK-T2NODSP-NEXT: movw r1, #32767
253 ; CHECK-T2NODSP-NEXT: cmp r0, r1
254 ; CHECK-T2NODSP-NEXT: it lt
255 ; CHECK-T2NODSP-NEXT: movlt r1, r0
256 ; CHECK-T2NODSP-NEXT: movw r0, #32768
257 ; CHECK-T2NODSP-NEXT: movt r0, #65535
258 ; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
259 ; CHECK-T2NODSP-NEXT: it gt
260 ; CHECK-T2NODSP-NEXT: movgt r0, r1
261 ; CHECK-T2NODSP-NEXT: bx lr
262 ;
263 ; CHECK-T2DSP-LABEL: func16:
264 ; CHECK-T2DSP: @ %bb.0:
265 ; CHECK-T2DSP-NEXT: muls r1, r2, r1
266 ; CHECK-T2DSP-NEXT: qsub16 r0, r0, r1
267 ; CHECK-T2DSP-NEXT: sxth r0, r0
268 ; CHECK-T2DSP-NEXT: bx lr
262269 ;
263270 ; CHECK-ARM-LABEL: func16:
264271 ; CHECK-ARM: @ %bb.0:
265272 ; CHECK-ARM-NEXT: smulbb r1, r1, r2
266 ; CHECK-ARM-NEXT: sxth r1, r1
267 ; CHECK-ARM-NEXT: sub r0, r0, r1
268 ; CHECK-ARM-NEXT: movw r1, #32767
269 ; CHECK-ARM-NEXT: cmp r0, r1
270 ; CHECK-ARM-NEXT: movlt r1, r0
271 ; CHECK-ARM-NEXT: movw r0, #32768
272 ; CHECK-ARM-NEXT: movt r0, #65535
273 ; CHECK-ARM-NEXT: cmn r1, #32768
274 ; CHECK-ARM-NEXT: movgt r0, r1
273 ; CHECK-ARM-NEXT: qsub16 r0, r0, r1
274 ; CHECK-ARM-NEXT: sxth r0, r0
275275 ; CHECK-ARM-NEXT: bx lr
276276 %a = mul i16 %y, %z
277277 %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %a)
298298 ; CHECK-T1-NEXT: .LBB3_4:
299299 ; CHECK-T1-NEXT: bx lr
300300 ;
301 ; CHECK-T2-LABEL: func8:
302 ; CHECK-T2: @ %bb.0:
303 ; CHECK-T2-NEXT: muls r1, r2, r1
304 ; CHECK-T2-NEXT: sxtb r1, r1
305 ; CHECK-T2-NEXT: subs r0, r0, r1
306 ; CHECK-T2-NEXT: cmp r0, #127
307 ; CHECK-T2-NEXT: it ge
308 ; CHECK-T2-NEXT: movge r0, #127
309 ; CHECK-T2-NEXT: cmn.w r0, #128
310 ; CHECK-T2-NEXT: it le
311 ; CHECK-T2-NEXT: mvnle r0, #127
312 ; CHECK-T2-NEXT: bx lr
301 ; CHECK-T2NODSP-LABEL: func8:
302 ; CHECK-T2NODSP: @ %bb.0:
303 ; CHECK-T2NODSP-NEXT: muls r1, r2, r1
304 ; CHECK-T2NODSP-NEXT: sxtb r1, r1
305 ; CHECK-T2NODSP-NEXT: subs r0, r0, r1
306 ; CHECK-T2NODSP-NEXT: cmp r0, #127
307 ; CHECK-T2NODSP-NEXT: it ge
308 ; CHECK-T2NODSP-NEXT: movge r0, #127
309 ; CHECK-T2NODSP-NEXT: cmn.w r0, #128
310 ; CHECK-T2NODSP-NEXT: it le
311 ; CHECK-T2NODSP-NEXT: mvnle r0, #127
312 ; CHECK-T2NODSP-NEXT: bx lr
313 ;
314 ; CHECK-T2DSP-LABEL: func8:
315 ; CHECK-T2DSP: @ %bb.0:
316 ; CHECK-T2DSP-NEXT: muls r1, r2, r1
317 ; CHECK-T2DSP-NEXT: qsub8 r0, r0, r1
318 ; CHECK-T2DSP-NEXT: sxtb r0, r0
319 ; CHECK-T2DSP-NEXT: bx lr
313320 ;
314321 ; CHECK-ARM-LABEL: func8:
315322 ; CHECK-ARM: @ %bb.0:
316323 ; CHECK-ARM-NEXT: smulbb r1, r1, r2
317 ; CHECK-ARM-NEXT: sxtb r1, r1
318 ; CHECK-ARM-NEXT: sub r0, r0, r1
319 ; CHECK-ARM-NEXT: cmp r0, #127
320 ; CHECK-ARM-NEXT: movge r0, #127
321 ; CHECK-ARM-NEXT: cmn r0, #128
322 ; CHECK-ARM-NEXT: mvnle r0, #127
324 ; CHECK-ARM-NEXT: qsub8 r0, r0, r1
325 ; CHECK-ARM-NEXT: sxtb r0, r0
323326 ; CHECK-ARM-NEXT: bx lr
324327 %a = mul i8 %y, %z
325328 %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %a)