llvm.org GIT mirror llvm / 20a90ab
Adds code to PPC ISEL lowering to recognize byte inserts from vector_shuffles, and use P9 shift and vector insert byte instructions instead of vperm. Extends tests from vector insert half-word. Differential Revision: https://reviews.llvm.org/D34497 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317503 91177308-0d34-0410-b5e6-96231b3b80d8 Graham Yiu 2 years ago
4 changed file(s) with 695 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
78897889 return DAG.getNode(ISD::BITCAST, dl, VT, T);
78907890 }
78917891
7892 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
7893 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
7894 /// SDValue. N is the shuffle to match; any new nodes are created in DAG.
7895 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
7896 SelectionDAG &DAG) const {
7897 const unsigned BytesInVector = 16;
7898 bool IsLE = Subtarget.isLittleEndian();
7899 SDLoc dl(N);
7900 SDValue V1 = N->getOperand(0);
7901 SDValue V2 = N->getOperand(1);
7902 unsigned ShiftElts = 0, InsertAtByte = 0;
7903 bool Swap = false;
7904
7905 // Shifts required to move the byte we want into the VINSERTB source element (VINSERTBSrcElem below), indexed by the byte's element number.
7906 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
7907 0, 15, 14, 13, 12, 11, 10, 9};
7908 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
7909 1, 2, 3, 4, 5, 6, 7, 8};
7910
7911 ArrayRef<int> Mask = N->getMask();
7912 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
7913
7914 // For each mask element, find out if we're just inserting something
7915 // from V2 into V1 or vice versa.
7916 // Possible permutations inserting an element from V2 into V1:
7917 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
7918 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
7919 // ...
7920 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
7921 // Inserting from V1 into V2 will be similar, except mask range will be
7922 // [16,31].
7923
7924 bool FoundCandidate = false;
7925 // If both vector operands for the shuffle are the same vector, the mask
7926 // will contain only elements from the first one and the second one will be
7927 // undef.
7928 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
7929 // Go through the mask of bytes to find an element that's being moved
7930 // from one vector to the other.
7931 for (unsigned i = 0; i < BytesInVector; ++i) {
7932 unsigned CurrentElement = Mask[i];
7933 // If 2nd operand is undefined, we should only look for VINSERTBSrcElem
7934 // (element 7 on big-endian, element 8 on little-endian) in the Mask.
7935 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
7936 continue;
7937
7938 bool OtherElementsInOrder = true;
7939 // Examine the other elements in the Mask to see if they're in original
7940 // order.
7941 for (unsigned j = 0; j < BytesInVector; ++j) {
7942 if (j == i)
7943 continue;
7944 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
7945 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
7946 // in which case we assume we're always picking from the 1st operand.
7947 int MaskOffset =
7948 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
7949 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
7950 OtherElementsInOrder = false;
7951 break;
7952 }
7953 }
7954 // If other elements are in original order, we record the number of shifts
7955 // we need to get the element we want into the VINSERTB source element. Also
7956 // record which byte in the vector we should insert into.
7957 if (OtherElementsInOrder) {
7958 // If 2nd operand is undefined, we assume no shifts and no swapping.
7959 if (V2.isUndef()) {
7960 ShiftElts = 0;
7961 Swap = false;
7962 } else {
7963 // Only the low 4 bits are needed for the shift: they index the byte within its source vector, and Swap (set when the byte comes from V1) records which vector that is.
7964 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
7965 : BigEndianShifts[CurrentElement & 0xF];
7966 Swap = CurrentElement < BytesInVector;
7967 }
7968 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
7969 FoundCandidate = true;
7970 break;
7971 }
7972 }
7973
7974 if (!FoundCandidate)
7975 return SDValue();
7976
7977 // Candidate found, construct the proper SDAG sequence with VINSERTB,
7978 // optionally with VECSHL if shift is required.
7979 if (Swap)
7980 std::swap(V1, V2);
7981 if (V2.isUndef())
7982 V2 = V1;
7983 if (ShiftElts) {
7984 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
7985 DAG.getConstant(ShiftElts, dl, MVT::i32));
7986 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
7987 DAG.getConstant(InsertAtByte, dl, MVT::i32));
7988 }
7989 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
7990 DAG.getConstant(InsertAtByte, dl, MVT::i32));
7991 }
7992
78927993 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
78937994 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
78947995 /// SDValue.
80368137 }
80378138
80388139 if (Subtarget.hasP9Altivec()) {
8039 SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG);
8040 if (NewISDNode)
8140 SDValue NewISDNode;
8141 if (NewISDNode = lowerToVINSERTH(SVOp, DAG))
8142 return NewISDNode;
8143
8144 if (NewISDNode = lowerToVINSERTB(SVOp, DAG))
80418145 return NewISDNode;
80428146 }
80438147
10791079 /// from one vector into the other.
10801080 SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
10811081
1082 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
1083 /// handled by the VINSERTB instruction introduced in ISA 3.0. This is
1084 /// essentially the v16i8 (byte) counterpart of lowerToVINSERTH.
1085 SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
1086
10821087 }; // end class PPCTargetLowering
10831088
10841089 namespace PPC {
13111311 def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>;
13121312
13131313 // Vector Insert Element Instructions
1314 def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>;
1314 def VINSERTB : VXForm_1<781, (outs vrrc:$vD),
1315 (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
1316 "vinsertb $vD, $vB, $UIM", IIC_VecGeneral,
1317 [(set v16i8:$vD, (PPCvecinsert v16i8:$vDi, v16i8:$vB,
1318 imm32SExt16:$UIM))]>,
1319 RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; // $vDi is constrained to the same register as $vD and is left out of the encoding.
13151320 def VINSERTH : VXForm_1<845, (outs vrrc:$vD),
13161321 (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
13171322 "vinserth $vD, $vB, $UIM", IIC_VecGeneral,
297297 ret <8 x i16> %vecins
298298 }
299299
300 ; The following testcases take one byte element from the second vector and
301 ; insert it at various locations in the first vector.
302 define <16 x i8> @shuffle_vector_byte_0_16(<16 x i8> %a, <16 x i8> %b) {
303 entry:
304 ; CHECK-LABEL: shuffle_vector_byte_0_16
305 ; CHECK: vsldoi 3, 3, 3, 8
306 ; CHECK: vinsertb 2, 3, 15
307 ; CHECK-BE-LABEL: shuffle_vector_byte_0_16
308 ; CHECK-BE: vsldoi 3, 3, 3, 9
309 ; CHECK-BE: vinsertb 2, 3, 0
310 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
311 ret <16 x i8> %vecins
312 }
313
314 define <16 x i8> @shuffle_vector_byte_1_25(<16 x i8> %a, <16 x i8> %b) {
315 entry:
316 ; CHECK-LABEL: shuffle_vector_byte_1_25
317 ; CHECK: vsldoi 3, 3, 3, 15
318 ; CHECK: vinsertb 2, 3, 14
319 ; CHECK-BE-LABEL: shuffle_vector_byte_1_25
320 ; CHECK-BE: vsldoi 3, 3, 3, 2
321 ; CHECK-BE: vinsertb 2, 3, 1
322 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 25, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
323 ret <16 x i8> %vecins
324 }
325
326 define <16 x i8> @shuffle_vector_byte_2_18(<16 x i8> %a, <16 x i8> %b) {
327 entry:
328 ; CHECK-LABEL: shuffle_vector_byte_2_18
329 ; CHECK: vsldoi 3, 3, 3, 6
330 ; CHECK: vinsertb 2, 3, 13
331 ; CHECK-BE-LABEL: shuffle_vector_byte_2_18
332 ; CHECK-BE: vsldoi 3, 3, 3, 11
333 ; CHECK-BE: vinsertb 2, 3, 2
334 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
335 ret <16 x i8> %vecins
336 }
337
338 define <16 x i8> @shuffle_vector_byte_3_27(<16 x i8> %a, <16 x i8> %b) {
339 entry:
340 ; CHECK-LABEL: shuffle_vector_byte_3_27
341 ; CHECK: vsldoi 3, 3, 3, 13
342 ; CHECK: vinsertb 2, 3, 12
343 ; CHECK-BE-LABEL: shuffle_vector_byte_3_27
344 ; CHECK-BE: vsldoi 3, 3, 3, 4
345 ; CHECK-BE: vinsertb 2, 3, 3
346 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 27, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
347 ret <16 x i8> %vecins
348 }
349
350 define <16 x i8> @shuffle_vector_byte_4_20(<16 x i8> %a, <16 x i8> %b) {
351 entry:
352 ; CHECK-LABEL: shuffle_vector_byte_4_20
353 ; CHECK: vsldoi 3, 3, 3, 4
354 ; CHECK: vinsertb 2, 3, 11
355 ; CHECK-BE-LABEL: shuffle_vector_byte_4_20
356 ; CHECK-BE: vsldoi 3, 3, 3, 13
357 ; CHECK-BE: vinsertb 2, 3, 4
358 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
359 ret <16 x i8> %vecins
360 }
361
362 define <16 x i8> @shuffle_vector_byte_5_29(<16 x i8> %a, <16 x i8> %b) {
363 entry:
364 ; CHECK-LABEL: shuffle_vector_byte_5_29
365 ; CHECK: vsldoi 3, 3, 3, 11
366 ; CHECK: vinsertb 2, 3, 10
367 ; CHECK-BE-LABEL: shuffle_vector_byte_5_29
368 ; CHECK-BE: vsldoi 3, 3, 3, 6
369 ; CHECK-BE: vinsertb 2, 3, 5
370 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 29, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
371 ret <16 x i8> %vecins
372 }
373
374 define <16 x i8> @shuffle_vector_byte_6_22(<16 x i8> %a, <16 x i8> %b) {
375 entry:
376 ; CHECK-LABEL: shuffle_vector_byte_6_22
377 ; CHECK: vsldoi 3, 3, 3, 2
378 ; CHECK: vinsertb 2, 3, 9
379 ; CHECK-BE-LABEL: shuffle_vector_byte_6_22
380 ; CHECK-BE: vsldoi 3, 3, 3, 15
381 ; CHECK-BE: vinsertb 2, 3, 6
382 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 22, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
383 ret <16 x i8> %vecins
384 }
385
386 define <16 x i8> @shuffle_vector_byte_7_31(<16 x i8> %a, <16 x i8> %b) {
387 entry:
388 ; CHECK-LABEL: shuffle_vector_byte_7_31
389 ; CHECK: vsldoi 3, 3, 3, 9
390 ; CHECK: vinsertb 2, 3, 8
391 ; CHECK-BE-LABEL: shuffle_vector_byte_7_31
392 ; CHECK-BE: vsldoi 3, 3, 3, 8
393 ; CHECK-BE: vinsertb 2, 3, 7
394 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
395 ret <16 x i8> %vecins
396 }
397
398 define <16 x i8> @shuffle_vector_byte_8_24(<16 x i8> %a, <16 x i8> %b) {
399 entry:
400 ; CHECK-LABEL: shuffle_vector_byte_8_24
401 ; CHECK: vinsertb 2, 3, 7
402 ; CHECK-BE-LABEL: shuffle_vector_byte_8_24
403 ; CHECK-BE: vsldoi 3, 3, 3, 1
404 ; CHECK-BE: vinsertb 2, 3, 8
405 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
406 ret <16 x i8> %vecins
407 }
408
409 define <16 x i8> @shuffle_vector_byte_9_17(<16 x i8> %a, <16 x i8> %b) {
410 entry:
411 ; CHECK-LABEL: shuffle_vector_byte_9_17
412 ; CHECK: vsldoi 3, 3, 3, 7
413 ; CHECK: vinsertb 2, 3, 6
414 ; CHECK-BE-LABEL: shuffle_vector_byte_9_17
415 ; CHECK-BE: vsldoi 3, 3, 3, 10
416 ; CHECK-BE: vinsertb 2, 3, 9
417 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
418 ret <16 x i8> %vecins
419 }
420
421 define <16 x i8> @shuffle_vector_byte_10_26(<16 x i8> %a, <16 x i8> %b) {
422 entry:
423 ; CHECK-LABEL: shuffle_vector_byte_10_26
424 ; CHECK: vsldoi 3, 3, 3, 14
425 ; CHECK: vinsertb 2, 3, 5
426 ; CHECK-BE-LABEL: shuffle_vector_byte_10_26
427 ; CHECK-BE: vsldoi 3, 3, 3, 3
428 ; CHECK-BE: vinsertb 2, 3, 10
429 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15>
430 ret <16 x i8> %vecins
431 }
432
433 define <16 x i8> @shuffle_vector_byte_11_19(<16 x i8> %a, <16 x i8> %b) {
434 entry:
435 ; CHECK-LABEL: shuffle_vector_byte_11_19
436 ; CHECK: vsldoi 3, 3, 3, 5
437 ; CHECK: vinsertb 2, 3, 4
438 ; CHECK-BE-LABEL: shuffle_vector_byte_11_19
439 ; CHECK-BE: vsldoi 3, 3, 3, 12
440 ; CHECK-BE: vinsertb 2, 3, 11
441 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 19, i32 12, i32 13, i32 14, i32 15>
442 ret <16 x i8> %vecins
443 }
444
445 define <16 x i8> @shuffle_vector_byte_12_28(<16 x i8> %a, <16 x i8> %b) {
446 entry:
447 ; CHECK-LABEL: shuffle_vector_byte_12_28
448 ; CHECK: vsldoi 3, 3, 3, 12
449 ; CHECK: vinsertb 2, 3, 3
450 ; CHECK-BE-LABEL: shuffle_vector_byte_12_28
451 ; CHECK-BE: vsldoi 3, 3, 3, 5
452 ; CHECK-BE: vinsertb 2, 3, 12
453 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 15>
454 ret <16 x i8> %vecins
455 }
456
457 define <16 x i8> @shuffle_vector_byte_13_21(<16 x i8> %a, <16 x i8> %b) {
458 entry:
459 ; CHECK-LABEL: shuffle_vector_byte_13_21
460 ; CHECK: vsldoi 3, 3, 3, 3
461 ; CHECK: vinsertb 2, 3, 2
462 ; CHECK-BE-LABEL: shuffle_vector_byte_13_21
463 ; CHECK-BE: vsldoi 3, 3, 3, 14
464 ; CHECK-BE: vinsertb 2, 3, 13
465 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 21, i32 14, i32 15>
466 ret <16 x i8> %vecins
467 }
468
469 define <16 x i8> @shuffle_vector_byte_14_30(<16 x i8> %a, <16 x i8> %b) {
470 entry:
471 ; CHECK-LABEL: shuffle_vector_byte_14_30
472 ; CHECK: vsldoi 3, 3, 3, 10
473 ; CHECK: vinsertb 2, 3, 1
474 ; CHECK-BE-LABEL: shuffle_vector_byte_14_30
475 ; CHECK-BE: vsldoi 3, 3, 3, 7
476 ; CHECK-BE: vinsertb 2, 3, 14
477 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 15>
478 ret <16 x i8> %vecins
479 }
480
481 define <16 x i8> @shuffle_vector_byte_15_23(<16 x i8> %a, <16 x i8> %b) {
482 entry:
483 ; CHECK-LABEL: shuffle_vector_byte_15_23
484 ; CHECK: vsldoi 3, 3, 3, 1
485 ; CHECK: vinsertb 2, 3, 0
486 ; CHECK-BE-LABEL: shuffle_vector_byte_15_23
487 ; CHECK-BE: vinsertb 2, 3, 15
488 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 23>
489 ret <16 x i8> %vecins
490 }
491
492 ; The following testcases take one byte element from the first vector and
493 ; insert it at various locations in the second vector.
494 define <16 x i8> @shuffle_vector_byte_16_8(<16 x i8> %a, <16 x i8> %b) {
495 entry:
496 ; CHECK-LABEL: shuffle_vector_byte_16_8
497 ; CHECK: vinsertb 3, 2, 15
498 ; CHECK: vmr 2, 3
499 ; CHECK-BE-LABEL: shuffle_vector_byte_16_8
500 ; CHECK-BE: vsldoi 2, 2, 2, 1
501 ; CHECK-BE: vinsertb 3, 2, 0
502 ; CHECK-BE: vmr 2, 3
503 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
504 ret <16 x i8> %vecins
505 }
506
507 define <16 x i8> @shuffle_vector_byte_17_1(<16 x i8> %a, <16 x i8> %b) {
508 entry:
509 ; CHECK-LABEL: shuffle_vector_byte_17_1
510 ; CHECK: vsldoi 2, 2, 2, 7
511 ; CHECK: vinsertb 3, 2, 14
512 ; CHECK: vmr 2, 3
513 ; CHECK-BE-LABEL: shuffle_vector_byte_17_1
514 ; CHECK-BE: vsldoi 2, 2, 2, 10
515 ; CHECK-BE: vinsertb 3, 2, 1
516 ; CHECK-BE: vmr 2, 3
517 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
518 ret <16 x i8> %vecins
519 }
520
521 define <16 x i8> @shuffle_vector_byte_18_10(<16 x i8> %a, <16 x i8> %b) {
522 entry:
523 ; CHECK-LABEL: shuffle_vector_byte_18_10
524 ; CHECK: vsldoi 2, 2, 2, 14
525 ; CHECK: vinsertb 3, 2, 13
526 ; CHECK: vmr 2, 3
527 ; CHECK-BE-LABEL: shuffle_vector_byte_18_10
528 ; CHECK-BE: vsldoi 2, 2, 2, 3
529 ; CHECK-BE: vinsertb 3, 2, 2
530 ; CHECK-BE: vmr 2, 3
531 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 10, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
532 ret <16 x i8> %vecins
533 }
534
535 define <16 x i8> @shuffle_vector_byte_19_3(<16 x i8> %a, <16 x i8> %b) {
536 entry:
537 ; CHECK-LABEL: shuffle_vector_byte_19_3
538 ; CHECK: vsldoi 2, 2, 2, 5
539 ; CHECK: vinsertb 3, 2, 12
540 ; CHECK: vmr 2, 3
541 ; CHECK-BE-LABEL: shuffle_vector_byte_19_3
542 ; CHECK-BE: vsldoi 2, 2, 2, 12
543 ; CHECK-BE: vinsertb 3, 2, 3
544 ; CHECK-BE: vmr 2, 3
545 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
546 ret <16 x i8> %vecins
547 }
548
549 define <16 x i8> @shuffle_vector_byte_20_12(<16 x i8> %a, <16 x i8> %b) {
550 entry:
551 ; CHECK-LABEL: shuffle_vector_byte_20_12
552 ; CHECK: vsldoi 2, 2, 2, 12
553 ; CHECK: vinsertb 3, 2, 11
554 ; CHECK: vmr 2, 3
555 ; CHECK-BE-LABEL: shuffle_vector_byte_20_12
556 ; CHECK-BE: vsldoi 2, 2, 2, 5
557 ; CHECK-BE: vinsertb 3, 2, 4
558 ; CHECK-BE: vmr 2, 3
559 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 12, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
560 ret <16 x i8> %vecins
561 }
562
563 define <16 x i8> @shuffle_vector_byte_21_5(<16 x i8> %a, <16 x i8> %b) {
564 entry:
565 ; CHECK-LABEL: shuffle_vector_byte_21_5
566 ; CHECK: vsldoi 2, 2, 2, 3
567 ; CHECK: vinsertb 3, 2, 10
568 ; CHECK: vmr 2, 3
569 ; CHECK-BE-LABEL: shuffle_vector_byte_21_5
570 ; CHECK-BE: vsldoi 2, 2, 2, 14
571 ; CHECK-BE: vinsertb 3, 2, 5
572 ; CHECK-BE: vmr 2, 3
573 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 5, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
574 ret <16 x i8> %vecins
575 }
576
577 define <16 x i8> @shuffle_vector_byte_22_14(<16 x i8> %a, <16 x i8> %b) {
578 entry:
579 ; CHECK-LABEL: shuffle_vector_byte_22_14
580 ; CHECK: vsldoi 2, 2, 2, 10
581 ; CHECK: vinsertb 3, 2, 9
582 ; CHECK: vmr 2, 3
583 ; CHECK-BE-LABEL: shuffle_vector_byte_22_14
584 ; CHECK-BE: vsldoi 2, 2, 2, 7
585 ; CHECK-BE: vinsertb 3, 2, 6
586 ; CHECK-BE: vmr 2, 3
587 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 14, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
588 ret <16 x i8> %vecins
589 }
590
591 define <16 x i8> @shuffle_vector_byte_23_7(<16 x i8> %a, <16 x i8> %b) {
592 entry:
593 ; CHECK-LABEL: shuffle_vector_byte_23_7
594 ; CHECK: vsldoi 2, 2, 2, 1
595 ; CHECK: vinsertb 3, 2, 8
596 ; CHECK: vmr 2, 3
597 ; CHECK-BE-LABEL: shuffle_vector_byte_23_7
598 ; CHECK-BE: vinsertb 3, 2, 7
599 ; CHECK-BE: vmr 2, 3
600 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
601 ret <16 x i8> %vecins
602 }
603
604 define <16 x i8> @shuffle_vector_byte_24_0(<16 x i8> %a, <16 x i8> %b) {
605 entry:
606 ; CHECK-LABEL: shuffle_vector_byte_24_0
607 ; CHECK: vsldoi 2, 2, 2, 8
608 ; CHECK: vinsertb 3, 2, 7
609 ; CHECK: vmr 2, 3
610 ; CHECK-BE-LABEL: shuffle_vector_byte_24_0
611 ; CHECK-BE: vsldoi 2, 2, 2, 9
612 ; CHECK-BE: vinsertb 3, 2, 8
613 ; CHECK-BE: vmr 2, 3
614 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
615 ret <16 x i8> %vecins
616 }
617
618 define <16 x i8> @shuffle_vector_byte_25_9(<16 x i8> %a, <16 x i8> %b) {
619 entry:
620 ; CHECK-LABEL: shuffle_vector_byte_25_9
621 ; CHECK: vsldoi 2, 2, 2, 15
622 ; CHECK: vinsertb 3, 2, 6
623 ; CHECK: vmr 2, 3
624 ; CHECK-BE-LABEL: shuffle_vector_byte_25_9
625 ; CHECK-BE: vsldoi 2, 2, 2, 2
626 ; CHECK-BE: vinsertb 3, 2, 9
627 ; CHECK-BE: vmr 2, 3
628 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 9, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
629 ret <16 x i8> %vecins
630 }
631
632 define <16 x i8> @shuffle_vector_byte_26_2(<16 x i8> %a, <16 x i8> %b) {
633 entry:
634 ; CHECK-LABEL: shuffle_vector_byte_26_2
635 ; CHECK: vsldoi 2, 2, 2, 6
636 ; CHECK: vinsertb 3, 2, 5
637 ; CHECK: vmr 2, 3
638 ; CHECK-BE-LABEL: shuffle_vector_byte_26_2
639 ; CHECK-BE: vsldoi 2, 2, 2, 11
640 ; CHECK-BE: vinsertb 3, 2, 10
641 ; CHECK-BE: vmr 2, 3
642 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 2, i32 27, i32 28, i32 29, i32 30, i32 31>
643 ret <16 x i8> %vecins
644 }
645
646 define <16 x i8> @shuffle_vector_byte_27_11(<16 x i8> %a, <16 x i8> %b) {
647 entry:
648 ; CHECK-LABEL: shuffle_vector_byte_27_11
649 ; CHECK: vsldoi 2, 2, 2, 13
650 ; CHECK: vinsertb 3, 2, 4
651 ; CHECK: vmr 2, 3
652 ; CHECK-BE-LABEL: shuffle_vector_byte_27_11
653 ; CHECK-BE: vsldoi 2, 2, 2, 4
654 ; CHECK-BE: vinsertb 3, 2, 11
655 ; CHECK-BE: vmr 2, 3
656 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
657 ret <16 x i8> %vecins
658 }
659
660 define <16 x i8> @shuffle_vector_byte_28_4(<16 x i8> %a, <16 x i8> %b) {
661 entry:
662 ; CHECK-LABEL: shuffle_vector_byte_28_4
663 ; CHECK: vsldoi 2, 2, 2, 4
664 ; CHECK: vinsertb 3, 2, 3
665 ; CHECK: vmr 2, 3
666 ; CHECK-BE-LABEL: shuffle_vector_byte_28_4
667 ; CHECK-BE: vsldoi 2, 2, 2, 13
668 ; CHECK-BE: vinsertb 3, 2, 12
669 ; CHECK-BE: vmr 2, 3
670 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 4, i32 29, i32 30, i32 31>
671 ret <16 x i8> %vecins
672 }
673
674 define <16 x i8> @shuffle_vector_byte_29_13(<16 x i8> %a, <16 x i8> %b) {
675 entry:
676 ; CHECK-LABEL: shuffle_vector_byte_29_13
677 ; CHECK: vsldoi 2, 2, 2, 11
678 ; CHECK: vinsertb 3, 2, 2
679 ; CHECK: vmr 2, 3
680 ; CHECK-BE-LABEL: shuffle_vector_byte_29_13
681 ; CHECK-BE: vsldoi 2, 2, 2, 6
682 ; CHECK-BE: vinsertb 3, 2, 13
683 ; CHECK-BE: vmr 2, 3
684 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 13, i32 30, i32 31>
685 ret <16 x i8> %vecins
686 }
687
688 define <16 x i8> @shuffle_vector_byte_30_6(<16 x i8> %a, <16 x i8> %b) {
689 entry:
690 ; CHECK-LABEL: shuffle_vector_byte_30_6
691 ; CHECK: vsldoi 2, 2, 2, 2
692 ; CHECK: vinsertb 3, 2, 1
693 ; CHECK: vmr 2, 3
694 ; CHECK-BE-LABEL: shuffle_vector_byte_30_6
695 ; CHECK-BE: vsldoi 2, 2, 2, 15
696 ; CHECK-BE: vinsertb 3, 2, 14
697 ; CHECK-BE: vmr 2, 3
698 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 6, i32 31>
699 ret <16 x i8> %vecins
700 }
701
702 define <16 x i8> @shuffle_vector_byte_31_15(<16 x i8> %a, <16 x i8> %b) {
703 entry:
704 ; CHECK-LABEL: shuffle_vector_byte_31_15
705 ; CHECK: vsldoi 2, 2, 2, 9
706 ; CHECK: vinsertb 3, 2, 0
707 ; CHECK: vmr 2, 3
708 ; CHECK-BE-LABEL: shuffle_vector_byte_31_15
709 ; CHECK-BE: vsldoi 2, 2, 2, 8
710 ; CHECK-BE: vinsertb 3, 2, 15
711 ; CHECK-BE: vmr 2, 3
712 %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 15>
713 ret <16 x i8> %vecins
714 }
715
716 ; The following testcases use the same vector in both arguments of the
717 ; shufflevector. If byte element 7 in BE mode (or 8 in LE mode) is the one
718 ; we're attempting to insert, then we can use the vector insert instruction.
719 define <16 x i8> @shuffle_vector_byte_0_7(<16 x i8> %a) {
720 entry:
721 ; CHECK-LABEL: shuffle_vector_byte_0_7
722 ; CHECK-NOT: vinsertb
723 ; CHECK-BE-LABEL: shuffle_vector_byte_0_7
724 ; CHECK-BE: vinsertb 2, 2, 0
725 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
726 ret <16 x i8> %vecins
727 }
728
729 define <16 x i8> @shuffle_vector_byte_1_8(<16 x i8> %a) {
730 entry:
731 ; CHECK-LABEL: shuffle_vector_byte_1_8
732 ; CHECK: vinsertb 2, 2, 14
733 ; CHECK-BE-LABEL: shuffle_vector_byte_1_8
734 ; CHECK-BE-NOT: vinsertb
735 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 8, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
736 ret <16 x i8> %vecins
737 }
738
739 define <16 x i8> @shuffle_vector_byte_2_8(<16 x i8> %a) {
740 entry:
741 ; CHECK-LABEL: shuffle_vector_byte_2_8
742 ; CHECK: vinsertb 2, 2, 13
743 ; CHECK-BE-LABEL: shuffle_vector_byte_2_8
744 ; CHECK-BE-NOT: vinsertb
745 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
746 ret <16 x i8> %vecins
747 }
748
749 define <16 x i8> @shuffle_vector_byte_3_7(<16 x i8> %a) {
750 entry:
751 ; CHECK-LABEL: shuffle_vector_byte_3_7
752 ; CHECK-NOT: vinsertb
753 ; CHECK-BE-LABEL: shuffle_vector_byte_3_7
754 ; CHECK-BE: vinsertb 2, 2, 3
755 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
756 ret <16 x i8> %vecins
757 }
758
759 define <16 x i8> @shuffle_vector_byte_4_7(<16 x i8> %a) {
760 entry:
761 ; CHECK-LABEL: shuffle_vector_byte_4_7
762 ; CHECK-NOT: vinsertb
763 ; CHECK-BE-LABEL: shuffle_vector_byte_4_7
764 ; CHECK-BE: vinsertb 2, 2, 4
765 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
766 ret <16 x i8> %vecins
767 }
768
769 define <16 x i8> @shuffle_vector_byte_5_8(<16 x i8> %a) {
770 entry:
771 ; CHECK-LABEL: shuffle_vector_byte_5_8
772 ; CHECK: vinsertb 2, 2, 10
773 ; CHECK-BE-LABEL: shuffle_vector_byte_5_8
774 ; CHECK-BE-NOT: vinsertb
775 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 8, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
776 ret <16 x i8> %vecins
777 }
778
779 define <16 x i8> @shuffle_vector_byte_6_8(<16 x i8> %a) {
780 entry:
781 ; CHECK-LABEL: shuffle_vector_byte_6_8
782 ; CHECK: vinsertb 2, 2, 9
783 ; CHECK-BE-LABEL: shuffle_vector_byte_6_8
784 ; CHECK-BE-NOT: vinsertb
785 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
786 ret <16 x i8> %vecins
787 }
788
789 define <16 x i8> @shuffle_vector_byte_7_8(<16 x i8> %a) {
790 entry:
791 ; CHECK-LABEL: shuffle_vector_byte_7_8
792 ; CHECK: vinsertb 2, 2, 8
793 ; CHECK-BE-LABEL: shuffle_vector_byte_7_8
794 ; CHECK-BE-NOT: vinsertb
795 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
796 ret <16 x i8> %vecins
797 }
798
799 define <16 x i8> @shuffle_vector_byte_8_7(<16 x i8> %a) {
800 entry:
801 ; CHECK-LABEL: shuffle_vector_byte_8_7
802 ; CHECK-NOT: vinsertb
803 ; CHECK-BE-LABEL: shuffle_vector_byte_8_7
804 ; CHECK-BE: vinsertb 2, 2, 8
805 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
806 ret <16 x i8> %vecins
807 }
808
809 define <16 x i8> @shuffle_vector_byte_9_7(<16 x i8> %a) {
810 entry:
811 ; CHECK-LABEL: shuffle_vector_byte_9_7
812 ; CHECK-NOT: vinsertb
813 ; CHECK-BE-LABEL: shuffle_vector_byte_9_7
814 ; CHECK-BE: vinsertb 2, 2, 9
815 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 7, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
816 ret <16 x i8> %vecins
817 }
818
819 define <16 x i8> @shuffle_vector_byte_10_7(<16 x i8> %a) {
820 entry:
821 ; CHECK-LABEL: shuffle_vector_byte_10_7
822 ; CHECK-NOT: vinsertb
823 ; CHECK-BE-LABEL: shuffle_vector_byte_10_7
824 ; CHECK-BE: vinsertb 2, 2, 10
825 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 7, i32 11, i32 12, i32 13, i32 14, i32 15>
826 ret <16 x i8> %vecins
827 }
828
829 define <16 x i8> @shuffle_vector_byte_11_8(<16 x i8> %a) {
830 entry:
831 ; CHECK-LABEL: shuffle_vector_byte_11_8
832 ; CHECK: vinsertb 2, 2, 4
833 ; CHECK-BE-LABEL: shuffle_vector_byte_11_8
834 ; CHECK-BE-NOT: vinsertb
835 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 8, i32 12, i32 13, i32 14, i32 15>
836 ret <16 x i8> %vecins
837 }
838
839 define <16 x i8> @shuffle_vector_byte_12_8(<16 x i8> %a) {
840 entry:
841 ; CHECK-LABEL: shuffle_vector_byte_12_8
842 ; CHECK: vinsertb 2, 2, 3
843 ; CHECK-BE-LABEL: shuffle_vector_byte_12_8
844 ; CHECK-BE-NOT: vinsertb
845 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 8, i32 13, i32 14, i32 15>
846 ret <16 x i8> %vecins
847 }
848
849 define <16 x i8> @shuffle_vector_byte_13_7(<16 x i8> %a) {
850 entry:
851 ; CHECK-LABEL: shuffle_vector_byte_13_7
852 ; CHECK-NOT: vinsertb
853 ; CHECK-BE-LABEL: shuffle_vector_byte_13_7
854 ; CHECK-BE: vinsertb 2, 2, 13
855 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 7, i32 14, i32 15>
856 ret <16 x i8> %vecins
857 }
858
859 define <16 x i8> @shuffle_vector_byte_14_7(<16 x i8> %a) {
860 entry:
861 ; CHECK-LABEL: shuffle_vector_byte_14_7
862 ; CHECK-NOT: vinsertb
863 ; CHECK-BE-LABEL: shuffle_vector_byte_14_7
864 ; CHECK-BE: vinsertb 2, 2, 14
865 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 7, i32 15>
866 ret <16 x i8> %vecins
867 }
868
869 define <16 x i8> @shuffle_vector_byte_15_8(<16 x i8> %a) {
870 entry:
871 ; CHECK-LABEL: shuffle_vector_byte_15_8
872 ; CHECK: vinsertb 2, 2, 0
873 ; CHECK-BE-LABEL: shuffle_vector_byte_15_8
874 ; CHECK-BE-NOT: vinsertb
875 %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
876 ret <16 x i8> %vecins
877 }