llvm.org GIT mirror llvm / 0af7a7d
[AArch64 NEON] Try to generate CONCAT_VECTOR when lowering BUILD_VECTOR or SHUFFLE_VECTOR. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199791 91177308-0d34-0410-b5e6-96231b3b80d8 Kevin Qin 6 years ago
3 changed file(s) with 360 addition(s) and 29 deletion(s). Raw diff Collapse all Expand all
41534153 return false;
41544154 }
41554155
4156 // Check whether a Build Vector could be presented as Shuffle Vector. If yes,
4157 // try to call LowerVECTOR_SHUFFLE to lower it.
4156 // Check whether a shuffle_vector could be represented as a concat_vector.
4157 bool AArch64TargetLowering::isConactVector(SDValue Op,SelectionDAG &DAG,
4158 SDValue V0, SDValue V1,
4159 const int* Mask,
4160 SDValue &Res) const {
4161 SDLoc DL(Op);
4162 EVT VT = Op.getValueType();
4163 unsigned NumElts = VT.getVectorNumElements();
4164 unsigned V0NumElts = V0.getValueType().getVectorNumElements();
4165 bool isContactVector = true;
4166 bool splitV0 = false;
4167 int offset = 0;
4168 for (int I = 0, E = NumElts; I != E; I++){
4169 if (Mask[I] != I + offset) {
4170 if(I && !splitV0 && Mask[I] == I + (int)V0NumElts / 2) {
4171 splitV0 = true;
4172 offset = V0NumElts / 2;
4173 } else {
4174 isContactVector = false;
4175 break;
4176 }
4177 }
4178 }
4179 if (isContactVector) {
4180 EVT CastVT = EVT::getVectorVT(*DAG.getContext(),
4181 VT.getVectorElementType(), NumElts / 2);
4182 if(CastVT.getSizeInBits() < 64)
4183 return false;
4184
4185 if (splitV0) {
4186 assert(V0NumElts >= NumElts / 2 &&
4187 "invalid operand for extract_subvector!");
4188 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
4189 DAG.getConstant(0, MVT::i64));
4190 }
4191 if (NumElts != V1.getValueType().getVectorNumElements() * 2) {
4192 assert(V1.getValueType().getVectorNumElements() >= NumElts / 2 &&
4193 "invalid operand for extract_subvector!");
4194 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
4195 DAG.getConstant(0, MVT::i64));
4196 }
4197 Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
4198 return true;
4199 }
4200 return false;
4201 }
4202
4203 // Check whether a Build Vector could be represented as a Shuffle Vector.
4204 // This Shuffle Vector may not be legalized, so the length of its operands
4205 // and the length of its result may not be equal.
41584206 bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
4159 SDValue &Res) const {
4207 SDValue &V0, SDValue &V1,
4208 int *Mask) const {
41604209 SDLoc DL(Op);
41614210 EVT VT = Op.getValueType();
41624211 unsigned NumElts = VT.getVectorNumElements();
41634212 unsigned V0NumElts = 0;
4164 int Mask[16];
4165 SDValue V0, V1;
41664213
41674214 // Check if all elements are extracted from fewer than 3 vectors.
41684215 for (unsigned i = 0; i < NumElts; ++i) {
41694216 SDValue Elt = Op.getOperand(i);
4170 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
4217 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
4218 Elt.getOperand(0).getValueType().getVectorElementType() !=
4219 VT.getVectorElementType())
41714220 return false;
41724221
41734222 if (V0.getNode() == 0) {
41884237 return false;
41894238 }
41904239 }
4191
4192 if (!V1.getNode() && V0NumElts == NumElts * 2) {
4193 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
4194 DAG.getConstant(NumElts, MVT::i64));
4195 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
4196 DAG.getConstant(0, MVT::i64));
4197 V0NumElts = V0.getValueType().getVectorNumElements();
4198 }
4199
4200 if (V1.getNode() && NumElts == V0NumElts &&
4201 V0NumElts == V1.getValueType().getVectorNumElements()) {
4202 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
4203 if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
4204 Res = Shuffle;
4205 else
4206 Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
4207 return true;
4208 } else
4209 return false;
4240 return true;
42104241 }
42114242
42124243 // If this is a case we can't handle, return null and let the default
44124443 return SDValue();
44134444
44144445 // Try to lower this the same way a ShuffleVector is lowered.
4415 SDValue Shuf;
4416 if (isKnownShuffleVector(Op, DAG, Shuf))
4417 return Shuf;
4446 SDValue V0, V1;
4447 int Mask[16];
4448 if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) {
4449 unsigned V0NumElts = V0.getValueType().getVectorNumElements();
4450 if (!V1.getNode() && V0NumElts == NumElts * 2) {
4451 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
4452 DAG.getConstant(NumElts, MVT::i64));
4453 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
4454 DAG.getConstant(0, MVT::i64));
4455 V0NumElts = V0.getValueType().getVectorNumElements();
4456 }
4457
4458 if (V1.getNode() && NumElts == V0NumElts &&
4459 V0NumElts == V1.getValueType().getVectorNumElements()) {
4460 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
4461 if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
4462 return Shuffle;
4463 else
4464 return LowerVECTOR_SHUFFLE(Shuffle, DAG);
4465 } else {
4466 SDValue Res;
4467 if(isConactVector(Op, DAG, V0, V1, Mask, Res))
4468 return Res;
4469 }
4470 }
44184471
44194472 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
44204473 // know the default expansion would otherwise fall back on something even
45994652 else
46004653 return DAG.getNode(ISDNo, dl, VT, V1, V2);
46014654 }
4655
4656 SDValue Res;
4657 if (isConactVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
4658 return Res;
46024659
46034660 // If the elements of the shuffle mask are all the same constant, we can
46044661 // transform it into either NEON_VDUP or NEON_VDUPLANE
231231 SDLoc dl, SelectionDAG &DAG,
232232 SmallVectorImpl &InVals) const;
233233
234 bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;
234 bool isConactVector(SDValue Op,SelectionDAG &DAG, SDValue V0, SDValue V1,
235 const int* Mask, SDValue &Res) const;
236
237 bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0,
238 SDValue &V1, int *Mask) const;
235239
236240 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
237241 const AArch64Subtarget *ST) const;
978978 %0 = extractelement <1 x float> %a, i32 0
979979 %vecinit1.i = insertelement <4 x float> undef, float %0, i32 0
980980 ret <4 x float> %vecinit1.i
981 }
982
983 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
984 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
985 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
986 entry:
987 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32>
988 ret <16 x i8> %vecinit30
989 }
990
991 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
992 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
993 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
994 entry:
995 %vecext = extractelement <8 x i8> %x, i32 0
996 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
997 %vecext1 = extractelement <8 x i8> %x, i32 1
998 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
999 %vecext3 = extractelement <8 x i8> %x, i32 2
1000 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1001 %vecext5 = extractelement <8 x i8> %x, i32 3
1002 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1003 %vecext7 = extractelement <8 x i8> %x, i32 4
1004 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1005 %vecext9 = extractelement <8 x i8> %x, i32 5
1006 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1007 %vecext11 = extractelement <8 x i8> %x, i32 6
1008 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1009 %vecext13 = extractelement <8 x i8> %x, i32 7
1010 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1011 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32>
1012 ret <16 x i8> %vecinit30
1013 }
1014
1015 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1016 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1017 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1018 entry:
1019 %vecext = extractelement <16 x i8> %x, i32 0
1020 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1021 %vecext1 = extractelement <16 x i8> %x, i32 1
1022 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1023 %vecext3 = extractelement <16 x i8> %x, i32 2
1024 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1025 %vecext5 = extractelement <16 x i8> %x, i32 3
1026 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1027 %vecext7 = extractelement <16 x i8> %x, i32 4
1028 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1029 %vecext9 = extractelement <16 x i8> %x, i32 5
1030 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1031 %vecext11 = extractelement <16 x i8> %x, i32 6
1032 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1033 %vecext13 = extractelement <16 x i8> %x, i32 7
1034 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1035 %vecext15 = extractelement <8 x i8> %y, i32 0
1036 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1037 %vecext17 = extractelement <8 x i8> %y, i32 1
1038 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1039 %vecext19 = extractelement <8 x i8> %y, i32 2
1040 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1041 %vecext21 = extractelement <8 x i8> %y, i32 3
1042 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1043 %vecext23 = extractelement <8 x i8> %y, i32 4
1044 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1045 %vecext25 = extractelement <8 x i8> %y, i32 5
1046 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1047 %vecext27 = extractelement <8 x i8> %y, i32 6
1048 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1049 %vecext29 = extractelement <8 x i8> %y, i32 7
1050 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1051 ret <16 x i8> %vecinit30
1052 }
1053
1054 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1055 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1056 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1057 entry:
1058 %vecext = extractelement <8 x i8> %x, i32 0
1059 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1060 %vecext1 = extractelement <8 x i8> %x, i32 1
1061 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1062 %vecext3 = extractelement <8 x i8> %x, i32 2
1063 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1064 %vecext5 = extractelement <8 x i8> %x, i32 3
1065 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1066 %vecext7 = extractelement <8 x i8> %x, i32 4
1067 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1068 %vecext9 = extractelement <8 x i8> %x, i32 5
1069 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1070 %vecext11 = extractelement <8 x i8> %x, i32 6
1071 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1072 %vecext13 = extractelement <8 x i8> %x, i32 7
1073 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1074 %vecext15 = extractelement <8 x i8> %y, i32 0
1075 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1076 %vecext17 = extractelement <8 x i8> %y, i32 1
1077 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1078 %vecext19 = extractelement <8 x i8> %y, i32 2
1079 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1080 %vecext21 = extractelement <8 x i8> %y, i32 3
1081 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1082 %vecext23 = extractelement <8 x i8> %y, i32 4
1083 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1084 %vecext25 = extractelement <8 x i8> %y, i32 5
1085 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1086 %vecext27 = extractelement <8 x i8> %y, i32 6
1087 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1088 %vecext29 = extractelement <8 x i8> %y, i32 7
1089 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1090 ret <16 x i8> %vecinit30
1091 }
1092
1093 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1094 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1095 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1096 entry:
1097 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32>
1098 ret <8 x i16> %vecinit14
1099 }
1100
1101 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1102 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1103 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1104 entry:
1105 %vecext = extractelement <4 x i16> %x, i32 0
1106 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1107 %vecext1 = extractelement <4 x i16> %x, i32 1
1108 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1109 %vecext3 = extractelement <4 x i16> %x, i32 2
1110 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1111 %vecext5 = extractelement <4 x i16> %x, i32 3
1112 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1113 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32>
1114 ret <8 x i16> %vecinit14
1115 }
1116
1117 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1118 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1119 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1120 entry:
1121 %vecext = extractelement <8 x i16> %x, i32 0
1122 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1123 %vecext1 = extractelement <8 x i16> %x, i32 1
1124 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1125 %vecext3 = extractelement <8 x i16> %x, i32 2
1126 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1127 %vecext5 = extractelement <8 x i16> %x, i32 3
1128 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1129 %vecext7 = extractelement <4 x i16> %y, i32 0
1130 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1131 %vecext9 = extractelement <4 x i16> %y, i32 1
1132 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1133 %vecext11 = extractelement <4 x i16> %y, i32 2
1134 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1135 %vecext13 = extractelement <4 x i16> %y, i32 3
1136 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1137 ret <8 x i16> %vecinit14
1138 }
1139
1140 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1141 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1142 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1143 entry:
1144 %vecext = extractelement <4 x i16> %x, i32 0
1145 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1146 %vecext1 = extractelement <4 x i16> %x, i32 1
1147 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1148 %vecext3 = extractelement <4 x i16> %x, i32 2
1149 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1150 %vecext5 = extractelement <4 x i16> %x, i32 3
1151 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1152 %vecext7 = extractelement <4 x i16> %y, i32 0
1153 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1154 %vecext9 = extractelement <4 x i16> %y, i32 1
1155 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1156 %vecext11 = extractelement <4 x i16> %y, i32 2
1157 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1158 %vecext13 = extractelement <4 x i16> %y, i32 3
1159 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1160 ret <8 x i16> %vecinit14
1161 }
1162
1163 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1164 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1165 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1166 entry:
1167 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32>
1168 ret <4 x i32> %vecinit6
1169 }
1170
1171 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1172 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1173 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1174 entry:
1175 %vecext = extractelement <2 x i32> %x, i32 0
1176 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1177 %vecext1 = extractelement <2 x i32> %x, i32 1
1178 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1179 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32>
1180 ret <4 x i32> %vecinit6
1181 }
1182
1183 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1184 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1185 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1186 entry:
1187 %vecext = extractelement <4 x i32> %x, i32 0
1188 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1189 %vecext1 = extractelement <4 x i32> %x, i32 1
1190 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1191 %vecext3 = extractelement <2 x i32> %y, i32 0
1192 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1193 %vecext5 = extractelement <2 x i32> %y, i32 1
1194 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1195 ret <4 x i32> %vecinit6
1196 }
1197
1198 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1199 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1200 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1201 entry:
1202 %vecext = extractelement <2 x i32> %x, i32 0
1203 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1204 %vecext1 = extractelement <2 x i32> %x, i32 1
1205 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1206 %vecext3 = extractelement <2 x i32> %y, i32 0
1207 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1208 %vecext5 = extractelement <2 x i32> %y, i32 1
1209 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1210 ret <4 x i32> %vecinit6
1211 }
1212
1213 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1214 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1215 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1216 entry:
1217 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32>
1218 ret <2 x i64> %vecinit2
1219 }
1220
1221 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1222 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1223 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1224 entry:
1225 %vecext = extractelement <1 x i64> %x, i32 0
1226 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1227 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32>
1228 ret <2 x i64> %vecinit2
1229 }
1230
1231 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1232 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1233 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1234 entry:
1235 %vecext = extractelement <2 x i64> %x, i32 0
1236 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1237 %vecext1 = extractelement <1 x i64> %y, i32 0
1238 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1239 ret <2 x i64> %vecinit2
1240 }
1241
1242 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1243 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1244 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1245 entry:
1246 %vecext = extractelement <1 x i64> %x, i32 0
1247 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1248 %vecext1 = extractelement <1 x i64> %y, i32 0
1249 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1250 ret <2 x i64> %vecinit2
9811251 }