llvm.org GIT mirror llvm / b6dac61
[X86][XOP] Added VPERMIL2PD/VPERMIL2PS shuffle mask comment decoding git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271809 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 years ago
5 changed file(s) with 198 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
14211421 }
14221422 break;
14231423 }
1424
1425 case X86::VPERMIL2PDrm:
1426 case X86::VPERMIL2PSrm:
1427 case X86::VPERMIL2PDrmY:
1428 case X86::VPERMIL2PSrmY: {
1429 if (!OutStreamer->isVerboseAsm())
1430 break;
1431 assert(MI->getNumOperands() > 7 &&
1432 "We should always have at least 8 operands!");
1433 const MachineOperand &DstOp = MI->getOperand(0);
1434 const MachineOperand &SrcOp1 = MI->getOperand(1);
1435 const MachineOperand &SrcOp2 = MI->getOperand(2);
1436 const MachineOperand &MaskOp = MI->getOperand(6);
1437 const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
1438
1439 if (!CtrlOp.isImm())
1440 break;
1441
1442 unsigned ElSize;
1443 switch (MI->getOpcode()) {
1444 default: llvm_unreachable("Invalid opcode");
1445 case X86::VPERMIL2PSrm: case X86::VPERMIL2PSrmY: ElSize = 32; break;
1446 case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
1447 }
1448
1449 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1450 SmallVector<int, 16> Mask;
1451 DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
1452 if (!Mask.empty())
1453 OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
1454 }
1455 break;
1456 }
1457
14241458 case X86::VPPERMrrm: {
14251459 if (!OutStreamer->isVerboseAsm())
14261460 break;
146146 Index += (Element >> 1) & 0x1;
147147 else
148148 Index += Element & 0x3;
149 ShuffleMask.push_back(Index);
150 }
151
152 // TODO: Handle funny-looking vectors too.
153 }
154
155 void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
156 SmallVectorImpl<int> &ShuffleMask) {
157 Type *MaskTy = C->getType();
158
159 unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
160 if (MaskTySize != 128 && MaskTySize != 256)
161 return;
162
163 // Only support vector types.
164 if (!MaskTy->isVectorTy())
165 return;
166
167 // Make sure it's an integer type.
168 Type *VecEltTy = MaskTy->getVectorElementType();
169 if (!VecEltTy->isIntegerTy())
170 return;
171
172 // Support any element type from byte up to element size.
173 // This is necessary primarily because 64-bit elements get split to 32-bit
174 // in the constant pool on 32-bit target.
175 unsigned EltTySize = VecEltTy->getIntegerBitWidth();
176 if (EltTySize < 8 || EltTySize > ElSize)
177 return;
178
179 unsigned NumElements = MaskTySize / ElSize;
180 assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
181 "Unexpected number of vector elements.");
182 ShuffleMask.reserve(NumElements);
183 unsigned NumElementsPerLane = 128 / ElSize;
184 unsigned Factor = ElSize / EltTySize;
185
186 for (unsigned i = 0; i < NumElements; ++i) {
187 Constant *COp = C->getAggregateElement(i * Factor);
188 if (!COp) {
189 ShuffleMask.clear();
190 return;
191 } else if (isa<UndefValue>(COp)) {
192 ShuffleMask.push_back(SM_SentinelUndef);
193 continue;
194 }
195
196 // VPERMIL2 Operation.
197 // Bits[3] - Match Bit.
198 // Bits[2:1] - (Per Lane) PD Shuffle Mask.
199 // Bits[2:0] - (Per Lane) PS Shuffle Mask.
200 uint64_t Selector = cast<ConstantInt>(COp)->getZExtValue();
201 int MatchBit = (Selector >> 3) & 0x1;
202
203 // M2Z[1:0] MatchBit
204 // 0Xb X Source selected by Selector index.
205 // 10b 0 Source selected by Selector index.
206 // 10b 1 Zero.
207 // 11b 0 Zero.
208 // 11b 1 Source selected by Selector index.
209 if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
210 ShuffleMask.push_back(SM_SentinelZero);
211 continue;
212 }
213
214 int Index = Selector & 0x3;
215 Index >>= (ElSize == 64 ? 1 : 0);
216 Index += (i / NumElementsPerLane) * NumElementsPerLane;
217
218 int Src = (Selector >> 2) & 0x1;
219 Index += Src * NumElements;
149220 ShuffleMask.push_back(Index);
150221 }
151222
3131 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
3232 SmallVectorImpl<int> &ShuffleMask);
3333
34 /// Decode a VPERMIL2P variable mask from an IR-level vector constant.
35 void DecodeVPERMIL2PMask(const Constant *C, unsigned MatchImm, unsigned ElSize,
36 SmallVectorImpl<int> &ShuffleMask);
37
3438 /// Decode a VPPERM variable mask from an IR-level vector constant.
3539 void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
3640
6161 define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x float> %a1) {
6262 ; CHECK-LABEL: combine_vpermil2ps_blend_with_zero:
6363 ; CHECK: # BB#0:
64 ; CHECK-NEXT: vpermil2ps $2, {{.*}}(%rip), %xmm1, %xmm0, %xmm0
64 ; CHECK-NEXT: vpermil2ps {{.*#+}} xmm0 = zero,xmm0[1,2,3]
6565 ; CHECK-NEXT: retq
6666 %res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> , i8 2)
6767 ret <4 x float> %res0
9494 ; VPERMIL2
9595 ;
9696
97 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
98 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
97 define <2 x double> @vpermil2pd_21(<2 x double> %a0, <2 x double> %a1) {
98 ; X32-LABEL: vpermil2pd_21:
99 ; X32: # BB#0:
100 ; X32-NEXT: vpermil2pd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
101 ; X32-NEXT: retl
102 ;
103 ; X64-LABEL: vpermil2pd_21:
104 ; X64: # BB#0:
105 ; X64-NEXT: vpermil2pd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
106 ; X64-NEXT: retq
107 %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> , i8 0)
108 ret <2 x double> %1
109 }
99110
100 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
101 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
111 define <4 x double> @vpermil2pd256_0062(<4 x double> %a0, <4 x double> %a1) {
112 ; X32-LABEL: vpermil2pd256_0062:
113 ; X32: # BB#0:
114 ; X32-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
115 ; X32-NEXT: retl
116 ;
117 ; X64-LABEL: vpermil2pd256_0062:
118 ; X64: # BB#0:
119 ; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
120 ; X64-NEXT: retq
121 %1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> , i8 0)
122 ret <4 x double> %1
123 }
124
125 define <4 x double> @vpermil2pd256_zz73(<4 x double> %a0, <4 x double> %a1) {
126 ; X32-LABEL: vpermil2pd256_zz73:
127 ; X32: # BB#0:
128 ; X32-NEXT: vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
129 ; X32-NEXT: retl
130 ;
131 ; X64-LABEL: vpermil2pd256_zz73:
132 ; X64: # BB#0:
133 ; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
134 ; X64-NEXT: retq
135 %1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> , i8 3)
136 ret <4 x double> %1
137 }
138
139 define <4 x float> @vpermil2ps_0561(<4 x float> %a0, <4 x float> %a1) {
140 ; X32-LABEL: vpermil2ps_0561:
141 ; X32: # BB#0:
142 ; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
143 ; X32-NEXT: retl
144 ;
145 ; X64-LABEL: vpermil2ps_0561:
146 ; X64: # BB#0:
147 ; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
148 ; X64-NEXT: retq
149 %1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> , i8 0)
150 ret <4 x float> %1
151 }
152
153 define <8 x float> @vpermil2ps256_098144FE(<8 x float> %a0, <8 x float> %a1) {
154 ; X32-LABEL: vpermil2ps256_098144FE:
155 ; X32: # BB#0:
156 ; X32-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
157 ; X32-NEXT: retl
158 ;
159 ; X64-LABEL: vpermil2ps256_098144FE:
160 ; X64: # BB#0:
161 ; X64-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
162 ; X64-NEXT: retq
163 %1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> , i8 0)
164 ret <8 x float> %1
165 }
166
167 define <8 x float> @vpermil2ps256_0zz8BzzA(<8 x float> %a0, <8 x float> %a1) {
168 ; X32-LABEL: vpermil2ps256_0zz8BzzA:
169 ; X32: # BB#0:
170 ; X32-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
171 ; X32-NEXT: retl
172 ;
173 ; X64-LABEL: vpermil2ps256_0zz8BzzA:
174 ; X64: # BB#0:
175 ; X64-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
176 ; X64-NEXT: retq
177 %1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> , i8 2)
178 ret <8 x float> %1
179 }
180
181 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
182 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
183
184 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
185 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
102186
103187 declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone