llvm.org GIT mirror llvm / c4db4e5
[AVX] Add decode support for VUNPCKLPS/D instructions, both 128-bit and 256-bit forms.

Because the number of elements in a vector does not determine the vector type (4 elements could be v4f32 or v4f64), pass the full type of the vector to decode routines.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126664 91177308-0d34-0410-b5e6-96231b3b80d8

David Greene, 9 years ago
4 changed file(s) with 122 addition(s) and 32 deletion(s). Raw diff Collapse all Expand all
110110 // FALL THROUGH.
111111 case X86::PUNPCKLBWrm:
112112 Src1Name = getRegName(MI->getOperand(0).getReg());
113 DecodePUNPCKLMask(16, ShuffleMask);
113 DecodePUNPCKLBWMask(16, ShuffleMask);
114114 break;
115115 case X86::PUNPCKLWDrr:
116116 Src2Name = getRegName(MI->getOperand(2).getReg());
117117 // FALL THROUGH.
118118 case X86::PUNPCKLWDrm:
119119 Src1Name = getRegName(MI->getOperand(0).getReg());
120 DecodePUNPCKLMask(8, ShuffleMask);
120 DecodePUNPCKLWDMask(8, ShuffleMask);
121121 break;
122122 case X86::PUNPCKLDQrr:
123123 Src2Name = getRegName(MI->getOperand(2).getReg());
124124 // FALL THROUGH.
125125 case X86::PUNPCKLDQrm:
126126 Src1Name = getRegName(MI->getOperand(0).getReg());
127 DecodePUNPCKLMask(4, ShuffleMask);
127 DecodePUNPCKLDQMask(4, ShuffleMask);
128128 break;
129129 case X86::PUNPCKLQDQrr:
130130 Src2Name = getRegName(MI->getOperand(2).getReg());
131131 // FALL THROUGH.
132132 case X86::PUNPCKLQDQrm:
133133 Src1Name = getRegName(MI->getOperand(0).getReg());
134 DecodePUNPCKLMask(2, ShuffleMask);
134 DecodePUNPCKLQDQMask(2, ShuffleMask);
135135 break;
136136
137137 case X86::SHUFPDrri:
152152 Src2Name = getRegName(MI->getOperand(2).getReg());
153153 // FALL THROUGH.
154154 case X86::UNPCKLPDrm:
155 DecodeUNPCKLPMask(2, ShuffleMask);
156 Src1Name = getRegName(MI->getOperand(0).getReg());
155 DecodeUNPCKLPDMask(2, ShuffleMask);
156 Src1Name = getRegName(MI->getOperand(0).getReg());
157 break;
158 case X86::VUNPCKLPDrr:
159 Src2Name = getRegName(MI->getOperand(2).getReg());
160 // FALL THROUGH.
161 case X86::VUNPCKLPDrm:
162 DecodeUNPCKLPDMask(2, ShuffleMask);
163 Src1Name = getRegName(MI->getOperand(1).getReg());
164 break;
165 case X86::VUNPCKLPDYrr:
166 Src2Name = getRegName(MI->getOperand(2).getReg());
167 // FALL THROUGH.
168 case X86::VUNPCKLPDYrm:
169 DecodeUNPCKLPDMask(4, ShuffleMask);
170 Src1Name = getRegName(MI->getOperand(1).getReg());
157171 break;
158172 case X86::UNPCKLPSrr:
159173 Src2Name = getRegName(MI->getOperand(2).getReg());
160174 // FALL THROUGH.
161175 case X86::UNPCKLPSrm:
162 DecodeUNPCKLPMask(4, ShuffleMask);
163 Src1Name = getRegName(MI->getOperand(0).getReg());
176 DecodeUNPCKLPSMask(4, ShuffleMask);
177 Src1Name = getRegName(MI->getOperand(0).getReg());
178 break;
179 case X86::VUNPCKLPSrr:
180 Src2Name = getRegName(MI->getOperand(2).getReg());
181 // FALL THROUGH.
182 case X86::VUNPCKLPSrm:
183 DecodeUNPCKLPSMask(4, ShuffleMask);
184 Src1Name = getRegName(MI->getOperand(1).getReg());
185 break;
186 case X86::VUNPCKLPSYrr:
187 Src2Name = getRegName(MI->getOperand(2).getReg());
188 // FALL THROUGH.
189 case X86::VUNPCKLPSYrm:
190 DecodeUNPCKLPSMask(8, ShuffleMask);
191 Src1Name = getRegName(MI->getOperand(1).getReg());
164192 break;
165193 case X86::UNPCKHPDrr:
166194 Src2Name = getRegName(MI->getOperand(2).getReg());
None //===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
0 //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
11 //
22 // The LLVM Compiler Infrastructure
33 //
9494 ShuffleMask.push_back(7);
9595 }
9696
97 void DecodePUNPCKLMask(unsigned NElts,
97 void DecodePUNPCKLBWMask(unsigned NElts,
98 SmallVectorImpl<unsigned> &ShuffleMask) {
99 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
100 }
101
102 void DecodePUNPCKLWDMask(unsigned NElts,
103 SmallVectorImpl<unsigned> &ShuffleMask) {
104 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
105 }
106
107 void DecodePUNPCKLDQMask(unsigned NElts,
108 SmallVectorImpl<unsigned> &ShuffleMask) {
109 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
110 }
111
112 void DecodePUNPCKLQDQMask(unsigned NElts,
113 SmallVectorImpl<unsigned> &ShuffleMask) {
114 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
115 }
116
117 void DecodePUNPCKLMask(EVT VT,
98118 SmallVectorImpl<unsigned> &ShuffleMask) {
99 for (unsigned i = 0; i != NElts/2; ++i) {
100 ShuffleMask.push_back(i);
101 ShuffleMask.push_back(i+NElts);
102 }
119 DecodeUNPCKLPMask(VT, ShuffleMask);
103120 }
104121
105122 void DecodePUNPCKHMask(unsigned NElts,
132149 }
133150 }
134151
152 void DecodeUNPCKLPSMask(unsigned NElts,
153 SmallVectorImpl<unsigned> &ShuffleMask) {
154 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
155 }
156
157 void DecodeUNPCKLPDMask(unsigned NElts,
158 SmallVectorImpl<unsigned> &ShuffleMask) {
159 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
160 }
135161
136162 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
137 /// etc. NElts indicates the number of elements in the vector allowing it to
138 /// handle different datatypes and vector widths.
139 void DecodeUNPCKLPMask(unsigned NElts,
163 /// etc. VT indicates the type of the vector allowing it to handle different
164 /// datatypes and vector widths.
165 void DecodeUNPCKLPMask(EVT VT,
140166 SmallVectorImpl<unsigned> &ShuffleMask) {
167
168 int NElts = VT.getVectorNumElements();
169
141170 for (unsigned i = 0; i != NElts/2; ++i) {
142171 ShuffleMask.push_back(i); // Reads from dest
143172 ShuffleMask.push_back(i+NElts); // Reads from src
1515 #define X86_SHUFFLE_DECODE_H
1616
1717 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/CodeGen/ValueTypes.h"
1819
1920 //===----------------------------------------------------------------------===//
2021 // Vector Mask Decoding
4445 void DecodePSHUFLWMask(unsigned Imm,
4546 SmallVectorImpl<unsigned> &ShuffleMask);
4647
47 void DecodePUNPCKLMask(unsigned NElts,
48 void DecodePUNPCKLBWMask(unsigned NElts,
49 SmallVectorImpl<unsigned> &ShuffleMask);
50
51 void DecodePUNPCKLWDMask(unsigned NElts,
52 SmallVectorImpl<unsigned> &ShuffleMask);
53
54 void DecodePUNPCKLDQMask(unsigned NElts,
55 SmallVectorImpl<unsigned> &ShuffleMask);
56
57 void DecodePUNPCKLQDQMask(unsigned NElts,
58 SmallVectorImpl<unsigned> &ShuffleMask);
59
60 void DecodePUNPCKLMask(EVT VT,
4861 SmallVectorImpl<unsigned> &ShuffleMask);
4962
5063 void DecodePUNPCKHMask(unsigned NElts,
5669 void DecodeUNPCKHPMask(unsigned NElts,
5770 SmallVectorImpl<unsigned> &ShuffleMask);
5871
72 void DecodeUNPCKLPSMask(unsigned NElts,
73 SmallVectorImpl<unsigned> &ShuffleMask);
74
75 void DecodeUNPCKLPDMask(unsigned NElts,
76 SmallVectorImpl<unsigned> &ShuffleMask);
5977
6078 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
61 /// etc. NElts indicates the number of elements in the vector allowing it to
62 /// handle different datatypes and vector widths.
63 void DecodeUNPCKLPMask(unsigned NElts,
79 /// etc. VT indicates the type of the vector allowing it to handle different
80 /// datatypes and vector widths.
81 void DecodeUNPCKLPMask(EVT VT,
6482 SmallVectorImpl<unsigned> &ShuffleMask);
6583
6684 } // llvm namespace
38943894 case X86ISD::PUNPCKLWD:
38953895 case X86ISD::PUNPCKLDQ:
38963896 case X86ISD::PUNPCKLQDQ:
3897 DecodePUNPCKLMask(NumElems, ShuffleMask);
3897 DecodePUNPCKLMask(VT, ShuffleMask);
38983898 break;
38993899 case X86ISD::UNPCKLPS:
39003900 case X86ISD::UNPCKLPD:
3901 DecodeUNPCKLPMask(NumElems, ShuffleMask);
3901 case X86ISD::VUNPCKLPS:
3902 case X86ISD::VUNPCKLPD:
3903 case X86ISD::VUNPCKLPSY:
3904 case X86ISD::VUNPCKLPDY:
3905 DecodeUNPCKLPMask(VT, ShuffleMask);
39023906 break;
39033907 case X86ISD::MOVHLPS:
39043908 DecodeMOVHLPSMask(NumElems, ShuffleMask);
52625266
52635267 // Break it into (shuffle shuffle_hi, shuffle_lo).
52645268 Locs.clear();
5269 Locs.resize(4);
52655270 SmallVector<int, 8> LoMask(4U, -1);
52665271 SmallVector<int, 8> HiMask(4U, -1);
52675272
55075512 X86::getShuffleSHUFImmediate(SVOp), DAG);
55085513 }
55095514
5510 static inline unsigned getUNPCKLOpcode(EVT VT) {
5515 static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
55115516 switch(VT.getSimpleVT().SimpleTy) {
55125517 case MVT::v4i32: return X86ISD::PUNPCKLDQ;
55135518 case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
5514 case MVT::v4f32: return X86ISD::UNPCKLPS;
5515 case MVT::v2f64: return X86ISD::UNPCKLPD;
5519 case MVT::v4f32:
5520 return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
5521 case MVT::v2f64:
5522 return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
5523 case MVT::v8f32: return X86ISD::VUNPCKLPSY;
5524 case MVT::v4f64: return X86ISD::VUNPCKLPDY;
55165525 case MVT::v16i8: return X86ISD::PUNPCKLBW;
55175526 case MVT::v8i16: return X86ISD::PUNPCKLWD;
55185527 default:
56405649 // unpckh_undef). Only use pshufd if speed is more important than size.
56415650 if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
56425651 if (VT != MVT::v2i64 && VT != MVT::v2f64)
5643 return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
5652 return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG);
56445653 if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
56455654 if (VT != MVT::v2i64 && VT != MVT::v2f64)
56465655 return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
57615770 }
57625771
57635772 if (X86::isUNPCKLMask(SVOp))
5764 return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
5773 return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
5774 dl, VT, V1, V2, DAG);
57655775
57665776 if (X86::isUNPCKHMask(SVOp))
57675777 return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
57885798 ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
57895799
57905800 if (X86::isUNPCKLMask(NewSVOp))
5791 return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
5801 return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
5802 dl, VT, V2, V1, DAG);
57925803
57935804 if (X86::isUNPCKHMask(NewSVOp))
57945805 return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
58115822
58125823 if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
58135824 SVOp->getSplatIndex() == 0 && V2IsUndef) {
5814 if (VT == MVT::v2f64)
5815 return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
5825 if (VT == MVT::v2f64) {
5826 X86ISD::NodeType Opcode =
5827 getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
5828 return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
5829 }
58165830 if (VT == MVT::v2i64)
58175831 return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
58185832 }
58395853
58405854 if (X86::isUNPCKL_v_undef_Mask(SVOp))
58415855 if (VT != MVT::v2i64 && VT != MVT::v2f64)
5842 return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
5856 return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
5857 dl, VT, V1, V1, DAG);
58435858 if (X86::isUNPCKH_v_undef_Mask(SVOp))
58445859 if (VT != MVT::v2i64 && VT != MVT::v2f64)
58455860 return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);