llvm.org GIT mirror llvm / 47af16e
[X86] SET0 to use XMM registers where possible PR26018 PR32862 Differential Revision: https://reviews.llvm.org/D35965 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309926 91177308-0d34-0410-b5e6-96231b3b80d8 Dinar Temirbulatov 2 years ago
54 changed file(s) with 922 addition(s) and 1090 deletion(s). Raw diff Collapse all Expand all
77227722 return Expand2AddrUndef(MIB,
77237723 get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
77247724 // Extended register without VLX. Use a larger XOR.
7725 SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
7725 SrcReg =
7726 TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
77267727 MIB->getOperand(0).setReg(SrcReg);
77277728 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
77287729 }
77307731 bool HasVLX = Subtarget.hasVLX();
77317732 unsigned SrcReg = MIB->getOperand(0).getReg();
77327733 const TargetRegisterInfo *TRI = &getRegisterInfo();
7733 if (HasVLX)
7734 return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
7734 if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
7735 unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
7736 MIB->getOperand(0).setReg(XReg);
7737 return Expand2AddrUndef(MIB,
7738 get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
7739 }
7740 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
7741 }
7742 case X86::AVX512_512_SET0: {
7743 const TargetRegisterInfo *TRI = &getRegisterInfo();
7744 unsigned SrcReg = MIB->getOperand(0).getReg();
77357745 if (TRI->getEncodingValue(SrcReg) < 16) {
77367746 unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
77377747 MIB->getOperand(0).setReg(XReg);
77387748 return Expand2AddrUndef(MIB, get(X86::VXORPSrr));
77397749 }
7740 // Extended register without VLX. Use a larger XOR.
7741 SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
7742 MIB->getOperand(0).setReg(SrcReg);
77437750 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
77447751 }
7745 case X86::AVX512_512_SET0:
7746 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
77477752 case X86::V_SETALLONES:
77487753 return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
77497754 case X86::AVX2_SETALLONES:
983983 ; AVX512VL-LABEL: movnt_pd:
984984 ; AVX512VL: # BB#0:
985985 ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
986 ; AVX512VL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x57,0xc9]
986 ; AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
987987 ; AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
988988 ; AVX512VL-NEXT: vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]
989989 ; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11341134 }
11351135
11361136 define void @isel_crash_32b(i8* %cV_R.addr) {
1137 ; X32-AVX2-LABEL: isel_crash_32b:
1138 ; X32-AVX2: ## BB#0: ## %eintry
1139 ; X32-AVX2-NEXT: pushl %ebp
1140 ; X32-AVX2-NEXT: Lcfi1:
1141 ; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
1142 ; X32-AVX2-NEXT: Lcfi2:
1143 ; X32-AVX2-NEXT: .cfi_offset %ebp, -8
1144 ; X32-AVX2-NEXT: movl %esp, %ebp
1145 ; X32-AVX2-NEXT: Lcfi3:
1146 ; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp
1147 ; X32-AVX2-NEXT: andl $-32, %esp
1148 ; X32-AVX2-NEXT: subl $128, %esp
1149 ; X32-AVX2-NEXT: movl 8(%ebp), %eax
1150 ; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
1151 ; X32-AVX2-NEXT: vmovaps %ymm0, (%esp)
1152 ; X32-AVX2-NEXT: vpbroadcastb (%eax), %ymm1
1153 ; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1154 ; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1155 ; X32-AVX2-NEXT: movl %ebp, %esp
1156 ; X32-AVX2-NEXT: popl %ebp
1157 ; X32-AVX2-NEXT: vzeroupper
1158 ; X32-AVX2-NEXT: retl
1159 ;
1160 ; X64-AVX2-LABEL: isel_crash_32b:
1161 ; X64-AVX2: ## BB#0: ## %eintry
1162 ; X64-AVX2-NEXT: pushq %rbp
1163 ; X64-AVX2-NEXT: Lcfi0:
1164 ; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
1165 ; X64-AVX2-NEXT: Lcfi1:
1166 ; X64-AVX2-NEXT: .cfi_offset %rbp, -16
1167 ; X64-AVX2-NEXT: movq %rsp, %rbp
1168 ; X64-AVX2-NEXT: Lcfi2:
1169 ; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
1170 ; X64-AVX2-NEXT: andq $-32, %rsp
1171 ; X64-AVX2-NEXT: subq $128, %rsp
1172 ; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
1173 ; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
1174 ; X64-AVX2-NEXT: movb (%rdi), %al
1175 ; X64-AVX2-NEXT: vmovd %eax, %xmm1
1176 ; X64-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
1177 ; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
1178 ; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
1179 ; X64-AVX2-NEXT: movq %rbp, %rsp
1180 ; X64-AVX2-NEXT: popq %rbp
1181 ; X64-AVX2-NEXT: vzeroupper
1182 ; X64-AVX2-NEXT: retq
1183 ;
1184 ; X32-AVX512VL-LABEL: isel_crash_32b:
1185 ; X32-AVX512VL: ## BB#0: ## %eintry
1186 ; X32-AVX512VL-NEXT: pushl %ebp
1187 ; X32-AVX512VL-NEXT: Lcfi1:
1188 ; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8
1189 ; X32-AVX512VL-NEXT: Lcfi2:
1190 ; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8
1191 ; X32-AVX512VL-NEXT: movl %esp, %ebp
1192 ; X32-AVX512VL-NEXT: Lcfi3:
1193 ; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp
1194 ; X32-AVX512VL-NEXT: andl $-32, %esp
1195 ; X32-AVX512VL-NEXT: subl $128, %esp
1196 ; X32-AVX512VL-NEXT: movl 8(%ebp), %eax
1197 ; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
1198 ; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp)
1199 ; X32-AVX512VL-NEXT: vpbroadcastb (%eax), %ymm1
1200 ; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1201 ; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1202 ; X32-AVX512VL-NEXT: movl %ebp, %esp
1203 ; X32-AVX512VL-NEXT: popl %ebp
1204 ; X32-AVX512VL-NEXT: vzeroupper
1205 ; X32-AVX512VL-NEXT: retl
1206 ;
1207 ; X64-AVX512VL-LABEL: isel_crash_32b:
1208 ; X64-AVX512VL: ## BB#0: ## %eintry
1209 ; X64-AVX512VL-NEXT: pushq %rbp
1210 ; X64-AVX512VL-NEXT: Lcfi0:
1211 ; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16
1212 ; X64-AVX512VL-NEXT: Lcfi1:
1213 ; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16
1214 ; X64-AVX512VL-NEXT: movq %rsp, %rbp
1215 ; X64-AVX512VL-NEXT: Lcfi2:
1216 ; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
1217 ; X64-AVX512VL-NEXT: andq $-32, %rsp
1218 ; X64-AVX512VL-NEXT: subq $128, %rsp
1219 ; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
1220 ; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
1221 ; X64-AVX512VL-NEXT: movb (%rdi), %al
1222 ; X64-AVX512VL-NEXT: vmovd %eax, %xmm1
1223 ; X64-AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1
1224 ; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
1225 ; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
1226 ; X64-AVX512VL-NEXT: movq %rbp, %rsp
1227 ; X64-AVX512VL-NEXT: popq %rbp
1228 ; X64-AVX512VL-NEXT: vzeroupper
1229 ; X64-AVX512VL-NEXT: retq
1137 ; X32-LABEL: isel_crash_32b:
1138 ; X32: ## BB#0: ## %eintry
1139 ; X32-NEXT: pushl %ebp
1140 ; X32-NEXT: Lcfi1:
1141 ; X32-NEXT: .cfi_def_cfa_offset 8
1142 ; X32-NEXT: Lcfi2:
1143 ; X32-NEXT: .cfi_offset %ebp, -8
1144 ; X32-NEXT: movl %esp, %ebp
1145 ; X32-NEXT: Lcfi3:
1146 ; X32-NEXT: .cfi_def_cfa_register %ebp
1147 ; X32-NEXT: andl $-32, %esp
1148 ; X32-NEXT: subl $128, %esp
1149 ; X32-NEXT: movl 8(%ebp), %eax
1150 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
1151 ; X32-NEXT: vmovaps %ymm0, (%esp)
1152 ; X32-NEXT: vpbroadcastb (%eax), %ymm1
1153 ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1154 ; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1155 ; X32-NEXT: movl %ebp, %esp
1156 ; X32-NEXT: popl %ebp
1157 ; X32-NEXT: vzeroupper
1158 ; X32-NEXT: retl
1159 ;
1160 ; X64-LABEL: isel_crash_32b:
1161 ; X64: ## BB#0: ## %eintry
1162 ; X64-NEXT: pushq %rbp
1163 ; X64-NEXT: Lcfi0:
1164 ; X64-NEXT: .cfi_def_cfa_offset 16
1165 ; X64-NEXT: Lcfi1:
1166 ; X64-NEXT: .cfi_offset %rbp, -16
1167 ; X64-NEXT: movq %rsp, %rbp
1168 ; X64-NEXT: Lcfi2:
1169 ; X64-NEXT: .cfi_def_cfa_register %rbp
1170 ; X64-NEXT: andq $-32, %rsp
1171 ; X64-NEXT: subq $128, %rsp
1172 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
1173 ; X64-NEXT: vmovaps %ymm0, (%rsp)
1174 ; X64-NEXT: movb (%rdi), %al
1175 ; X64-NEXT: vmovd %eax, %xmm1
1176 ; X64-NEXT: vpbroadcastb %xmm1, %ymm1
1177 ; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
1178 ; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
1179 ; X64-NEXT: movq %rbp, %rsp
1180 ; X64-NEXT: popq %rbp
1181 ; X64-NEXT: vzeroupper
1182 ; X64-NEXT: retq
12301183 eintry:
12311184 %__a.addr.i = alloca <4 x i64>, align 16
12321185 %__b.addr.i = alloca <4 x i64>, align 16
12831236 }
12841237
12851238 define void @isel_crash_16w(i16* %cV_R.addr) {
1286 ; X32-AVX2-LABEL: isel_crash_16w:
1287 ; X32-AVX2: ## BB#0: ## %eintry
1288 ; X32-AVX2-NEXT: pushl %ebp
1289 ; X32-AVX2-NEXT: Lcfi5:
1290 ; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
1291 ; X32-AVX2-NEXT: Lcfi6:
1292 ; X32-AVX2-NEXT: .cfi_offset %ebp, -8
1293 ; X32-AVX2-NEXT: movl %esp, %ebp
1294 ; X32-AVX2-NEXT: Lcfi7:
1295 ; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp
1296 ; X32-AVX2-NEXT: andl $-32, %esp
1297 ; X32-AVX2-NEXT: subl $128, %esp
1298 ; X32-AVX2-NEXT: movl 8(%ebp), %eax
1299 ; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
1300 ; X32-AVX2-NEXT: vmovaps %ymm0, (%esp)
1301 ; X32-AVX2-NEXT: vpbroadcastw (%eax), %ymm1
1302 ; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1303 ; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1304 ; X32-AVX2-NEXT: movl %ebp, %esp
1305 ; X32-AVX2-NEXT: popl %ebp
1306 ; X32-AVX2-NEXT: vzeroupper
1307 ; X32-AVX2-NEXT: retl
1308 ;
1309 ; X64-AVX2-LABEL: isel_crash_16w:
1310 ; X64-AVX2: ## BB#0: ## %eintry
1311 ; X64-AVX2-NEXT: pushq %rbp
1312 ; X64-AVX2-NEXT: Lcfi3:
1313 ; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
1314 ; X64-AVX2-NEXT: Lcfi4:
1315 ; X64-AVX2-NEXT: .cfi_offset %rbp, -16
1316 ; X64-AVX2-NEXT: movq %rsp, %rbp
1317 ; X64-AVX2-NEXT: Lcfi5:
1318 ; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
1319 ; X64-AVX2-NEXT: andq $-32, %rsp
1320 ; X64-AVX2-NEXT: subq $128, %rsp
1321 ; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
1322 ; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
1323 ; X64-AVX2-NEXT: movw (%rdi), %ax
1324 ; X64-AVX2-NEXT: vmovd %eax, %xmm1
1325 ; X64-AVX2-NEXT: vpbroadcastw %xmm1, %ymm1
1326 ; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
1327 ; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
1328 ; X64-AVX2-NEXT: movq %rbp, %rsp
1329 ; X64-AVX2-NEXT: popq %rbp
1330 ; X64-AVX2-NEXT: vzeroupper
1331 ; X64-AVX2-NEXT: retq
1332 ;
1333 ; X32-AVX512VL-LABEL: isel_crash_16w:
1334 ; X32-AVX512VL: ## BB#0: ## %eintry
1335 ; X32-AVX512VL-NEXT: pushl %ebp
1336 ; X32-AVX512VL-NEXT: Lcfi5:
1337 ; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8
1338 ; X32-AVX512VL-NEXT: Lcfi6:
1339 ; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8
1340 ; X32-AVX512VL-NEXT: movl %esp, %ebp
1341 ; X32-AVX512VL-NEXT: Lcfi7:
1342 ; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp
1343 ; X32-AVX512VL-NEXT: andl $-32, %esp
1344 ; X32-AVX512VL-NEXT: subl $128, %esp
1345 ; X32-AVX512VL-NEXT: movl 8(%ebp), %eax
1346 ; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
1347 ; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp)
1348 ; X32-AVX512VL-NEXT: vpbroadcastw (%eax), %ymm1
1349 ; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1350 ; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1351 ; X32-AVX512VL-NEXT: movl %ebp, %esp
1352 ; X32-AVX512VL-NEXT: popl %ebp
1353 ; X32-AVX512VL-NEXT: vzeroupper
1354 ; X32-AVX512VL-NEXT: retl
1355 ;
1356 ; X64-AVX512VL-LABEL: isel_crash_16w:
1357 ; X64-AVX512VL: ## BB#0: ## %eintry
1358 ; X64-AVX512VL-NEXT: pushq %rbp
1359 ; X64-AVX512VL-NEXT: Lcfi3:
1360 ; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16
1361 ; X64-AVX512VL-NEXT: Lcfi4:
1362 ; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16
1363 ; X64-AVX512VL-NEXT: movq %rsp, %rbp
1364 ; X64-AVX512VL-NEXT: Lcfi5:
1365 ; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
1366 ; X64-AVX512VL-NEXT: andq $-32, %rsp
1367 ; X64-AVX512VL-NEXT: subq $128, %rsp
1368 ; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
1369 ; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
1370 ; X64-AVX512VL-NEXT: movw (%rdi), %ax
1371 ; X64-AVX512VL-NEXT: vmovd %eax, %xmm1
1372 ; X64-AVX512VL-NEXT: vpbroadcastw %xmm1, %ymm1
1373 ; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
1374 ; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
1375 ; X64-AVX512VL-NEXT: movq %rbp, %rsp
1376 ; X64-AVX512VL-NEXT: popq %rbp
1377 ; X64-AVX512VL-NEXT: vzeroupper
1378 ; X64-AVX512VL-NEXT: retq
1239 ; X32-LABEL: isel_crash_16w:
1240 ; X32: ## BB#0: ## %eintry
1241 ; X32-NEXT: pushl %ebp
1242 ; X32-NEXT: Lcfi5:
1243 ; X32-NEXT: .cfi_def_cfa_offset 8
1244 ; X32-NEXT: Lcfi6:
1245 ; X32-NEXT: .cfi_offset %ebp, -8
1246 ; X32-NEXT: movl %esp, %ebp
1247 ; X32-NEXT: Lcfi7:
1248 ; X32-NEXT: .cfi_def_cfa_register %ebp
1249 ; X32-NEXT: andl $-32, %esp
1250 ; X32-NEXT: subl $128, %esp
1251 ; X32-NEXT: movl 8(%ebp), %eax
1252 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
1253 ; X32-NEXT: vmovaps %ymm0, (%esp)
1254 ; X32-NEXT: vpbroadcastw (%eax), %ymm1
1255 ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1256 ; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1257 ; X32-NEXT: movl %ebp, %esp
1258 ; X32-NEXT: popl %ebp
1259 ; X32-NEXT: vzeroupper
1260 ; X32-NEXT: retl
1261 ;
1262 ; X64-LABEL: isel_crash_16w:
1263 ; X64: ## BB#0: ## %eintry
1264 ; X64-NEXT: pushq %rbp
1265 ; X64-NEXT: Lcfi3:
1266 ; X64-NEXT: .cfi_def_cfa_offset 16
1267 ; X64-NEXT: Lcfi4:
1268 ; X64-NEXT: .cfi_offset %rbp, -16
1269 ; X64-NEXT: movq %rsp, %rbp
1270 ; X64-NEXT: Lcfi5:
1271 ; X64-NEXT: .cfi_def_cfa_register %rbp
1272 ; X64-NEXT: andq $-32, %rsp
1273 ; X64-NEXT: subq $128, %rsp
1274 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
1275 ; X64-NEXT: vmovaps %ymm0, (%rsp)
1276 ; X64-NEXT: movw (%rdi), %ax
1277 ; X64-NEXT: vmovd %eax, %xmm1
1278 ; X64-NEXT: vpbroadcastw %xmm1, %ymm1
1279 ; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
1280 ; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
1281 ; X64-NEXT: movq %rbp, %rsp
1282 ; X64-NEXT: popq %rbp
1283 ; X64-NEXT: vzeroupper
1284 ; X64-NEXT: retq
13791285 eintry:
13801286 %__a.addr.i = alloca <4 x i64>, align 16
13811287 %__b.addr.i = alloca <4 x i64>, align 16
14421348 }
14431349
14441350 define void @isel_crash_8d(i32* %cV_R.addr) {
1445 ; X32-AVX2-LABEL: isel_crash_8d:
1446 ; X32-AVX2: ## BB#0: ## %eintry
1447 ; X32-AVX2-NEXT: pushl %ebp
1448 ; X32-AVX2-NEXT: Lcfi9:
1449 ; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
1450 ; X32-AVX2-NEXT: Lcfi10:
1451 ; X32-AVX2-NEXT: .cfi_offset %ebp, -8
1452 ; X32-AVX2-NEXT: movl %esp, %ebp
1453 ; X32-AVX2-NEXT: Lcfi11:
1454 ; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp
1455 ; X32-AVX2-NEXT: andl $-32, %esp
1456 ; X32-AVX2-NEXT: subl $128, %esp
1457 ; X32-AVX2-NEXT: movl 8(%ebp), %eax
1458 ; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
1459 ; X32-AVX2-NEXT: vmovaps %ymm0, (%esp)
1460 ; X32-AVX2-NEXT: vbroadcastss (%eax), %ymm1
1461 ; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1462 ; X32-AVX2-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
1463 ; X32-AVX2-NEXT: movl %ebp, %esp
1464 ; X32-AVX2-NEXT: popl %ebp
1465 ; X32-AVX2-NEXT: vzeroupper
1466 ; X32-AVX2-NEXT: retl
1351 ; X32-LABEL: isel_crash_8d:
1352 ; X32: ## BB#0: ## %eintry
1353 ; X32-NEXT: pushl %ebp
1354 ; X32-NEXT: Lcfi9:
1355 ; X32-NEXT: .cfi_def_cfa_offset 8
1356 ; X32-NEXT: Lcfi10:
1357 ; X32-NEXT: .cfi_offset %ebp, -8
1358 ; X32-NEXT: movl %esp, %ebp
1359 ; X32-NEXT: Lcfi11:
1360 ; X32-NEXT: .cfi_def_cfa_register %ebp
1361 ; X32-NEXT: andl $-32, %esp
1362 ; X32-NEXT: subl $128, %esp
1363 ; X32-NEXT: movl 8(%ebp), %eax
1364 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
1365 ; X32-NEXT: vmovaps %ymm0, (%esp)
1366 ; X32-NEXT: vbroadcastss (%eax), %ymm1
1367 ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1368 ; X32-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
1369 ; X32-NEXT: movl %ebp, %esp
1370 ; X32-NEXT: popl %ebp
1371 ; X32-NEXT: vzeroupper
1372 ; X32-NEXT: retl
14671373 ;
14681374 ; X64-AVX2-LABEL: isel_crash_8d:
14691375 ; X64-AVX2: ## BB#0: ## %eintry
14891395 ; X64-AVX2-NEXT: vzeroupper
14901396 ; X64-AVX2-NEXT: retq
14911397 ;
1492 ; X32-AVX512VL-LABEL: isel_crash_8d:
1493 ; X32-AVX512VL: ## BB#0: ## %eintry
1494 ; X32-AVX512VL-NEXT: pushl %ebp
1495 ; X32-AVX512VL-NEXT: Lcfi9:
1496 ; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8
1497 ; X32-AVX512VL-NEXT: Lcfi10:
1498 ; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8
1499 ; X32-AVX512VL-NEXT: movl %esp, %ebp
1500 ; X32-AVX512VL-NEXT: Lcfi11:
1501 ; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp
1502 ; X32-AVX512VL-NEXT: andl $-32, %esp
1503 ; X32-AVX512VL-NEXT: subl $128, %esp
1504 ; X32-AVX512VL-NEXT: movl 8(%ebp), %eax
1505 ; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
1506 ; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp)
1507 ; X32-AVX512VL-NEXT: vbroadcastss (%eax), %ymm1
1508 ; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1509 ; X32-AVX512VL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
1510 ; X32-AVX512VL-NEXT: movl %ebp, %esp
1511 ; X32-AVX512VL-NEXT: popl %ebp
1512 ; X32-AVX512VL-NEXT: vzeroupper
1513 ; X32-AVX512VL-NEXT: retl
1514 ;
15151398 ; X64-AVX512VL-LABEL: isel_crash_8d:
15161399 ; X64-AVX512VL: ## BB#0: ## %eintry
15171400 ; X64-AVX512VL-NEXT: pushq %rbp
15241407 ; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
15251408 ; X64-AVX512VL-NEXT: andq $-32, %rsp
15261409 ; X64-AVX512VL-NEXT: subq $128, %rsp
1527 ; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
1410 ; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
15281411 ; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
15291412 ; X64-AVX512VL-NEXT: movl (%rdi), %eax
15301413 ; X64-AVX512VL-NEXT: vpbroadcastd %eax, %ymm1
16041487 }
16051488
16061489 define void @isel_crash_4q(i64* %cV_R.addr) {
1607 ; X32-AVX2-LABEL: isel_crash_4q:
1608 ; X32-AVX2: ## BB#0: ## %eintry
1609 ; X32-AVX2-NEXT: pushl %ebp
1610 ; X32-AVX2-NEXT: Lcfi13:
1611 ; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
1612 ; X32-AVX2-NEXT: Lcfi14:
1613 ; X32-AVX2-NEXT: .cfi_offset %ebp, -8
1614 ; X32-AVX2-NEXT: movl %esp, %ebp
1615 ; X32-AVX2-NEXT: Lcfi15:
1616 ; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp
1617 ; X32-AVX2-NEXT: andl $-32, %esp
1618 ; X32-AVX2-NEXT: subl $128, %esp
1619 ; X32-AVX2-NEXT: movl 8(%ebp), %eax
1620 ; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
1621 ; X32-AVX2-NEXT: vmovaps %ymm0, (%esp)
1622 ; X32-AVX2-NEXT: movl (%eax), %ecx
1623 ; X32-AVX2-NEXT: movl 4(%eax), %eax
1624 ; X32-AVX2-NEXT: vmovd %ecx, %xmm1
1625 ; X32-AVX2-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
1626 ; X32-AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
1627 ; X32-AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
1628 ; X32-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
1629 ; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1630 ; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1631 ; X32-AVX2-NEXT: movl %ebp, %esp
1632 ; X32-AVX2-NEXT: popl %ebp
1633 ; X32-AVX2-NEXT: vzeroupper
1634 ; X32-AVX2-NEXT: retl
1490 ; X32-LABEL: isel_crash_4q:
1491 ; X32: ## BB#0: ## %eintry
1492 ; X32-NEXT: pushl %ebp
1493 ; X32-NEXT: Lcfi13:
1494 ; X32-NEXT: .cfi_def_cfa_offset 8
1495 ; X32-NEXT: Lcfi14:
1496 ; X32-NEXT: .cfi_offset %ebp, -8
1497 ; X32-NEXT: movl %esp, %ebp
1498 ; X32-NEXT: Lcfi15:
1499 ; X32-NEXT: .cfi_def_cfa_register %ebp
1500 ; X32-NEXT: andl $-32, %esp
1501 ; X32-NEXT: subl $128, %esp
1502 ; X32-NEXT: movl 8(%ebp), %eax
1503 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
1504 ; X32-NEXT: vmovaps %ymm0, (%esp)
1505 ; X32-NEXT: movl (%eax), %ecx
1506 ; X32-NEXT: movl 4(%eax), %eax
1507 ; X32-NEXT: vmovd %ecx, %xmm1
1508 ; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
1509 ; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
1510 ; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
1511 ; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
1512 ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1513 ; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1514 ; X32-NEXT: movl %ebp, %esp
1515 ; X32-NEXT: popl %ebp
1516 ; X32-NEXT: vzeroupper
1517 ; X32-NEXT: retl
16351518 ;
16361519 ; X64-AVX2-LABEL: isel_crash_4q:
16371520 ; X64-AVX2: ## BB#0: ## %eintry
16571540 ; X64-AVX2-NEXT: vzeroupper
16581541 ; X64-AVX2-NEXT: retq
16591542 ;
1660 ; X32-AVX512VL-LABEL: isel_crash_4q:
1661 ; X32-AVX512VL: ## BB#0: ## %eintry
1662 ; X32-AVX512VL-NEXT: pushl %ebp
1663 ; X32-AVX512VL-NEXT: Lcfi13:
1664 ; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8
1665 ; X32-AVX512VL-NEXT: Lcfi14:
1666 ; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8
1667 ; X32-AVX512VL-NEXT: movl %esp, %ebp
1668 ; X32-AVX512VL-NEXT: Lcfi15:
1669 ; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp
1670 ; X32-AVX512VL-NEXT: andl $-32, %esp
1671 ; X32-AVX512VL-NEXT: subl $128, %esp
1672 ; X32-AVX512VL-NEXT: movl 8(%ebp), %eax
1673 ; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
1674 ; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp)
1675 ; X32-AVX512VL-NEXT: movl (%eax), %ecx
1676 ; X32-AVX512VL-NEXT: movl 4(%eax), %eax
1677 ; X32-AVX512VL-NEXT: vmovd %ecx, %xmm1
1678 ; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
1679 ; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
1680 ; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
1681 ; X32-AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
1682 ; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1683 ; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
1684 ; X32-AVX512VL-NEXT: movl %ebp, %esp
1685 ; X32-AVX512VL-NEXT: popl %ebp
1686 ; X32-AVX512VL-NEXT: vzeroupper
1687 ; X32-AVX512VL-NEXT: retl
1688 ;
16891543 ; X64-AVX512VL-LABEL: isel_crash_4q:
16901544 ; X64-AVX512VL: ## BB#0: ## %eintry
16911545 ; X64-AVX512VL-NEXT: pushq %rbp
16981552 ; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
16991553 ; X64-AVX512VL-NEXT: andq $-32, %rsp
17001554 ; X64-AVX512VL-NEXT: subq $128, %rsp
1701 ; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
1555 ; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
17021556 ; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
17031557 ; X64-AVX512VL-NEXT: movq (%rdi), %rax
17041558 ; X64-AVX512VL-NEXT: vpbroadcastq %rax, %ymm1
400400 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
401401 ; CHECK-LABEL: vpaddd_mask_test:
402402 ; CHECK: # BB#0:
403 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
403 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
404404 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
405405 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
406406 ; CHECK-NEXT: retq
413413 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
414414 ; CHECK-LABEL: vpaddd_maskz_test:
415415 ; CHECK: # BB#0:
416 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
416 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
417417 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
418418 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
419419 ; CHECK-NEXT: retq
426426 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
427427 ; CHECK-LABEL: vpaddd_mask_fold_test:
428428 ; CHECK: # BB#0:
429 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
429 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
430430 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
431431 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
432432 ; CHECK-NEXT: retq
440440 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
441441 ; CHECK-LABEL: vpaddd_mask_broadcast_test:
442442 ; CHECK: # BB#0:
443 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
443 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
444444 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
445445 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
446446 ; CHECK-NEXT: retq
453453 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
454454 ; CHECK-LABEL: vpaddd_maskz_fold_test:
455455 ; CHECK: # BB#0:
456 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
456 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
457457 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
458458 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
459459 ; CHECK-NEXT: retq
467467 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
468468 ; CHECK-LABEL: vpaddd_maskz_broadcast_test:
469469 ; CHECK: # BB#0:
470 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
470 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
471471 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
472472 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
473473 ; CHECK-NEXT: retq
670670 define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
671671 ; CHECK-LABEL: test_mask_vaddps:
672672 ; CHECK: # BB#0:
673 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
673 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
674674 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
675675 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
676676 ; CHECK-NEXT: retq
685685 define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
686686 ; CHECK-LABEL: test_mask_vmulps:
687687 ; CHECK: # BB#0:
688 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
688 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
689689 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
690690 ; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
691691 ; CHECK-NEXT: retq
700700 define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
701701 ; CHECK-LABEL: test_mask_vminps:
702702 ; CHECK: # BB#0:
703 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
703 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
704704 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
705705 ; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
706706 ; CHECK-NEXT: retq
724724 ;
725725 ; AVX512VL-LABEL: test_mask_vminpd:
726726 ; AVX512VL: # BB#0:
727 ; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
727 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
728728 ; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
729729 ; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
730730 ; AVX512VL-NEXT: retq
747747 ;
748748 ; SKX-LABEL: test_mask_vminpd:
749749 ; SKX: # BB#0:
750 ; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
750 ; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4
751751 ; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
752752 ; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
753753 ; SKX-NEXT: retq
763763 define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
764764 ; CHECK-LABEL: test_mask_vmaxps:
765765 ; CHECK: # BB#0:
766 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
766 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
767767 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
768768 ; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
769769 ; CHECK-NEXT: retq
787787 ;
788788 ; AVX512VL-LABEL: test_mask_vmaxpd:
789789 ; AVX512VL: # BB#0:
790 ; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
790 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
791791 ; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
792792 ; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
793793 ; AVX512VL-NEXT: retq
810810 ;
811811 ; SKX-LABEL: test_mask_vmaxpd:
812812 ; SKX: # BB#0:
813 ; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
813 ; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4
814814 ; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
815815 ; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
816816 ; SKX-NEXT: retq
826826 define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
827827 ; CHECK-LABEL: test_mask_vsubps:
828828 ; CHECK: # BB#0:
829 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
829 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
830830 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
831831 ; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
832832 ; CHECK-NEXT: retq
841841 define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
842842 ; CHECK-LABEL: test_mask_vdivps:
843843 ; CHECK: # BB#0:
844 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
844 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
845845 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
846846 ; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
847847 ; CHECK-NEXT: retq
856856 define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
857857 ; CHECK-LABEL: test_mask_vaddpd:
858858 ; CHECK: # BB#0:
859 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
859 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
860860 ; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
861861 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
862862 ; CHECK-NEXT: retq
871871 define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
872872 ; CHECK-LABEL: test_maskz_vaddpd:
873873 ; CHECK: # BB#0:
874 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
874 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
875875 ; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
876876 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
877877 ; CHECK-NEXT: retq
885885 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
886886 ; CHECK-LABEL: test_mask_fold_vaddpd:
887887 ; CHECK: # BB#0:
888 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
888 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
889889 ; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
890890 ; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
891891 ; CHECK-NEXT: retq
901901 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
902902 ; CHECK-LABEL: test_maskz_fold_vaddpd:
903903 ; CHECK: # BB#0:
904 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
904 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
905905 ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
906906 ; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
907907 ; CHECK-NEXT: retq
929929 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
930930 ; CHECK-LABEL: test_mask_broadcast_vaddpd:
931931 ; CHECK: # BB#0:
932 ; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
932 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
933933 ; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
934934 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
935935 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
948948 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
949949 ; CHECK-LABEL: test_maskz_broadcast_vaddpd:
950950 ; CHECK: # BB#0:
951 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
951 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
952952 ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
953953 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
954954 ; CHECK-NEXT: retq
1515 ; CHECK: ## BB#0:
1616 ; CHECK-NEXT: ## kill: %XMM0 %XMM0 %ZMM0
1717 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [0,1,2,3,4,18,16,7,8,9,10,11,12,13,14,15]
18 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
18 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1919 ; CHECK-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
2020 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2121 ; CHECK-NEXT: retq
10761076 define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
10771077 ; NODQ-LABEL: sitofp_16i1_float:
10781078 ; NODQ: # BB#0:
1079 ; NODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
1079 ; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
10801080 ; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
10811081 ; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
10821082 ; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
10841084 ;
10851085 ; DQ-LABEL: sitofp_16i1_float:
10861086 ; DQ: # BB#0:
1087 ; DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
1087 ; DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
10881088 ; DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
10891089 ; DQ-NEXT: vpmovm2d %k0, %zmm0
10901090 ; DQ-NEXT: vcvtdq2ps %zmm0, %zmm0
11391139 define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
11401140 ; NOVLDQ-LABEL: sitofp_16i1_double:
11411141 ; NOVLDQ: # BB#0:
1142 ; NOVLDQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
1142 ; NOVLDQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
11431143 ; NOVLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
11441144 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
11451145 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
11521152 ;
11531153 ; VLDQ-LABEL: sitofp_16i1_double:
11541154 ; VLDQ: # BB#0:
1155 ; VLDQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2
1155 ; VLDQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
11561156 ; VLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
11571157 ; VLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
11581158 ; VLDQ-NEXT: vpmovm2d %k1, %ymm0
11631163 ;
11641164 ; VLNODQ-LABEL: sitofp_16i1_double:
11651165 ; VLNODQ: # BB#0:
1166 ; VLNODQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
1166 ; VLNODQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
11671167 ; VLNODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
11681168 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
11691169 ; VLNODQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
11751175 ;
11761176 ; AVX512DQ-LABEL: sitofp_16i1_double:
11771177 ; AVX512DQ: # BB#0:
1178 ; AVX512DQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2
1178 ; AVX512DQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
11791179 ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
11801180 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
11811181 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
11911191 define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
11921192 ; NOVLDQ-LABEL: sitofp_8i1_double:
11931193 ; NOVLDQ: # BB#0:
1194 ; NOVLDQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
1194 ; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
11951195 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
11961196 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
11971197 ; NOVLDQ-NEXT: vpmovqd %zmm0, %ymm0
12001200 ;
12011201 ; VLDQ-LABEL: sitofp_8i1_double:
12021202 ; VLDQ: # BB#0:
1203 ; VLDQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1
1203 ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
12041204 ; VLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
12051205 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0
12061206 ; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
12081208 ;
12091209 ; VLNODQ-LABEL: sitofp_8i1_double:
12101210 ; VLNODQ: # BB#0:
1211 ; VLNODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
1211 ; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
12121212 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
12131213 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
12141214 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
12171217 ;
12181218 ; AVX512DQ-LABEL: sitofp_8i1_double:
12191219 ; AVX512DQ: # BB#0:
1220 ; AVX512DQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1
1220 ; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
12211221 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
12221222 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
12231223 ; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
12401240 ;
12411241 ; VLDQ-LABEL: sitofp_8i1_float:
12421242 ; VLDQ: # BB#0:
1243 ; VLDQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
1243 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
12441244 ; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %k0
12451245 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0
12461246 ; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
12481248 ;
12491249 ; VLNODQ-LABEL: sitofp_8i1_float:
12501250 ; VLNODQ: # BB#0:
1251 ; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
1251 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
12521252 ; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %k1
12531253 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
12541254 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
13081308 ;
13091309 ; VLDQ-LABEL: sitofp_4i1_double:
13101310 ; VLDQ: # BB#0:
1311 ; VLDQ-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1311 ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
13121312 ; VLDQ-NEXT: vcmpltpd %ymm0, %ymm1, %k0
13131313 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0
13141314 ; VLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
13161316 ;
13171317 ; VLNODQ-LABEL: sitofp_4i1_double:
13181318 ; VLNODQ: # BB#0:
1319 ; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
1319 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
13201320 ; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1
13211321 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13221322 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14131413 define <16 x float> @uitofp_16i1_float(<16 x i32> %a) {
14141414 ; ALL-LABEL: uitofp_16i1_float:
14151415 ; ALL: # BB#0:
1416 ; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
1416 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
14171417 ; ALL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
14181418 ; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
14191419 ; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
14261426 define <16 x double> @uitofp_16i1_double(<16 x i32> %a) {
14271427 ; NOVL-LABEL: uitofp_16i1_double:
14281428 ; NOVL: # BB#0:
1429 ; NOVL-NEXT: vpxord %zmm1, %zmm1, %zmm1
1429 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
14301430 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
14311431 ; NOVL-NEXT: movq {{.*}}(%rip), %rax
14321432 ; NOVL-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
14401440 ;
14411441 ; VL-LABEL: uitofp_16i1_double:
14421442 ; VL: # BB#0:
1443 ; VL-NEXT: vpxord %zmm1, %zmm1, %zmm1
1443 ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
14441444 ; VL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
14451445 ; VL-NEXT: movl {{.*}}(%rip), %eax
14461446 ; VL-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
14681468 ;
14691469 ; VL-LABEL: uitofp_8i1_float:
14701470 ; VL: # BB#0:
1471 ; VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
1471 ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
14721472 ; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
14731473 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
14741474 ; VL-NEXT: vcvtudq2ps %ymm0, %ymm0
14911491 ;
14921492 ; VL-LABEL: uitofp_8i1_double:
14931493 ; VL: # BB#0:
1494 ; VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
1494 ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
14951495 ; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
14961496 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
14971497 ; VL-NEXT: vcvtudq2pd %ymm0, %zmm0
253253 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
254254 ; CHECK-LABEL: gather_qps:
255255 ; CHECK: ## BB#0:
256 ; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
256 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
257257 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
258258 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
259259 ; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
332332 ; CHECK-NEXT: kmovd %esi, %k1
333333 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm0 {%k1}
334334 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
335 ; CHECK-NEXT: vxorpd %ymm2, %ymm2, %ymm2
335 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
336336 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,2), %ymm2 {%k1}
337337 ; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0
338338 ; CHECK-NEXT: retq
350350 ; CHECK-NEXT: kmovd %esi, %k1
351351 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
352352 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
353 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
353 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
354354 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
355355 ; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0
356356 ; CHECK-NEXT: retq
475475 ; CHECK-NEXT: kmovd %esi, %k1
476476 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm0 {%k1}
477477 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
478 ; CHECK-NEXT: vxorpd %ymm2, %ymm2, %ymm2
478 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
479479 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %ymm2 {%k1}
480480 ; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0
481481 ; CHECK-NEXT: retq
544544 ; CHECK-NEXT: kmovd %esi, %k1
545545 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
546546 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
547 ; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
547 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
548548 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm2 {%k1}
549549 ; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0
550550 ; CHECK-NEXT: retq
846846 ; CHECK-LABEL: gather_mask_test:
847847 ; CHECK: ## BB#0:
848848 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
849 ; CHECK-NEXT: vxorps %zmm2, %zmm2, %zmm2
849 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
850850 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1}
851851 ; CHECK-NEXT: kxorw %k0, %k0, %k1
852852 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
41304130 ; CHECK-NEXT: kmovw %edi, %k1
41314131 ; CHECK-NEXT: vmovapd %zmm0, %zmm3
41324132 ; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1}
4133 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
4133 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
41344134 ; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
41354135 ; CHECK-NEXT: vaddpd %zmm4, %zmm3, %zmm3
41364136 ; CHECK-NEXT: vfixupimmpd $3, {sae}, %zmm2, %zmm1, %zmm0
41524152 ; CHECK-NEXT: kmovw %edi, %k1
41534153 ; CHECK-NEXT: vmovapd %zmm0, %zmm3
41544154 ; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z}
4155 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
4155 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
41564156 ; CHECK-NEXT: vmovapd %zmm0, %zmm5
41574157 ; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z}
41584158 ; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm3
42214221 ; CHECK-NEXT: kmovw %edi, %k1
42224222 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
42234223 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1}
4224 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
4224 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
42254225 ; CHECK-NEXT: vmovaps %zmm0, %zmm5
42264226 ; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1}
42274227 ; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm3
42464246 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3
42474247 ; CHECK-NEXT: vmovaps %zmm0, %zmm4
42484248 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
4249 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
4249 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
42504250 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
42514251 ; CHECK-NEXT: vaddps %zmm0, %zmm4, %zmm0
42524252 ; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
660660 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
661661 ; KNL-LABEL: test8:
662662 ; KNL: ## BB#0:
663 ; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
663 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
664664 ; KNL-NEXT: cmpl %esi, %edi
665665 ; KNL-NEXT: jg LBB17_1
666666 ; KNL-NEXT: ## BB#2:
675675 ;
676676 ; SKX-LABEL: test8:
677677 ; SKX: ## BB#0:
678 ; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
678 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
679679 ; SKX-NEXT: cmpl %esi, %edi
680680 ; SKX-NEXT: jg LBB17_1
681681 ; SKX-NEXT: ## BB#2:
691691 ;
692692 ; AVX512BW-LABEL: test8:
693693 ; AVX512BW: ## BB#0:
694 ; AVX512BW-NEXT: vpxord %zmm2, %zmm2, %zmm2
694 ; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
695695 ; AVX512BW-NEXT: cmpl %esi, %edi
696696 ; AVX512BW-NEXT: jg LBB17_1
697697 ; AVX512BW-NEXT: ## BB#2:
707707 ;
708708 ; AVX512DQ-LABEL: test8:
709709 ; AVX512DQ: ## BB#0:
710 ; AVX512DQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
710 ; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
711711 ; AVX512DQ-NEXT: cmpl %esi, %edi
712712 ; AVX512DQ-NEXT: jg LBB17_1
713713 ; AVX512DQ-NEXT: ## BB#2:
36013601 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
36023602 ; KNL-LABEL: test_bitcast_v8i1_zext:
36033603 ; KNL: ## BB#0:
3604 ; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
3604 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
36053605 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
36063606 ; KNL-NEXT: kmovw %k0, %eax
36073607 ; KNL-NEXT: movzbl %al, %eax
36103610 ;
36113611 ; SKX-LABEL: test_bitcast_v8i1_zext:
36123612 ; SKX: ## BB#0:
3613 ; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
3613 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
36143614 ; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
36153615 ; SKX-NEXT: kmovb %k0, %eax
36163616 ; SKX-NEXT: addl %eax, %eax
36193619 ;
36203620 ; AVX512BW-LABEL: test_bitcast_v8i1_zext:
36213621 ; AVX512BW: ## BB#0:
3622 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
3622 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
36233623 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
36243624 ; AVX512BW-NEXT: kmovd %k0, %eax
36253625 ; AVX512BW-NEXT: movzbl %al, %eax
36293629 ;
36303630 ; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
36313631 ; AVX512DQ: ## BB#0:
3632 ; AVX512DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
3632 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
36333633 ; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
36343634 ; AVX512DQ-NEXT: kmovb %k0, %eax
36353635 ; AVX512DQ-NEXT: addl %eax, %eax
36463646 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
36473647 ; KNL-LABEL: test_bitcast_v16i1_zext:
36483648 ; KNL: ## BB#0:
3649 ; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
3649 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
36503650 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
36513651 ; KNL-NEXT: kmovw %k0, %eax
36523652 ; KNL-NEXT: addl %eax, %eax
36543654 ;
36553655 ; SKX-LABEL: test_bitcast_v16i1_zext:
36563656 ; SKX: ## BB#0:
3657 ; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
3657 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
36583658 ; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
36593659 ; SKX-NEXT: kmovw %k0, %eax
36603660 ; SKX-NEXT: addl %eax, %eax
36633663 ;
36643664 ; AVX512BW-LABEL: test_bitcast_v16i1_zext:
36653665 ; AVX512BW: ## BB#0:
3666 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
3666 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
36673667 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
36683668 ; AVX512BW-NEXT: kmovw %k0, %eax
36693669 ; AVX512BW-NEXT: addl %eax, %eax
36723672 ;
36733673 ; AVX512DQ-LABEL: test_bitcast_v16i1_zext:
36743674 ; AVX512DQ: ## BB#0:
3675 ; AVX512DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
3675 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
36763676 ; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
36773677 ; AVX512DQ-NEXT: kmovw %k0, %eax
36783678 ; AVX512DQ-NEXT: addl %eax, %eax
44 define <16 x i32> @test1(<16 x i32> %trigger, <16 x i32>* %addr) {
55 ; AVX512-LABEL: test1:
66 ; AVX512: ## BB#0:
7 ; AVX512-NEXT: vpxord %zmm1, %zmm1, %zmm1
7 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
88 ; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
99 ; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
1010 ; AVX512-NEXT: retq
1616 define <16 x i32> @test2(<16 x i32> %trigger, <16 x i32>* %addr) {
1717 ; AVX512-LABEL: test2:
1818 ; AVX512: ## BB#0:
19 ; AVX512-NEXT: vpxord %zmm1, %zmm1, %zmm1
19 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
2020 ; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
2121 ; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
2222 ; AVX512-NEXT: retq
2828 define void @test3(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) {
2929 ; AVX512-LABEL: test3:
3030 ; AVX512: ## BB#0:
31 ; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
31 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
3232 ; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
3333 ; AVX512-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1}
3434 ; AVX512-NEXT: vzeroupper
4141 define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %dst) {
4242 ; AVX512-LABEL: test4:
4343 ; AVX512: ## BB#0:
44 ; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
44 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
4545 ; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
4646 ; AVX512-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1}
4747 ; AVX512-NEXT: retq
5353 define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val) {
5454 ; AVX512-LABEL: test13:
5555 ; AVX512: ## BB#0:
56 ; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
56 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
5757 ; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
5858 ; AVX512-NEXT: vmovups %zmm1, (%rdi) {%k1}
5959 ; AVX512-NEXT: vzeroupper
9898 define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
9999 ; AVX512-LABEL: test23:
100100 ; AVX512: ## BB#0:
101 ; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
101 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
102102 ; AVX512-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
103103 ; AVX512-NEXT: vpcmpeqq %zmm2, %zmm1, %k2
104104 ; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
310310 define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
311311 ; CHECK-LABEL: test32:
312312 ; CHECK: ## BB#0:
313 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
313 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
314314 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
315315 ; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
316316 ; CHECK-NEXT: retq ## encoding: [0xc3]
324324 define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
325325 ; CHECK-LABEL: test33:
326326 ; CHECK: ## BB#0:
327 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
327 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
328328 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
329329 ; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x07]
330330 ; CHECK-NEXT: retq ## encoding: [0xc3]
338338 define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
339339 ; CHECK-LABEL: test34:
340340 ; CHECK: ## BB#0:
341 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
341 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xef,0xc9]
342342 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
343343 ; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x07]
344344 ; CHECK-NEXT: retq ## encoding: [0xc3]
352352 define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
353353 ; CHECK-LABEL: test35:
354354 ; CHECK: ## BB#0:
355 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
355 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xef,0xc9]
356356 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
357357 ; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x07]
358358 ; CHECK-NEXT: retq ## encoding: [0xc3]
366366 define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
367367 ; CHECK-LABEL: test36:
368368 ; CHECK: ## BB#0:
369 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
369 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
370370 ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
371371 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
372372 ; CHECK-NEXT: retq ## encoding: [0xc3]
380380 define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
381381 ; CHECK-LABEL: test37:
382382 ; CHECK: ## BB#0:
383 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
383 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
384384 ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
385385 ; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x07]
386386 ; CHECK-NEXT: retq ## encoding: [0xc3]
394394 define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
395395 ; CHECK-LABEL: test38:
396396 ; CHECK: ## BB#0:
397 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
397 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xef,0xc9]
398398 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
399399 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x07]
400400 ; CHECK-NEXT: retq ## encoding: [0xc3]
408408 define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
409409 ; CHECK-LABEL: test39:
410410 ; CHECK: ## BB#0:
411 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
411 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xef,0xc9]
412412 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
413413 ; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x07]
414414 ; CHECK-NEXT: retq ## encoding: [0xc3]
422422 define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
423423 ; CHECK-LABEL: test40:
424424 ; CHECK: ## BB#0:
425 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
425 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
426426 ; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
427427 ; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
428428 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
437437 define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
438438 ; CHECK-LABEL: test41:
439439 ; CHECK: ## BB#0:
440 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
440 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
441441 ; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
442442 ; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
443443 ; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
452452 define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
453453 ; CHECK-LABEL: test42:
454454 ; CHECK: ## BB#0:
455 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
455 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
456456 ; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
457457 ; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
458458 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
467467 define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
468468 ; CHECK-LABEL: test43:
469469 ; CHECK: ## BB#0:
470 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
470 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
471471 ; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
472472 ; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
473473 ; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
482482 define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
483483 ; CHECK-LABEL: test44:
484484 ; CHECK: ## BB#0:
485 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
485 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
486486 ; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
487487 ; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
488488 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
497497 define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
498498 ; CHECK-LABEL: test45:
499499 ; CHECK: ## BB#0:
500 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
500 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
501501 ; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
502502 ; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
503503 ; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
512512 define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
513513 ; CHECK-LABEL: test46:
514514 ; CHECK: ## BB#0:
515 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
515 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
516516 ; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
517517 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
518518 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
527527 define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
528528 ; CHECK-LABEL: test47:
529529 ; CHECK: ## BB#0:
530 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
530 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
531531 ; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
532532 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
533533 ; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
55 ; X86-LABEL: select00:
66 ; X86: # BB#0:
77 ; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp)
8 ; X86-NEXT: vpxord %zmm1, %zmm1, %zmm1
8 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
99 ; X86-NEXT: je .LBB0_2
1010 ; X86-NEXT: # BB#1:
1111 ; X86-NEXT: vmovdqa64 %zmm0, %zmm1
1515 ;
1616 ; X64-LABEL: select00:
1717 ; X64: # BB#0:
18 ; X64-NEXT: vpxord %zmm1, %zmm1, %zmm1
18 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
1919 ; X64-NEXT: cmpl $255, %edi
2020 ; X64-NEXT: je .LBB0_2
2121 ; X64-NEXT: # BB#1:
3333 ; X86-LABEL: select01:
3434 ; X86: # BB#0:
3535 ; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp)
36 ; X86-NEXT: vpxord %zmm1, %zmm1, %zmm1
36 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
3737 ; X86-NEXT: je .LBB1_2
3838 ; X86-NEXT: # BB#1:
3939 ; X86-NEXT: vmovdqa64 %zmm0, %zmm1
4343 ;
4444 ; X64-LABEL: select01:
4545 ; X64: # BB#0:
46 ; X64-NEXT: vpxord %zmm1, %zmm1, %zmm1
46 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
4747 ; X64-NEXT: cmpl $255, %edi
4848 ; X64-NEXT: je .LBB1_2
4949 ; X64-NEXT: # BB#1:
3030 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
3131 ; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
3232 ; CHECK-NEXT: vpmovm2q %k0, %zmm0
33 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
33 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
3434 ; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
3535 ; CHECK-NEXT: vpmovq2m %zmm0, %k0
3636 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
4343 define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
4444 ; ALL-LABEL: _ss16xfloat_mask:
4545 ; ALL: # BB#0:
46 ; ALL-NEXT: vpxord %zmm3, %zmm3, %zmm3
46 ; ALL-NEXT: vpxor %xmm3, %xmm3, %xmm3
4747 ; ALL-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
4848 ; ALL-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
4949 ; ALL-NEXT: vmovaps %zmm1, %zmm0
5858 define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
5959 ; ALL-LABEL: _ss16xfloat_maskz:
6060 ; ALL: # BB#0:
61 ; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2
61 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
6262 ; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
6363 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
6464 ; ALL-NEXT: retq
8383 define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
8484 ; ALL-LABEL: _ss16xfloat_mask_load:
8585 ; ALL: # BB#0:
86 ; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2
86 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
8787 ; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
8888 ; ALL-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
8989 ; ALL-NEXT: retq
9898 define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
9999 ; ALL-LABEL: _ss16xfloat_maskz_load:
100100 ; ALL: # BB#0:
101 ; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
101 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
102102 ; ALL-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
103103 ; ALL-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
104104 ; ALL-NEXT: retq
215215 define <16 x i32> @test_vbroadcast() {
216216 ; ALL-LABEL: test_vbroadcast:
217217 ; ALL: # BB#0: # %entry
218 ; ALL-NEXT: vpxord %zmm0, %zmm0, %zmm0
218 ; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
219219 ; ALL-NEXT: vcmpunordps %zmm0, %zmm0, %k1
220220 ; ALL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
221221 ; ALL-NEXT: knotw %k1, %k1
234234 ; X64-AVX512VL-LABEL: PR29088:
235235 ; X64-AVX512VL: ## BB#0:
236236 ; X64-AVX512VL-NEXT: vmovaps (%rdi), %xmm0
237 ; X64-AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
237 ; X64-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
238238 ; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi)
239239 ; X64-AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
240240 ; X64-AVX512VL-NEXT: retq
242242 ; X64-AVX512BWVL-LABEL: PR29088:
243243 ; X64-AVX512BWVL: ## BB#0:
244244 ; X64-AVX512BWVL-NEXT: vmovaps (%rdi), %xmm0
245 ; X64-AVX512BWVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
245 ; X64-AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
246246 ; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi)
247247 ; X64-AVX512BWVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
248248 ; X64-AVX512BWVL-NEXT: retq
250250 ; X64-AVX512DQVL-LABEL: PR29088:
251251 ; X64-AVX512DQVL: ## BB#0:
252252 ; X64-AVX512DQVL-NEXT: vmovaps (%rdi), %xmm0
253 ; X64-AVX512DQVL-NEXT: vxorps %ymm1, %ymm1, %ymm1
253 ; X64-AVX512DQVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
254254 ; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi)
255255 ; X64-AVX512DQVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
256256 ; X64-AVX512DQVL-NEXT: retq
33 define <16 x i32> @test() {
44 ; CHECK-LABEL: test:
55 ; CHECK: ## BB#0: ## %entry
6 ; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
6 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
77 ; CHECK-NEXT: retq
88 entry:
99 %0 = icmp slt <16 x i32> undef, undef
2424 define <16 x double> @test2(<16 x float> %x, <16 x float> %y, <16 x double> %a, <16 x double> %b) {
2525 ; CHECK-SKX-LABEL: test2:
2626 ; CHECK-SKX: # BB#0: # %entry
27 ; CHECK-SKX-NEXT: vxorps %zmm6, %zmm6, %zmm6
27 ; CHECK-SKX-NEXT: vxorps %xmm6, %xmm6, %xmm6
2828 ; CHECK-SKX-NEXT: vcmpltps %zmm0, %zmm6, %k0
2929 ; CHECK-SKX-NEXT: vcmpltps %zmm6, %zmm1, %k1
3030 ; CHECK-SKX-NEXT: korw %k1, %k0, %k0
3939 ;
4040 ; CHECK-KNL-LABEL: test2:
4141 ; CHECK-KNL: # BB#0: # %entry
42 ; CHECK-KNL-NEXT: vpxord %zmm6, %zmm6, %zmm6
42 ; CHECK-KNL-NEXT: vxorps %xmm6, %xmm6, %xmm6
4343 ; CHECK-KNL-NEXT: vcmpltps %zmm0, %zmm6, %k0
4444 ; CHECK-KNL-NEXT: vcmpltps %zmm6, %zmm1, %k1
4545 ; CHECK-KNL-NEXT: korw %k1, %k0, %k1
2323 define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
2424 ; CHECK-LABEL: test3:
2525 ; CHECK: ## BB#0:
26 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
26 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
2727 ; CHECK-NEXT: vpcmpneqb %zmm2, %zmm1, %k1
2828 ; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1}
2929 ; CHECK-NEXT: retq
3737 define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
3838 ; CHECK-LABEL: test4:
3939 ; CHECK: ## BB#0:
40 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
40 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
4141 ; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k1
4242 ; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z}
4343 ; CHECK-NEXT: retq
7171 define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
7272 ; CHECK-LABEL: test7:
7373 ; CHECK: ## BB#0:
74 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
74 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
7575 ; CHECK-NEXT: vpcmpneqw %zmm2, %zmm1, %k1
7676 ; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1}
7777 ; CHECK-NEXT: retq
8585 define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
8686 ; CHECK-LABEL: test8:
8787 ; CHECK: ## BB#0:
88 ; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
88 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
8989 ; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k1
9090 ; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
9191 ; CHECK-NEXT: retq
2323 define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
2424 ; CHECK-LABEL: test_256_3:
2525 ; CHECK: ## BB#0:
26 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
26 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
2727 ; CHECK-NEXT: vpcmpneqb %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xca,0x04]
2828 ; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x07]
2929 ; CHECK-NEXT: retq ## encoding: [0xc3]
3737 define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
3838 ; CHECK-LABEL: test_256_4:
3939 ; CHECK: ## BB#0:
40 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
40 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
4141 ; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc9,0x04]
4242 ; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x07]
4343 ; CHECK-NEXT: retq ## encoding: [0xc3]
7171 define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
7272 ; CHECK-LABEL: test_256_7:
7373 ; CHECK: ## BB#0:
74 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
74 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
7575 ; CHECK-NEXT: vpcmpneqw %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x3f,0xca,0x04]
7676 ; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x07]
7777 ; CHECK-NEXT: retq ## encoding: [0xc3]
8585 define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
8686 ; CHECK-LABEL: test_256_8:
8787 ; CHECK: ## BB#0:
88 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
88 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
8989 ; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x04]
9090 ; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x07]
9191 ; CHECK-NEXT: retq ## encoding: [0xc3]
1010 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm3
1111 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
1212 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1}
13 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
13 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
1414 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1}
1515 ; CHECK-NEXT: vpaddq %zmm0, %zmm4, %zmm0
1616 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z}
3838 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm3
3939 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
4040 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} {z}
41 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
41 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
4242 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z}
4343 ; CHECK-NEXT: vpaddq %zmm0, %zmm4, %zmm0
4444 ; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z}
6666 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm3
6767 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
6868 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1}
69 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
69 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
7070 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1}
7171 ; CHECK-NEXT: vpaddq %zmm0, %zmm4, %zmm0
7272 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z}
9494 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm3
9595 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
9696 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} {z}
97 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
97 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
9898 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z}
9999 ; CHECK-NEXT: vpaddq %zmm0, %zmm4, %zmm0
100100 ; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z}
3939 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3
4040 ; CHECK-NEXT: vmovdqa %ymm0, %ymm4
4141 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1}
42 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
42 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
4343 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
4444 ; CHECK-NEXT: vpaddq %ymm0, %ymm4, %ymm0
4545 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
9595 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3
9696 ; CHECK-NEXT: vmovdqa %ymm0, %ymm4
9797 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} {z}
98 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
98 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
9999 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
100100 ; CHECK-NEXT: vpaddq %ymm0, %ymm4, %ymm0
101101 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
151151 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3
152152 ; CHECK-NEXT: vmovdqa %ymm0, %ymm4
153153 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1}
154 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
154 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
155155 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
156156 ; CHECK-NEXT: vpaddq %ymm0, %ymm4, %ymm0
157157 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
207207 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3
208208 ; CHECK-NEXT: vmovdqa %ymm0, %ymm4
209209 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} {z}
210 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
210 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
211211 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
212212 ; CHECK-NEXT: vpaddq %ymm0, %ymm4, %ymm0
213213 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
4848 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
4949 ; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm3 {%k1}
5050 ; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm1
51 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
51 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
5252 ; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z}
5353 ; CHECK-NEXT: vpaddb %zmm1, %zmm4, %zmm0
5454 ; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
7070 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
7171 ; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3 {%k1}
7272 ; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1
73 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
73 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
7474 ; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z}
7575 ; CHECK-NEXT: vpaddb %zmm1, %zmm4, %zmm0
7676 ; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
111111 ; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
112112 ; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x75,0xda]
113113 ; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x75,0xca]
114 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
114 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
115115 ; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
116116 ; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc1]
117117 ; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
155155 ; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
156156 ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xda]
157157 ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xca]
158 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
158 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
159159 ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
160160 ; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc1]
161161 ; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
7575 define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
7676 ; CHECK-LABEL: vpaddd256_mask_test:
7777 ; CHECK: ## BB#0:
78 ; CHECK-NEXT: vpxor %ymm3, %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xef,0xdb]
78 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xef,0xdb]
7979 ; CHECK-NEXT: vpcmpneqd %ymm3, %ymm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x28,0x1f,0xcb,0x04]
8080 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xc1]
8181 ; CHECK-NEXT: retq ## encoding: [0xc3]
8888 define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
8989 ; CHECK-LABEL: vpaddd256_maskz_test:
9090 ; CHECK: ## BB#0:
91 ; CHECK-NEXT: vpxor %ymm3, %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xef,0xdb]
91 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xef,0xdb]
9292 ; CHECK-NEXT: vpcmpneqd %ymm3, %ymm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x28,0x1f,0xcb,0x04]
9393 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
9494 ; CHECK-NEXT: retq ## encoding: [0xc3]
101101 define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
102102 ; CHECK-LABEL: vpaddd256_mask_fold_test:
103103 ; CHECK: ## BB#0:
104 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
104 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
105105 ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
106106 ; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x07]
107107 ; CHECK-NEXT: retq ## encoding: [0xc3]
115115 define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
116116 ; CHECK-LABEL: vpaddd256_mask_broadcast_test:
117117 ; CHECK: ## BB#0:
118 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
118 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
119119 ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
120120 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x05,A,A,A,A]
121121 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI10_0-4, kind: reloc_riprel_4byte
129129 define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
130130 ; CHECK-LABEL: vpaddd256_maskz_fold_test:
131131 ; CHECK: ## BB#0:
132 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
132 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
133133 ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
134134 ; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
135135 ; CHECK-NEXT: retq ## encoding: [0xc3]
143143 define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
144144 ; CHECK-LABEL: vpaddd256_maskz_broadcast_test:
145145 ; CHECK: ## BB#0:
146 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
146 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
147147 ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
148148 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x05,A,A,A,A]
149149 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI12_0-4, kind: reloc_riprel_4byte
215215 define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
216216 ; CHECK-LABEL: test_mask_vaddps_256:
217217 ; CHECK: ## BB#0:
218 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
218 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
219219 ; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcc,0x04]
220220 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x58,0xc2]
221221 ; CHECK-NEXT: retq ## encoding: [0xc3]
228228 define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
229229 ; CHECK-LABEL: test_mask_vmulps_256:
230230 ; CHECK: ## BB#0:
231 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
231 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
232232 ; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcc,0x04]
233233 ; CHECK-NEXT: vmulps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x59,0xc2]
234234 ; CHECK-NEXT: retq ## encoding: [0xc3]
241241 define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1)nounwind readnone {
242242 ; CHECK-LABEL: test_mask_vminps_256:
243243 ; CHECK: ## BB#0:
244 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
244 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
245245 ; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcc,0x04]
246246 ; CHECK-NEXT: vminps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5d,0xc2]
247247 ; CHECK-NEXT: retq ## encoding: [0xc3]
255255 define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
256256 ; CHECK-LABEL: test_mask_vmaxps_256:
257257 ; CHECK: ## BB#0:
258 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
258 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
259259 ; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcc,0x04]
260260 ; CHECK-NEXT: vmaxps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5f,0xc2]
261261 ; CHECK-NEXT: retq ## encoding: [0xc3]
269269 define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
270270 ; CHECK-LABEL: test_mask_vsubps_256:
271271 ; CHECK: ## BB#0:
272 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
272 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
273273 ; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcc,0x04]
274274 ; CHECK-NEXT: vsubps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5c,0xc2]
275275 ; CHECK-NEXT: retq ## encoding: [0xc3]
282282 define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
283283 ; CHECK-LABEL: test_mask_vdivps_256:
284284 ; CHECK: ## BB#0:
285 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
285 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
286286 ; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcc,0x04]
287287 ; CHECK-NEXT: vdivps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5e,0xc2]
288288 ; CHECK-NEXT: retq ## encoding: [0xc3]
295295 define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
296296 ; CHECK-LABEL: test_mask_vmulpd_256:
297297 ; CHECK: ## BB#0:
298 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
298 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
299299 ; CHECK-NEXT: vpcmpneqq %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0xe5,0x28,0x1f,0xcc,0x04]
300300 ; CHECK-NEXT: vmulpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x59,0xc2]
301301 ; CHECK-NEXT: retq ## encoding: [0xc3]
308308 define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
309309 ; CHECK-LABEL: test_mask_vminpd_256:
310310 ; CHECK: ## BB#0:
311 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
311 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
312312 ; CHECK-NEXT: vpcmpneqq %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0xe5,0x28,0x1f,0xcc,0x04]
313313 ; CHECK-NEXT: vminpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5d,0xc2]
314314 ; CHECK-NEXT: retq ## encoding: [0xc3]
322322 define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
323323 ; CHECK-LABEL: test_mask_vmaxpd_256:
324324 ; CHECK: ## BB#0:
325 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
325 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
326326 ; CHECK-NEXT: vpcmpneqq %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0xe5,0x28,0x1f,0xcc,0x04]
327327 ; CHECK-NEXT: vmaxpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5f,0xc2]
328328 ; CHECK-NEXT: retq ## encoding: [0xc3]
336336 define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
337337 ; CHECK-LABEL: test_mask_vsubpd_256:
338338 ; CHECK: ## BB#0:
339 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
339 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
340340 ; CHECK-NEXT: vpcmpneqq %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0xe5,0x28,0x1f,0xcc,0x04]
341341 ; CHECK-NEXT: vsubpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5c,0xc2]
342342 ; CHECK-NEXT: retq ## encoding: [0xc3]
349349 define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
350350 ; CHECK-LABEL: test_mask_vdivpd_256:
351351 ; CHECK: ## BB#0:
352 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
352 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
353353 ; CHECK-NEXT: vpcmpneqq %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0xe5,0x28,0x1f,0xcc,0x04]
354354 ; CHECK-NEXT: vdivpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5e,0xc2]
355355 ; CHECK-NEXT: retq ## encoding: [0xc3]
362362 define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
363363 ; CHECK-LABEL: test_mask_vaddpd_256:
364364 ; CHECK: ## BB#0:
365 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
365 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
366366 ; CHECK-NEXT: vpcmpneqq %ymm4, %ymm3, %k1 ## encoding: [0x62,0xf3,0xe5,0x28,0x1f,0xcc,0x04]
367367 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0xc2]
368368 ; CHECK-NEXT: retq ## encoding: [0xc3]
375375 define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
376376 ; CHECK-LABEL: test_maskz_vaddpd_256:
377377 ; CHECK: ## BB#0:
378 ; CHECK-NEXT: vpxor %ymm3, %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xef,0xdb]
378 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xef,0xdb]
379379 ; CHECK-NEXT: vpcmpneqq %ymm3, %ymm2, %k1 ## encoding: [0x62,0xf3,0xed,0x28,0x1f,0xcb,0x04]
380380 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0xc1]
381381 ; CHECK-NEXT: retq ## encoding: [0xc3]
388388 define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i, <4 x double>* %j, <4 x i64> %mask1) nounwind {
389389 ; CHECK-LABEL: test_mask_fold_vaddpd_256:
390390 ; CHECK: ## BB#0:
391 ; CHECK-NEXT: vpxor %ymm3, %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xef,0xdb]
391 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xef,0xdb]
392392 ; CHECK-NEXT: vpcmpneqq %ymm3, %ymm2, %k1 ## encoding: [0x62,0xf3,0xed,0x28,0x1f,0xcb,0x04]
393393 ; CHECK-NEXT: vaddpd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0x07]
394394 ; CHECK-NEXT: retq ## encoding: [0xc3]
402402 define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j, <4 x i64> %mask1) nounwind {
403403 ; CHECK-LABEL: test_maskz_fold_vaddpd_256:
404404 ; CHECK: ## BB#0:
405 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
405 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
406406 ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
407407 ; CHECK-NEXT: vaddpd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0x07]
408408 ; CHECK-NEXT: retq ## encoding: [0xc3]
428428 define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, double* %j, <4 x i64> %mask1) nounwind {
429429 ; CHECK-LABEL: test_mask_broadcast_vaddpd_256:
430430 ; CHECK: ## BB#0:
431 ; CHECK-NEXT: vpxor %ymm0, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0xc0]
431 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
432432 ; CHECK-NEXT: vpcmpneqq %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf3,0xed,0x28,0x1f,0xc8,0x04]
433433 ; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm1, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x0f]
434434 ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
445445 define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j, <4 x i64> %mask1) nounwind {
446446 ; CHECK-LABEL: test_maskz_broadcast_vaddpd_256:
447447 ; CHECK: ## BB#0:
448 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
448 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
449449 ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
450450 ; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x58,0x07]
451451 ; CHECK-NEXT: retq ## encoding: [0xc3]
39493949 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
39503950 ; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
39513951 ; CHECK-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
3952 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
3952 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
39533953 ; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
39543954 ; CHECK-NEXT: vaddpd %ymm4, %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
39553955 ; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
39713971 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
39723972 ; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
39733973 ; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
3974 ; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4]
3974 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
39753975 ; CHECK-NEXT: vmovapd %ymm0, %ymm5 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
39763976 ; CHECK-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
39773977 ; CHECK-NEXT: vaddpd %ymm5, %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
40424042 ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05]
40434043 ; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
40444044 ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xe2,0x05]
4045 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
4045 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
40464046 ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
40474047 ; CHECK-NEXT: vaddps %ymm0, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0]
40484048 ; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
40654065 ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05]
40664066 ; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
40674067 ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xe2,0x05]
4068 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
4068 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
40694069 ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
40704070 ; CHECK-NEXT: vaddps %ymm0, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0]
40714071 ; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
163163 define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
164164 ; CHECK-LABEL: test_256_17:
165165 ; CHECK: ## BB#0:
166 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
166 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
167167 ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
168168 ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
169169 ; CHECK-NEXT: retq ## encoding: [0xc3]
177177 define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
178178 ; CHECK-LABEL: test_256_18:
179179 ; CHECK: ## BB#0:
180 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
180 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
181181 ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
182182 ; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
183183 ; CHECK-NEXT: retq ## encoding: [0xc3]
191191 define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
192192 ; CHECK-LABEL: test_256_19:
193193 ; CHECK: ## BB#0:
194 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
194 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
195195 ; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
196196 ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
197197 ; CHECK-NEXT: retq ## encoding: [0xc3]
205205 define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
206206 ; CHECK-LABEL: test_256_20:
207207 ; CHECK: ## BB#0:
208 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
208 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
209209 ; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
210210 ; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
211211 ; CHECK-NEXT: retq ## encoding: [0xc3]
219219 define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
220220 ; CHECK-LABEL: test_256_21:
221221 ; CHECK: ## BB#0:
222 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
222 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
223223 ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
224224 ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
225225 ; CHECK-NEXT: retq ## encoding: [0xc3]
233233 define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
234234 ; CHECK-LABEL: test_256_22:
235235 ; CHECK: ## BB#0:
236 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
236 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
237237 ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
238238 ; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
239239 ; CHECK-NEXT: retq ## encoding: [0xc3]
247247 define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
248248 ; CHECK-LABEL: test_256_23:
249249 ; CHECK: ## BB#0:
250 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
250 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
251251 ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
252252 ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
253253 ; CHECK-NEXT: retq ## encoding: [0xc3]
261261 define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
262262 ; CHECK-LABEL: test_256_24:
263263 ; CHECK: ## BB#0:
264 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
264 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
265265 ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
266266 ; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
267267 ; CHECK-NEXT: retq ## encoding: [0xc3]
275275 define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
276276 ; CHECK-LABEL: test_256_25:
277277 ; CHECK: ## BB#0:
278 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
278 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
279279 ; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
280280 ; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
281281 ; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
290290 define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
291291 ; CHECK-LABEL: test_256_26:
292292 ; CHECK: ## BB#0:
293 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
293 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
294294 ; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
295295 ; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
296296 ; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
305305 define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
306306 ; CHECK-LABEL: test_256_27:
307307 ; CHECK: ## BB#0:
308 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
308 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
309309 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
310310 ; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
311311 ; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
320320 define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
321321 ; CHECK-LABEL: test_256_28:
322322 ; CHECK: ## BB#0:
323 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
323 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
324324 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
325325 ; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
326326 ; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
335335 define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
336336 ; CHECK-LABEL: test_256_29:
337337 ; CHECK: ## BB#0:
338 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
338 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
339339 ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
340340 ; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
341341 ; CHECK-NEXT: retq ## encoding: [0xc3]
349349 define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
350350 ; CHECK-LABEL: test_256_30:
351351 ; CHECK: ## BB#0:
352 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
352 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
353353 ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
354354 ; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
355355 ; CHECK-NEXT: retq ## encoding: [0xc3]
363363 define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
364364 ; CHECK-LABEL: test_256_31:
365365 ; CHECK: ## BB#0:
366 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
366 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
367367 ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
368368 ; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
369369 ; CHECK-NEXT: retq ## encoding: [0xc3]
377377 define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
378378 ; CHECK-LABEL: test_256_32:
379379 ; CHECK: ## BB#0:
380 ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
380 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
381381 ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
382382 ; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
383383 ; CHECK-NEXT: retq ## encoding: [0xc3]
7272 define <8 x float> @_ss8xfloat_mask(<8 x float> %i, float %a, <8 x i32> %mask1) {
7373 ; CHECK-LABEL: _ss8xfloat_mask:
7474 ; CHECK: # BB#0:
75 ; CHECK-NEXT: vpxor %ymm3, %ymm3, %ymm3
75 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
7676 ; CHECK-NEXT: vpcmpneqd %ymm3, %ymm2, %k1
7777 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm0 {%k1}
7878 ; CHECK-NEXT: retq
8686 define <8 x float> @_ss8xfloat_maskz(float %a, <8 x i32> %mask1) {
8787 ; CHECK-LABEL: _ss8xfloat_maskz:
8888 ; CHECK: # BB#0:
89 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
89 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
9090 ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1
9191 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
9292 ; CHECK-NEXT: retq
3939 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4040 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
4141 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
42 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
42 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4343 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
4444 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
4545 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
165165 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
166166 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
167167 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
168 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
168 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
169169 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
170170 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
171171 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
294294 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
295295 ; NoVLX-NEXT: kmovw %edi, %k1
296296 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
297 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
297 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
298298 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
299299 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
300300 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
424424 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
425425 ; NoVLX-NEXT: kmovw %edi, %k1
426426 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
427 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
427 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
428428 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
429429 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
430430 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
554554 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
555555 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
556556 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
557 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
557 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
558558 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
559559 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
560560 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
685685 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
686686 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
687687 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
688 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
688 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
689689 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
690690 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
691691 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
819819 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
820820 ; NoVLX-NEXT: kmovw %edi, %k1
821821 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
822 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
822 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
823823 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
824824 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
825825 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
954954 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
955955 ; NoVLX-NEXT: kmovw %edi, %k1
956956 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
957 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
957 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
958958 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
959959 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
960960 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
10821082 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10831083 ; NoVLX-NEXT: kmovw %k0, (%rsp)
10841084 ; NoVLX-NEXT: movl (%rsp), %ecx
1085 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1085 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
10861086 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10871087 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
10881088 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
11321132 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
11331133 ; NoVLX-NEXT: kmovw %k0, (%rsp)
11341134 ; NoVLX-NEXT: movl (%rsp), %ecx
1135 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1135 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
11361136 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
11371137 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
11381138 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
11801180 ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
11811181 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
11821182 ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
1183 ; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4
1183 ; NoVLX-NEXT: vxorps %xmm4, %xmm4, %xmm4
11841184 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
11851185 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
11861186 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
12421242 ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
12431243 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
12441244 ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
1245 ; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3
1245 ; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3
12461246 ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
12471247 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
12481248 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
14241424 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14251425 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
14261426 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
1427 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1427 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
14281428 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
14291429 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
14301430 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
15001500 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15011501 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
15021502 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
1503 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1503 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
15041504 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
15051505 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
15061506 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
15791579 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
15801580 ; NoVLX-NEXT: kmovw %edi, %k1
15811581 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
1582 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1582 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
15831583 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
15841584 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
15851585 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
16591659 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
16601660 ; NoVLX-NEXT: kmovw %edi, %k1
16611661 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
1662 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1662 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
16631663 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
16641664 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
16651665 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
17391739 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
17401740 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
17411741 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
1742 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1742 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
17431743 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
17441744 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
17451745 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
18201820 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
18211821 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
18221822 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
1823 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1823 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
18241824 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
18251825 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
18261826 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
19041904 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
19051905 ; NoVLX-NEXT: kmovw %edi, %k1
19061906 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
1907 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1907 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
19081908 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
19091909 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
19101910 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
19891989 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
19901990 ; NoVLX-NEXT: kmovw %edi, %k1
19911991 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
1992 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
1992 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
19931993 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
19941994 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
19951995 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
20902090 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
20912091 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
20922092 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2093 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
2093 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
20942094 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
20952095 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
20962096 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
22172217 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
22182218 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
22192219 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2220 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
2220 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
22212221 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
22222222 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
22232223 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
23472347 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
23482348 ; NoVLX-NEXT: kmovw %edi, %k1
23492349 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
2350 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
2350 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
23512351 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
23522352 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
23532353 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
24782478 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
24792479 ; NoVLX-NEXT: kmovw %edi, %k1
24802480 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
2481 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
2481 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
24822482 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
24832483 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
24842484 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
26092609 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
26102610 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
26112611 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2612 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
2612 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
26132613 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
26142614 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
26152615 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
27412741 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
27422742 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
27432743 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2744 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
2744 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
27452745 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
27462746 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
27472747 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
28762876 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
28772877 ; NoVLX-NEXT: kmovw %edi, %k1
28782878 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
2879 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
2879 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
28802880 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
28812881 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
28822882 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
30123012 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
30133013 ; NoVLX-NEXT: kmovw %edi, %k1
30143014 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
3015 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
3015 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
30163016 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
30173017 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
30183018 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
34423442 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
34433443 ; NoVLX-NEXT: kmovw %k0, (%rsp)
34443444 ; NoVLX-NEXT: movl (%rsp), %ecx
3445 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
3445 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
34463446 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
34473447 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
34483448 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
37093709 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
37103710 ; NoVLX-NEXT: kmovw %k0, (%rsp)
37113711 ; NoVLX-NEXT: movl (%rsp), %ecx
3712 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
3712 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
37133713 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
37143714 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
37153715 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
39253925 ; NoVLX-NEXT: movq %rax, %rcx
39263926 ; NoVLX-NEXT: shrq $48, %rax
39273927 ; NoVLX-NEXT: shrq $32, %rcx
3928 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
3928 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
39293929 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
39303930 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
39313931 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
42044204 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
42054205 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm2
42064206 ; NoVLX-NEXT: shrq $48, %rax
4207 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
4207 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
42084208 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
42094209 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3
42104210 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm3, %ymm3
55345534 ; NoVLX-NEXT: andq $-32, %rsp
55355535 ; NoVLX-NEXT: subq $64, %rsp
55365536 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5537 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
5537 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
55385538 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
55395539 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
55405540 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
55845584 ; NoVLX-NEXT: andq $-32, %rsp
55855585 ; NoVLX-NEXT: subq $64, %rsp
55865586 ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
5587 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
5587 ; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1
55885588 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
55895589 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
55905590 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
56545654 ; NoVLX-NEXT: kmovw %k1, %eax
56555655 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
56565656 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
5657 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
5657 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
56585658 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
56595659 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
56605660 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
57265726 ; NoVLX-NEXT: kmovw %k1, %eax
57275727 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
57285728 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
5729 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
5729 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
57305730 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
57315731 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
57325732 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
57825782 ; NoVLX-NEXT: subq $64, %rsp
57835783 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
57845784 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5785 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
5785 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
57865786 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
57875787 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
57885788 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
58545854 ; NoVLX-NEXT: kmovw %k1, %eax
58555855 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
58565856 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
5857 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
5857 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
58585858 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
58595859 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
58605860 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
61016101 ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1
61026102 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
61036103 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
6104 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6104 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
61056105 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
61066106 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
61076107 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
61776177 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
61786178 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
61796179 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
6180 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6180 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
61816181 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
61826182 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
61836183 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
62576257 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
62586258 ; NoVLX-NEXT: kmovw %edi, %k1
62596259 ; NoVLX-NEXT: kandw %k1, %k0, %k0
6260 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6260 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
62616261 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
62626262 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
62636263 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
63386338 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
63396339 ; NoVLX-NEXT: kmovw %edi, %k1
63406340 ; NoVLX-NEXT: kandw %k1, %k0, %k0
6341 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6341 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
63426342 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
63436343 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
63446344 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
64186418 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
64196419 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
64206420 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
6421 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6421 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
64226422 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
64236423 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
64246424 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
64996499 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
65006500 ; NoVLX-NEXT: kmovw %edi, %k1
65016501 ; NoVLX-NEXT: kandw %k0, %k1, %k0
6502 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6502 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
65036503 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
65046504 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
65056505 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
65806580 ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1
65816581 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
65826582 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
6583 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6583 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
65846584 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
65856585 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
65866586 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
66616661 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
66626662 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
66636663 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
6664 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6664 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
66656665 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
66666666 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
66676667 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
67466746 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
67476747 ; NoVLX-NEXT: kmovw %edi, %k1
67486748 ; NoVLX-NEXT: kandw %k1, %k0, %k0
6749 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6749 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
67506750 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
67516751 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
67526752 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
68326832 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
68336833 ; NoVLX-NEXT: kmovw %edi, %k1
68346834 ; NoVLX-NEXT: kandw %k1, %k0, %k0
6835 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6835 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
68366836 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
68376837 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
68386838 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
69176917 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
69186918 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
69196919 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
6920 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
6920 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
69216921 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
69226922 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
69236923 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
70037003 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
70047004 ; NoVLX-NEXT: kmovw %edi, %k1
70057005 ; NoVLX-NEXT: kandw %k0, %k1, %k0
7006 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7006 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
70077007 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
70087008 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
70097009 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
71027102 ; NoVLX-NEXT: .Lcfi255:
71037103 ; NoVLX-NEXT: .cfi_offset %r15, -24
71047104 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
7105 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7105 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
71067106 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
71077107 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
71087108 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
72267226 ; NoVLX-NEXT: .Lcfi263:
72277227 ; NoVLX-NEXT: .cfi_offset %r15, -24
72287228 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
7229 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7229 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
72307230 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
72317231 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
72327232 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
73537353 ; NoVLX-NEXT: .cfi_offset %r15, -24
73547354 ; NoVLX-NEXT: kmovw %edi, %k1
73557355 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
7356 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7356 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
73577357 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
73587358 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
73597359 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
74817481 ; NoVLX-NEXT: .cfi_offset %r15, -24
74827482 ; NoVLX-NEXT: kmovw %edi, %k1
74837483 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
7484 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7484 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
74857485 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
74867486 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
74877487 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
76097609 ; NoVLX-NEXT: .Lcfi287:
76107610 ; NoVLX-NEXT: .cfi_offset %r15, -24
76117611 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
7612 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7612 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
76137613 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
76147614 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
76157615 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
77377737 ; NoVLX-NEXT: .cfi_offset %r15, -24
77387738 ; NoVLX-NEXT: kmovw %edi, %k1
77397739 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
7740 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7740 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
77417741 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
77427742 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
77437743 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
78667866 ; NoVLX-NEXT: .Lcfi303:
78677867 ; NoVLX-NEXT: .cfi_offset %r15, -24
78687868 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
7869 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7869 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
78707870 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
78717871 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
78727872 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
79957995 ; NoVLX-NEXT: .Lcfi311:
79967996 ; NoVLX-NEXT: .cfi_offset %r15, -24
79977997 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
7998 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
7998 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
79997999 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
80008000 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
80018001 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
81278127 ; NoVLX-NEXT: .cfi_offset %r15, -24
81288128 ; NoVLX-NEXT: kmovw %edi, %k1
81298129 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
8130 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
8130 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
81318131 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
81328132 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
81338133 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
82608260 ; NoVLX-NEXT: .cfi_offset %r15, -24
82618261 ; NoVLX-NEXT: kmovw %edi, %k1
82628262 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
8263 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
8263 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
82648264 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
82658265 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
82668266 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
83938393 ; NoVLX-NEXT: .Lcfi335:
83948394 ; NoVLX-NEXT: .cfi_offset %r15, -24
83958395 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
8396 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
8396 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
83978397 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
83988398 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
83998399 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
85268526 ; NoVLX-NEXT: .cfi_offset %r15, -24
85278527 ; NoVLX-NEXT: kmovw %edi, %k1
85288528 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
8529 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
8529 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
85308530 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
85318531 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
85328532 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
97349734 ; NoVLX-NEXT: andq $-32, %rsp
97359735 ; NoVLX-NEXT: subq $64, %rsp
97369736 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
9737 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
9737 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
97389738 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
97399739 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
97409740 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
97849784 ; NoVLX-NEXT: andq $-32, %rsp
97859785 ; NoVLX-NEXT: subq $64, %rsp
97869786 ; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
9787 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
9787 ; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1
97889788 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
97899789 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
97909790 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
98469846 ; NoVLX-NEXT: vmovd %ecx, %xmm1
98479847 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
98489848 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
9849 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
9849 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
98509850 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
98519851 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
98529852 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
99109910 ; NoVLX-NEXT: vmovd %ecx, %xmm1
99119911 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
99129912 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
9913 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
9913 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
99149914 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
99159915 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
99169916 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
99669966 ; NoVLX-NEXT: subq $64, %rsp
99679967 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
99689968 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
9969 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
9969 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
99709970 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
99719971 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
99729972 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1003010030 ; NoVLX-NEXT: vmovd %ecx, %xmm1
1003110031 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
1003210032 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
10033 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
10033 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1003410034 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1003510035 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1003610036 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1126411264 ; NoVLX-NEXT: subq $64, %rsp
1126511265 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
1126611266 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
11267 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
11267 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1126811268 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1126911269 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1127011270 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1131611316 ; NoVLX-NEXT: subq $64, %rsp
1131711317 ; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
1131811318 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
11319 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
11319 ; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1132011320 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1132111321 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1132211322 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1138811388 ; NoVLX-NEXT: kmovw %k1, %eax
1138911389 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1139011390 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
11391 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
11391 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1139211392 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1139311393 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1139411394 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1146211462 ; NoVLX-NEXT: kmovw %k1, %eax
1146311463 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1146411464 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
11465 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
11465 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1146611466 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1146711467 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1146811468 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1152011520 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
1152111521 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
1152211522 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
11523 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
11523 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1152411524 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1152511525 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1152611526 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1159411594 ; NoVLX-NEXT: kmovw %k1, %eax
1159511595 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1159611596 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
11597 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
11597 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1159811598 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1159911599 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1160011600 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1181511815 ; NoVLX-NEXT: andq $-32, %rsp
1181611816 ; NoVLX-NEXT: subq $32, %rsp
1181711817 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
11818 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
11818 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1181911819 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1182011820 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1182111821 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1188911889 ; NoVLX-NEXT: andq $-32, %rsp
1189011890 ; NoVLX-NEXT: subq $32, %rsp
1189111891 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
11892 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
11892 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1189311893 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1189411894 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1189511895 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1196611966 ; NoVLX-NEXT: subq $32, %rsp
1196711967 ; NoVLX-NEXT: kmovw %edi, %k1
1196811968 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
11969 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
11969 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1197011970 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1197111971 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1197211972 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1204412044 ; NoVLX-NEXT: subq $32, %rsp
1204512045 ; NoVLX-NEXT: kmovw %edi, %k1
1204612046 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
12047 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12047 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1204812048 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1204912049 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1205012050 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1212212122 ; NoVLX-NEXT: andq $-32, %rsp
1212312123 ; NoVLX-NEXT: subq $32, %rsp
1212412124 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
12125 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12125 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1212612126 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1212712127 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1212812128 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1220012200 ; NoVLX-NEXT: subq $32, %rsp
1220112201 ; NoVLX-NEXT: kmovw %edi, %k1
1220212202 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
12203 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12203 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1220412204 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1220512205 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1220612206 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1227912279 ; NoVLX-NEXT: andq $-32, %rsp
1228012280 ; NoVLX-NEXT: subq $64, %rsp
1228112281 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
12282 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12282 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1228312283 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1228412284 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1228512285 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1235812358 ; NoVLX-NEXT: andq $-32, %rsp
1235912359 ; NoVLX-NEXT: subq $64, %rsp
1236012360 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
12361 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12361 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1236212362 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1236312363 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1236412364 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1244012440 ; NoVLX-NEXT: subq $64, %rsp
1244112441 ; NoVLX-NEXT: kmovw %edi, %k1
1244212442 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
12443 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12443 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1244412444 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1244512445 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1244612446 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1252312523 ; NoVLX-NEXT: subq $64, %rsp
1252412524 ; NoVLX-NEXT: kmovw %edi, %k1
1252512525 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
12526 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12526 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1252712527 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1252812528 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1252912529 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1260612606 ; NoVLX-NEXT: andq $-32, %rsp
1260712607 ; NoVLX-NEXT: subq $64, %rsp
1260812608 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
12609 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12609 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1261012610 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1261112611 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1261212612 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1268912689 ; NoVLX-NEXT: subq $64, %rsp
1269012690 ; NoVLX-NEXT: kmovw %edi, %k1
1269112691 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
12692 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12692 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1269312693 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1269412694 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1269512695 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1279012790 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
1279112791 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1279212792 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
12793 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12793 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1279412794 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1279512795 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1279612796 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
1291612916 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
1291712917 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1291812918 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
12919 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
12919 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1292012920 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1292112921 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1292212922 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
1304513045 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1304613046 ; NoVLX-NEXT: kmovw %edi, %k1
1304713047 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
13048 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
13048 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1304913049 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1305013050 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1305113051 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
1317513175 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1317613176 ; NoVLX-NEXT: kmovw %edi, %k1
1317713177 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
13178 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
13178 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1317913179 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1318013180 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1318113181 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
1330513305 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
1330613306 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1330713307 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
13308 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
13308 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1330913309 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1331013310 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1331113311 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1343613436 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
1343713437 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1343813438 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
13439 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
13439 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1344013440 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1344113441 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1344213442 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1357013570 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1357113571 ; NoVLX-NEXT: kmovw %edi, %k1
1357213572 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
13573 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
13573 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1357413574 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1357513575 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1357613576 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1370513705 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1370613706 ; NoVLX-NEXT: kmovw %edi, %k1
1370713707 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
13708 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
13708 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1370913709 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1371013710 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1371113711 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1383313833 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1383413834 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1383513835 ; NoVLX-NEXT: movl (%rsp), %ecx
13836 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
13836 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1383713837 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1383813838 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1383913839 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1388313883 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1388413884 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1388513885 ; NoVLX-NEXT: movl (%rsp), %ecx
13886 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
13886 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1388713887 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1388813888 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1388913889 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1393113931 ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
1393213932 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
1393313933 ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
13934 ; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4
13934 ; NoVLX-NEXT: vxorps %xmm4, %xmm4, %xmm4
1393513935 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
1393613936 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
1393713937 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
1399313993 ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
1399413994 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
1399513995 ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
13996 ; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3
13996 ; NoVLX-NEXT: vxorps %xmm3, %xmm3, %xmm3
1399713997 ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
1399813998 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
1399913999 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
1417514175 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
1417614176 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
1417714177 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14178 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14178 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1417914179 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1418014180 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1418114181 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1425114251 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
1425214252 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
1425314253 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14254 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14254 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1425514255 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1425614256 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1425714257 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1433014330 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
1433114331 ; NoVLX-NEXT: kmovw %edi, %k1
1433214332 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
14333 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14333 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1433414334 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1433514335 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1433614336 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1441014410 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
1441114411 ; NoVLX-NEXT: kmovw %edi, %k1
1441214412 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
14413 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14413 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1441414414 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1441514415 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1441614416 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1449014490 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
1449114491 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
1449214492 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14493 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14493 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1449414494 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1449514495 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1449614496 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1457114571 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
1457214572 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
1457314573 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14574 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14574 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1457514575 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1457614576 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1457714577 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1465514655 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
1465614656 ; NoVLX-NEXT: kmovw %edi, %k1
1465714657 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
14658 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14658 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1465914659 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1466014660 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1466114661 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1474014740 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
1474114741 ; NoVLX-NEXT: kmovw %edi, %k1
1474214742 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
14743 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14743 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1474414744 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1474514745 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1474614746 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1484114841 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1484214842 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1484314843 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14844 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14844 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1484514845 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1484614846 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1484714847 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
1496814968 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1496914969 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1497014970 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14971 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
14971 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1497214972 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1497314973 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1497414974 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
1509815098 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1509915099 ; NoVLX-NEXT: kmovw %edi, %k1
1510015100 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
15101 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
15101 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1510215102 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1510315103 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1510415104 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
1522915229 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1523015230 ; NoVLX-NEXT: kmovw %edi, %k1
1523115231 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
15232 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
15232 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1523315233 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1523415234 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1523515235 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1
1536015360 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1536115361 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1536215362 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15363 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
15363 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1536415364 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1536515365 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1536615366 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1549215492 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1549315493 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1549415494 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15495 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
15495 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1549615496 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1549715497 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1549815498 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1562715627 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1562815628 ; NoVLX-NEXT: kmovw %edi, %k1
1562915629 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
15630 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
15630 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1563115631 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1563215632 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1563315633 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1576315763 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1576415764 ; NoVLX-NEXT: kmovw %edi, %k1
1576515765 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
15766 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
15766 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1576715767 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1576815768 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1576915769 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1619316193 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1619416194 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1619516195 ; NoVLX-NEXT: movl (%rsp), %ecx
16196 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
16196 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1619716197 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1619816198 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1619916199 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1646016460 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1646116461 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1646216462 ; NoVLX-NEXT: movl (%rsp), %ecx
16463 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
16463 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1646416464 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1646516465 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1646616466 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1667616676 ; NoVLX-NEXT: movq %rax, %rcx
1667716677 ; NoVLX-NEXT: shrq $48, %rax
1667816678 ; NoVLX-NEXT: shrq $32, %rcx
16679 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
16679 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1668016680 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
1668116681 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
1668216682 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
1695516955 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
1695616956 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm2
1695716957 ; NoVLX-NEXT: shrq $48, %rax
16958 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
16958 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1695916959 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
1696016960 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3
1696116961 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm3, %ymm3
1828518285 ; NoVLX-NEXT: andq $-32, %rsp
1828618286 ; NoVLX-NEXT: subq $64, %rsp
1828718287 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
18288 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
18288 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1828918289 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1829018290 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1829118291 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1833518335 ; NoVLX-NEXT: andq $-32, %rsp
1833618336 ; NoVLX-NEXT: subq $64, %rsp
1833718337 ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
18338 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
18338 ; NoVLX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1833918339 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1834018340 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1834118341 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1840518405 ; NoVLX-NEXT: kmovw %k1, %eax
1840618406 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1840718407 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
18408 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
18408 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1840918409 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1841018410 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1841118411 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1847718477 ; NoVLX-NEXT: kmovw %k1, %eax
1847818478 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1847918479 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
18480 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
18480 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1848118481 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1848218482 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1848318483 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1853318533 ; NoVLX-NEXT: subq $64, %rsp
1853418534 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1853518535 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
18536 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
18536 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1853718537 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1853818538 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1853918539 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1860518605 ; NoVLX-NEXT: kmovw %k1, %eax
1860618606 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1860718607 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
18608 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
18608 ; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1860918609 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1861018610 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1861118611 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1885218852 ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1
1885318853 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
1885418854 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
18855 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
18855 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1885618856 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1885718857 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1885818858 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1892818928 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
1892918929 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
1893018930 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
18931 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
18931 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1893218932 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1893318933 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1893418934 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1900819008 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
1900919009 ; NoVLX-NEXT: kmovw %edi, %k1
1901019010 ; NoVLX-NEXT: kandw %k1, %k0, %k0
19011 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
19011 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1901219012 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1901319013 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1901419014 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1908919089 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
1909019090 ; NoVLX-NEXT: kmovw %edi, %k1
1909119091 ; NoVLX-NEXT: kandw %k1, %k0, %k0
19092 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
19092 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1909319093 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1909419094 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1909519095 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1916919169 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
1917019170 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
1917119171 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
19172 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
19172 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1917319173 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1917419174 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1917519175 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1925019250 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
1925119251 ; NoVLX-NEXT: kmovw %edi, %k1
1925219252 ; NoVLX-NEXT: kandw %k0, %k1, %k0
19253 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
19253 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1925419254 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
1925519255 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
1925619256 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1
1933119331 ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1
1933219332 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0
1933319333 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
19334 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
19334