llvm.org GIT mirror llvm / e29983e
Merging r370592: ------------------------------------------------------------------------ r370592 | rksimon | 2019-08-31 18:21:31 +0200 (Sat, 31 Aug 2019) | 3 lines [X86] EltsFromConsecutiveLoads - Don't confuse elt count with vector element count (PR43170) EltsFromConsecutiveLoads was assuming that the number of input elts was the same as the number of elements in the output vector type when creating a zeroing shuffle, causing an assert when subvectors were being combined instead of just scalars. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_90@371382 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 1 year, 2 months ago
2 changed file(s) with 53 addition(s) and 10 deletion(s). Raw diff Collapse all Expand all
7649 7649 // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
7650 7650 // vector and a zero vector to clear out the zero elements.
7651 7651 if (!isAfterLegalize && VT.isVector()) {
7652 SmallVector<int, 4> ClearMask(NumElems, -1);
7653 for (unsigned i = 0; i < NumElems; ++i) {
7654 if (ZeroMask[i])
7655 ClearMask[i] = i + NumElems;
7656 else if (LoadMask[i])
7657 ClearMask[i] = i;
7652 unsigned NumMaskElts = VT.getVectorNumElements();
7653 if ((NumMaskElts % NumElems) == 0) {
7654 unsigned Scale = NumMaskElts / NumElems;
7655 SmallVector<int, 4> ClearMask(NumMaskElts, -1);
7656 for (unsigned i = 0; i < NumElems; ++i) {
7657 if (UndefMask[i])
7658 continue;
7659 int Offset = ZeroMask[i] ? NumMaskElts : 0;
7660 for (unsigned j = 0; j != Scale; ++j)
7661 ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset;
7662 }
7663 SDValue V = CreateLoad(VT, LDBase);
7664 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
7665 : DAG.getConstantFP(0.0, DL, VT);
7666 return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
7658 7667 }
7659 SDValue V = CreateLoad(VT, LDBase);
7660 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
7661 : DAG.getConstantFP(0.0, DL, VT);
7662 return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
7663 7668 }
7664 7669 }
7665 7670
935 935 %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec2
936 936 ret <16 x float> %res
937 937 }
938
939 %union1= type { <16 x float> }
940 @src1 = external dso_local local_unnamed_addr global %union1, align 64
941
942 define void @PR43170(<16 x float>* %a0) {
943 ; SKX64-LABEL: PR43170:
944 ; SKX64: # %bb.0: # %entry
945 ; SKX64-NEXT: vmovaps {{.*}}(%rip), %ymm0
946 ; SKX64-NEXT: vmovaps %zmm0, (%rdi)
947 ; SKX64-NEXT: vzeroupper
948 ; SKX64-NEXT: retq
949 ;
950 ; KNL64-LABEL: PR43170:
951 ; KNL64: # %bb.0: # %entry
952 ; KNL64-NEXT: vmovaps {{.*}}(%rip), %ymm0
953 ; KNL64-NEXT: vmovaps %zmm0, (%rdi)
954 ; KNL64-NEXT: retq
955 ;
956 ; SKX32-LABEL: PR43170:
957 ; SKX32: # %bb.0: # %entry
958 ; SKX32-NEXT: movl {{[0-9]+}}(%esp), %eax
959 ; SKX32-NEXT: vmovaps src1, %ymm0
960 ; SKX32-NEXT: vmovaps %zmm0, (%eax)
961 ; SKX32-NEXT: vzeroupper
962 ; SKX32-NEXT: retl
963 ;
964 ; KNL32-LABEL: PR43170:
965 ; KNL32: # %bb.0: # %entry
966 ; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax
967 ; KNL32-NEXT: vmovaps src1, %ymm0
968 ; KNL32-NEXT: vmovaps %zmm0, (%eax)
969 ; KNL32-NEXT: retl
970 entry:
971 %0 = load <8 x float>, <8 x float>* bitcast (%union1* @src1 to <8 x float>*), align 64
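972 %1 = shufflevector <8 x float> %0, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>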
973 store <16 x float> %1, <16 x float>* %a0, align 64
974 ret void
975 }