llvm.org GIT mirror llvm / 28178cc
[X86] Add test case for inserting/extracting from two shuffled vectors. NFC
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369871 91177308-0d34-0410-b5e6-96231b3b80d8
Amaury Sechet, 23 days ago
1 changed file(s) with 82 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
28982898 %8 = insertelement <8 x i16> %7, i16 %a7, i32 7
28992899 ret <8 x i16> %8
29002900 }
2901
; shuffle_extract_insert_double: builds an <8 x i16> result whose lanes 0-1
; come from a two-input shufflevector of %a and %b, and whose lanes 2-7 are
; then overwritten one at a time with scalars extracted from %a (elements
; 0, 6, 4) and %b (elements 3, 5, 7), alternating between the two sources.
; The assertions below pin the expected lowering for each tested feature level.
2902 define <8 x i16> @shuffle_extract_insert_double(<8 x i16> %a, <8 x i16> %b) {
2903 ; SSE2-LABEL: shuffle_extract_insert_double:
2904 ; SSE2: # %bb.0:
2905 ; SSE2-NEXT: movd %xmm0, %eax
2906 ; SSE2-NEXT: pextrw $4, %xmm0, %r8d
2907 ; SSE2-NEXT: pextrw $6, %xmm0, %edx
2908 ; SSE2-NEXT: pextrw $3, %xmm1, %esi
2909 ; SSE2-NEXT: pextrw $5, %xmm1, %edi
2910 ; SSE2-NEXT: pextrw $7, %xmm1, %ecx
2911 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2912 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2913 ; SSE2-NEXT: pinsrw $2, %eax, %xmm0
2914 ; SSE2-NEXT: pinsrw $3, %esi, %xmm0
2915 ; SSE2-NEXT: pinsrw $4, %edx, %xmm0
2916 ; SSE2-NEXT: pinsrw $5, %edi, %xmm0
2917 ; SSE2-NEXT: pinsrw $6, %r8d, %xmm0
2918 ; SSE2-NEXT: pinsrw $7, %ecx, %xmm0
2919 ; SSE2-NEXT: retq
2920 ;
2921 ; SSSE3-LABEL: shuffle_extract_insert_double:
2922 ; SSSE3: # %bb.0:
2923 ; SSSE3-NEXT: movd %xmm0, %eax
2924 ; SSSE3-NEXT: pextrw $4, %xmm0, %r8d
2925 ; SSSE3-NEXT: pextrw $6, %xmm0, %edx
2926 ; SSSE3-NEXT: pextrw $3, %xmm1, %esi
2927 ; SSSE3-NEXT: pextrw $5, %xmm1, %edi
2928 ; SSSE3-NEXT: pextrw $7, %xmm1, %ecx
2929 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2930 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2931 ; SSSE3-NEXT: pinsrw $2, %eax, %xmm0
2932 ; SSSE3-NEXT: pinsrw $3, %esi, %xmm0
2933 ; SSSE3-NEXT: pinsrw $4, %edx, %xmm0
2934 ; SSSE3-NEXT: pinsrw $5, %edi, %xmm0
2935 ; SSSE3-NEXT: pinsrw $6, %r8d, %xmm0
2936 ; SSSE3-NEXT: pinsrw $7, %ecx, %xmm0
2937 ; SSSE3-NEXT: retq
2938 ;
2939 ; SSE41-LABEL: shuffle_extract_insert_double:
2940 ; SSE41: # %bb.0:
2941 ; SSE41-NEXT: movd %xmm0, %eax
2942 ; SSE41-NEXT: pextrw $4, %xmm0, %ecx
2943 ; SSE41-NEXT: pextrw $6, %xmm0, %edx
2944 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2945 ; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2946 ; SSE41-NEXT: pinsrw $2, %eax, %xmm0
2947 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2948 ; SSE41-NEXT: pinsrw $4, %edx, %xmm0
2949 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
2950 ; SSE41-NEXT: pinsrw $6, %ecx, %xmm0
2951 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2952 ; SSE41-NEXT: retq
2953 ;
2954 ; AVX-LABEL: shuffle_extract_insert_double:
2955 ; AVX: # %bb.0:
2956 ; AVX-NEXT: vmovd %xmm0, %eax
2957 ; AVX-NEXT: vpextrw $4, %xmm0, %ecx
2958 ; AVX-NEXT: vpextrw $6, %xmm0, %edx
2959 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2960 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2961 ; AVX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
2962 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2963 ; AVX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
2964 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
2965 ; AVX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
2966 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2967 ; AVX-NEXT: retq
; Scalars pulled from %a (elements 0, 4, 6) and %b (elements 3, 5, 7). The
; %bNN names use the concatenated <%a, %b> index, i.e. 8 + element.
2968 %a0 = extractelement <8 x i16> %a, i32 0
2969 %a4 = extractelement <8 x i16> %a, i32 4
2970 %a6 = extractelement <8 x i16> %a, i32 6
2971 %b11 = extractelement <8 x i16> %b, i32 3
2972 %b13 = extractelement <8 x i16> %b, i32 5
2973 %b15 = extractelement <8 x i16> %b, i32 7
; NOTE(review): the mask operand was missing from this shufflevector (the
; line ended right after the mask type), which made the instruction
; unparseable. Lanes 0-1 of the restored mask (indices 2 and 8, i.e. a[2] and
; b[0]) are what the pshufd + punpcklwd assertions above require. Lanes 2-7
; are overwritten by the insertelements below and cannot change the returned
; value - confirm those six indices against the upstream test.
2974 %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 8, i32 3, i32 9, i32 4, i32 10, i32 5, i32 11>
; Overwrite lanes 2..7, alternating %a and %b scalars.
2975 %2 = insertelement <8 x i16> %1, i16 %a0, i32 2
2976 %3 = insertelement <8 x i16> %2, i16 %b11, i32 3
2977 %4 = insertelement <8 x i16> %3, i16 %a6, i32 4
2978 %5 = insertelement <8 x i16> %4, i16 %b13, i32 5
2979 %6 = insertelement <8 x i16> %5, i16 %a4, i32 6
2980 %7 = insertelement <8 x i16> %6, i16 %b15, i32 7
2981 ret <8 x i16> %7
2982 }