llvm.org GIT mirror llvm / 3254c74
[X86 CodeGen] Optimization of ZeroExtendLoad for v2i8 vector. Load with zero-extend and sign-extend from v2i8 to v2i32 is "Legal" since SSE4.1 and may be performed using PMOVZXBD, PMOVSXBD instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313121 91177308-0d34-0410-b5e6-96231b3b80d8 Elena Demikhovsky 2 years ago
4 changed file(s) with 57 addition(s) and 47 deletion(s). Raw diff Collapse all Expand all
940940 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
941941 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
942942 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
943 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
943944 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
944945 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
945946 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
30883088 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
30893089 ; SSE-NEXT: retq
30903090 ;
3091 ; VEX-LABEL: uitofp_load_2i8_to_2f64:
3092 ; VEX: # BB#0:
3093 ; VEX-NEXT: movzwl (%rdi), %eax
3094 ; VEX-NEXT: vmovd %eax, %xmm0
3095 ; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3096 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
3097 ; VEX-NEXT: retq
3098 ;
3099 ; AVX512F-LABEL: uitofp_load_2i8_to_2f64:
3100 ; AVX512F: # BB#0:
3101 ; AVX512F-NEXT: movzwl (%rdi), %eax
3102 ; AVX512F-NEXT: vmovd %eax, %xmm0
3103 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3104 ; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
3105 ; AVX512F-NEXT: retq
3106 ;
3107 ; AVX512VL-LABEL: uitofp_load_2i8_to_2f64:
3108 ; AVX512VL: # BB#0:
3109 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
3110 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
3111 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3112 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
3113 ; AVX512VL-NEXT: retq
3114 ;
3115 ; AVX512DQ-LABEL: uitofp_load_2i8_to_2f64:
3116 ; AVX512DQ: # BB#0:
3117 ; AVX512DQ-NEXT: movzwl (%rdi), %eax
3118 ; AVX512DQ-NEXT: vmovd %eax, %xmm0
3119 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3120 ; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3121 ; AVX512DQ-NEXT: retq
3122 ;
3123 ; AVX512VLDQ-LABEL: uitofp_load_2i8_to_2f64:
3124 ; AVX512VLDQ: # BB#0:
3125 ; AVX512VLDQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
3126 ; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
3127 ; AVX512VLDQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3128 ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3129 ; AVX512VLDQ-NEXT: retq
3091 ; AVX-LABEL: uitofp_load_2i8_to_2f64:
3092 ; AVX: # BB#0:
3093 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
3094 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3095 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
3096 ; AVX-NEXT: retq
31303097 %ld = load <2 x i8>, <2 x i8> *%a
31313098 %cvt = uitofp <2 x i8> %ld to <2 x double>
31323099 ret <2 x double> %cvt
50785078 %b = sext <32 x i1> %a to <32 x i8>
50795079 ret <32 x i8> %b
50805080 }
5081
5082 define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) {
5083 ; SSE2-LABEL: sext_2i8_to_2i32:
5084 ; SSE2: # BB#0:
5085 ; SSE2-NEXT: movzwl (%rdi), %eax
5086 ; SSE2-NEXT: movd %eax, %xmm0
5087 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
5088 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
5089 ; SSE2-NEXT: psrad $24, %xmm0
5090 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
5091 ; SSE2-NEXT: paddq %xmm0, %xmm0
5092 ; SSE2-NEXT: retq
5093 ;
5094 ; SSSE3-LABEL: sext_2i8_to_2i32:
5095 ; SSSE3: # BB#0:
5096 ; SSSE3-NEXT: movzwl (%rdi), %eax
5097 ; SSSE3-NEXT: movd %eax, %xmm0
5098 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
5099 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
5100 ; SSSE3-NEXT: psrad $24, %xmm0
5101 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
5102 ; SSSE3-NEXT: paddq %xmm0, %xmm0
5103 ; SSSE3-NEXT: retq
5104 ;
5105 ; SSE41-LABEL: sext_2i8_to_2i32:
5106 ; SSE41: # BB#0:
5107 ; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
5108 ; SSE41-NEXT: paddq %xmm0, %xmm0
5109 ; SSE41-NEXT: retq
5110 ;
5111 ; AVX-LABEL: sext_2i8_to_2i32:
5112 ; AVX: # BB#0:
5113 ; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
5114 ; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
5115 ; AVX-NEXT: retq
5116 ;
5117 ; X32-SSE41-LABEL: sext_2i8_to_2i32:
5118 ; X32-SSE41: # BB#0:
5119 ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
5120 ; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
5121 ; X32-SSE41-NEXT: paddq %xmm0, %xmm0
5122 ; X32-SSE41-NEXT: retl
5123 %x = load <2 x i8>, <2 x i8>* %addr, align 1
5124 %y = sext <2 x i8> %x to <2 x i32>
5125 %z = add <2 x i32>%y, %y
5126 ret <2 x i32>%z
5127 }
5128
22612261 ;
22622262 ; SSE41-LABEL: zext_2i8_to_2i32:
22632263 ; SSE41: # BB#0:
2264 ; SSE41-NEXT: movzwl (%rdi), %eax
2265 ; SSE41-NEXT: movd %eax, %xmm0
2266 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2267 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2264 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
22682265 ; SSE41-NEXT: paddq %xmm0, %xmm0
22692266 ; SSE41-NEXT: retq
22702267 ;
22712268 ; AVX-LABEL: zext_2i8_to_2i32:
22722269 ; AVX: # BB#0:
2273 ; AVX-NEXT: movzwl (%rdi), %eax
2274 ; AVX-NEXT: vmovd %eax, %xmm0
2275 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2276 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2270 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
22772271 ; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
22782272 ; AVX-NEXT: retq
22792273 %x = load <2 x i8>, <2 x i8>* %addr, align 1