llvm.org GIT mirror llvm / 189bce4
Remove 256-bit AVX non-temporal store intrinsics. Similar was previously done for 128-bit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156375 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 7 years ago
4 changed file(s) with 57 addition(s) and 22 deletion(s). Raw diff Collapse all Expand all
12811281 Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], []>;
12821282 }
12831283
1284 // Cacheability support ops
1285 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1286 def int_x86_avx_movnt_dq_256 : GCCBuiltin<"__builtin_ia32_movntdq256">,
1287 Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty], []>;
1288 def int_x86_avx_movnt_pd_256 : GCCBuiltin<"__builtin_ia32_movntpd256">,
1289 Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], []>;
1290 def int_x86_avx_movnt_ps_256 : GCCBuiltin<"__builtin_ia32_movntps256">,
1291 Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], []>;
1292 }
1293
12941284 // Conditional load ops
12951285 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
12961286 def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
33343334 addr:$dst)],
33353335 IIC_SSE_MOVNT>, VEX;
33363336 }
3337
3338 def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
3339 (VMOVNTDQYmr addr:$dst, VR256:$src)>;
3340 def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
3341 (VMOVNTPDYmr addr:$dst, VR256:$src)>;
3342 def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
3343 (VMOVNTPSYmr addr:$dst, VR256:$src)>;
33443337
33453338 let AddedComplexity = 400 in { // Prefer non-temporal versions
33463339 def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
5656 Name.startswith("x86.sse2.pcmpgt.") ||
5757 Name.startswith("x86.avx2.pcmpeq.") ||
5858 Name.startswith("x86.avx2.pcmpgt.") ||
59 Name.startswith("x86.avx.vpermil.")) {
59 Name.startswith("x86.avx.vpermil.") ||
60 Name == "x86.avx.movnt.dq.256" ||
61 Name == "x86.avx.movnt.pd.256" ||
62 Name == "x86.avx.movnt.ps.256") {
6063 NewFn = 0;
6164 return true;
6265 }
117120 "pcmpgt");
118121 // need to sign extend since icmp returns vector of i1
119122 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
123 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
124 Name == "llvm.x86.avx.movnt.ps.256" ||
125 Name == "llvm.x86.avx.movnt.pd.256") {
126 IRBuilder<> Builder(C);
127 Builder.SetInsertPoint(CI->getParent(), CI);
128
129 Module *M = F->getParent();
130 SmallVector<Value*, 1> Elts;
131 Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
132 MDNode *Node = MDNode::get(C, Elts);
133
134 Value *Arg0 = CI->getArgOperand(0);
135 Value *Arg1 = CI->getArgOperand(1);
136
137 // Convert the type of the pointer to a pointer to the stored type.
138 Value *BC = Builder.CreateBitCast(Arg0,
139 PointerType::getUnqual(Arg1->getType()),
140 "cast");
141 StoreInst *SI = Builder.CreateStore(Arg1, BC);
142 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
143 SI->setAlignment(16);
144
145 // Remove intrinsic.
146 CI->eraseFromParent();
147 return;
120148 } else {
121149 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
122 if (Name.startswith("llvm.x86.avx.vpermil.pd.256"))
150 if (Name == "llvm.x86.avx.vpermil.pd.256")
123151 PD256 = true;
124 else if (Name.startswith("llvm.x86.avx.vpermil.pd"))
152 else if (Name == "llvm.x86.avx.vpermil.pd")
125153 PD128 = true;
126 else if (Name.startswith("llvm.x86.avx.vpermil.ps.256"))
154 else if (Name == "llvm.x86.avx.vpermil.ps.256")
127155 PS256 = true;
128 else if (Name.startswith("llvm.x86.avx.vpermil.ps"))
156 else if (Name == "llvm.x86.avx.vpermil.ps")
129157 PS128 = true;
130158
131159 if (PD256 || PD128 || PS256 || PS128) {
25542554 ret i32 %tmp
25552555 }
25562556 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
2557
2558 ; CHECK: movntdq
2559 define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind {
2560 %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
2561 tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind
2562 ret void
2563 }
2564 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
2565
2566 ; CHECK: movntps
2567 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
2568 tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
2569 ret void
2570 }
2571 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
2572
2573 ; CHECK: movntpd
2574 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
2575 ; add operation forces the execution domain.
2576 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
2577 tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
2578 ret void
2579 }
2580 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind