llvm.org GIT mirror llvm / 3c98ce2
Add AutoUpgrade support for the SSE4 ptest intrinsics. Patch by Michael Kuperstein. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158295 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 8 years ago
3 changed file(s) with 103 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
2424 #include <cstring>
2525 using namespace llvm;
2626
27 // Upgrade the declarations of the SSE4.1 functions whose arguments have
28 // changed their type from v4f32 to v2i64.
29 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
30 Function *&NewFn) {
31 // Check whether this is an old version of the function, which received
32 // v4f32 arguments.
33 Type *Arg0Type = F->getFunctionType()->getParamType(0);
34 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
35 return false;
36
37 // Yes, it's old, replace it with new version.
38 F->setName(F->getName() + ".old");
39 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
40 return true;
41 }
2742
2843 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
2944 assert(F && "Illegal to upgrade a non-existent Function.");
6479 NewFn = 0;
6580 return true;
6681 }
82 // SSE4.1 ptest functions may have an old signature.
83 if (Name.startswith("x86.sse41.ptest")) {
84 if (Name == "x86.sse41.ptestc")
85 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
86 if (Name == "x86.sse41.ptestz")
87 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
88 if (Name == "x86.sse41.ptestnzc")
89 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
90 }
6791 // Fix the FMA4 intrinsics to remove the 4
6892 if (Name.startswith("x86.fma4.")) {
6993 F->setName("llvm.x86.fma" + Name.substr(8));
7498 }
7599 }
76100
77 // This may not belong here. This function is effectively being overloaded
78 // to both detect an intrinsic which needs upgrading, and to provide the
79 // upgraded form of the intrinsic. We should perhaps have two separate
101 // This may not belong here. This function is effectively being overloaded
102 // to both detect an intrinsic which needs upgrading, and to provide the
103 // upgraded form of the intrinsic. We should perhaps have two separate
80104 // functions for this.
81105 return false;
82106 }
98122 return false;
99123 }
100124
101 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
102 // upgraded intrinsic. All argument and return casting must be provided in
125 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
126 // upgraded intrinsic. All argument and return casting must be provided in
103127 // order to seamlessly integrate with existing context.
104128 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
105129 Function *F = CI->getCalledFunction();
241265 return;
242266 }
243267
268 StringRef Name = CI->getName();
269
244270 switch (NewFn->getIntrinsicID()) {
245271 default:
246272 llvm_unreachable("Unknown function for CallInst upgrade.");
249275 case Intrinsic::cttz:
250276 assert(CI->getNumArgOperands() == 1 &&
251277 "Mismatch between function args and call args");
252 StringRef Name = CI->getName();
253278 CI->setName(Name + ".old");
254279 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
255280 Builder.getFalse(), Name));
281 CI->eraseFromParent();
282 return;
283
284 case Intrinsic::x86_sse41_ptestc:
285 case Intrinsic::x86_sse41_ptestz:
286 case Intrinsic::x86_sse41_ptestnzc:
287 // The arguments for these intrinsics used to be v4f32, and changed
288 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
289 // So, the only thing required is a bitcast for both arguments.
290 // First, check the arguments have the old type.
291 Value *Arg0 = CI->getArgOperand(0);
292 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
293 return;
294
295 // Old intrinsic, add bitcasts
296 Value *Arg1 = CI->getArgOperand(1);
297
298 Value *BC0 =
299 Builder.CreateBitCast(Arg0,
300 VectorType::get(Type::getInt64Ty(C), 2),
301 "cast");
302 Value *BC1 =
303 Builder.CreateBitCast(Arg1,
304 VectorType::get(Type::getInt64Ty(C), 2),
305 "cast");
306
307 CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
308 CI->replaceAllUsesWith(NewCall);
256309 CI->eraseFromParent();
257310 return;
258311 }
0 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
1
; Call all three SSE4.1 ptest intrinsics with the <2 x i64> signature and fold
; every result into the return value so that none of the calls is left unused.
define i32 @foo(<2 x i64> %bar) nounwind {
entry:
; CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64>
  %res1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %bar, <2 x i64> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64>
  %res2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %bar, <2 x i64> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64>
  %res3 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %bar, <2 x i64> %bar)
  %add1 = add i32 %res1, %res2
  ; Use %res3 here; the sum previously added %res2 twice and ignored %res3.
  %add2 = add i32 %add1, %res3
  ret i32 %add2
}
14
15 ; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
16 ; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
17 ; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
18
19 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
20 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
21 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
0 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
1
; Call all three SSE4.1 ptest intrinsics through their old <4 x float>
; signature (the CHECK lines expect the <2 x i64> form in the output) and fold
; every result into the return value so that none of the calls is left unused.
define i32 @foo(<4 x float> %bar) nounwind {
entry:
; CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64>
  %res1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %bar, <4 x float> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64>
  %res2 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %bar, <4 x float> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64>
  %res3 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %bar, <4 x float> %bar)
  %add1 = add i32 %res1, %res2
  ; Use %res3 here; the sum previously added %res2 twice and ignored %res3.
  %add2 = add i32 %add1, %res3
  ret i32 %add2
}
14
15 ; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
16 ; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
17 ; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
18
19 declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
20 declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
21 declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone