llvm.org GIT mirror llvm / 63307c3
Added additional atomic intrinsics: and, or, xor, min, and max. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50663 91177308-0d34-0410-b5e6-96231b3b80d8 Mon P Wang 12 years ago
15 changed file(s) with 597 addition(s) and 64 deletion(s). Raw diff Collapse all Expand all
235235 ///
236236 succ_iterator removeSuccessor(succ_iterator I);
237237
238 /// transferSuccessors - Transfers all the successors from fromMBB to this
239 /// machine basic block (i.e., copies all the successors of fromMBB and
240 /// removes all the successors from fromMBB).
241 void transferSuccessors(MachineBasicBlock *fromMBB);
242
238243 /// isSuccessor - Return true if the specified MBB is a successor of this
239244 /// block.
240245 bool isSuccessor(MachineBasicBlock *MBB) const;
593593 // the return is always the original value in *ptr
594594 ATOMIC_SWAP,
595595
596 // Val, OUTCHAIN = ATOMIC_LSS(INCHAIN, ptr, amt)
597 // this corresponds to the atomic.lss intrinsic.
598 // *ptr - amt is stored to *ptr atomically.
599 // the return is always the original value in *ptr
600 ATOMIC_LSS,
601
602 // Val, OUTCHAIN = ATOMIC_L[OpName]S(INCHAIN, ptr, amt)
603 // this corresponds to the atomic.[OpName] intrinsic.
604 // op(*ptr, amt) is stored to *ptr atomically.
605 // the return is always the original value in *ptr
606 ATOMIC_LOAD_AND,
607 ATOMIC_LOAD_OR,
608 ATOMIC_LOAD_XOR,
609 ATOMIC_LOAD_MIN,
610 ATOMIC_LOAD_MAX,
611 ATOMIC_LOAD_UMIN,
612 ATOMIC_LOAD_UMAX,
613
596614 // BUILTIN_OP_END - This must be the last enum value in this list.
597615 BUILTIN_OP_END
598616 };
266266 def int_memory_barrier : Intrinsic<[llvm_void_ty, llvm_i1_ty, llvm_i1_ty,
267267 llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], []>;
268268
269 def int_atomic_lcs : Intrinsic<[llvm_anyint_ty,
270 LLVMPointerType>,
271 LLVMMatchType<0>, LLVMMatchType<0>],
269 def int_atomic_lcs : Intrinsic<[llvm_anyint_ty,
270 LLVMPointerType>,
271 LLVMMatchType<0>, LLVMMatchType<0>],
272 [IntrWriteArgMem]>,
273 GCCBuiltin<"__sync_val_compare_and_swap">;
274 def int_atomic_las : Intrinsic<[llvm_anyint_ty,
275 LLVMPointerType>,
276 LLVMMatchType<0>],
277 [IntrWriteArgMem]>,
278 GCCBuiltin<"__sync_fetch_and_add">;
279 def int_atomic_swap : Intrinsic<[llvm_anyint_ty,
280 LLVMPointerType>,
281 LLVMMatchType<0>],
272282 [IntrWriteArgMem]>,
273 GCCBuiltin<"__sync_val_compare_and_swap">;
274 def int_atomic_las : Intrinsic<[llvm_anyint_ty,
275 LLVMPointerType>,
276 LLVMMatchType<0>],
277 [IntrWriteArgMem]>,
278 GCCBuiltin<"__sync_fetch_and_add">;
279 def int_atomic_swap : Intrinsic<[llvm_anyint_ty,
280 LLVMPointerType>,
281 LLVMMatchType<0>],
282 [IntrWriteArgMem]>,
283 GCCBuiltin<"__sync_lock_test_and_set">;
284
283 GCCBuiltin<"__sync_lock_test_and_set">;
284 def int_atomic_lss : Intrinsic<[llvm_anyint_ty,
285 LLVMPointerType>,
286 LLVMMatchType<0>],
287 [IntrWriteArgMem]>,
288 GCCBuiltin<"__sync_fetch_and_sub">;
289 def int_atomic_load_and : Intrinsic<[llvm_anyint_ty,
290 LLVMPointerType>,
291 LLVMMatchType<0>],
292 [IntrWriteArgMem]>,
293 GCCBuiltin<"__sync_fetch_and_and">;
294 def int_atomic_load_or : Intrinsic<[llvm_anyint_ty,
295 LLVMPointerType>,
296 LLVMMatchType<0>],
297 [IntrWriteArgMem]>,
298 GCCBuiltin<"__sync_fetch_and_or">;
299 def int_atomic_load_xor : Intrinsic<[llvm_anyint_ty,
300 LLVMPointerType>,
301 LLVMMatchType<0>],
302 [IntrWriteArgMem]>,
303 GCCBuiltin<"__sync_fetch_and_xor">;
304 def int_atomic_load_min : Intrinsic<[llvm_anyint_ty,
305 LLVMPointerType>,
306 LLVMMatchType<0>],
307 [IntrWriteArgMem]>,
308 GCCBuiltin<"__sync_fetch_and_min">;
309 def int_atomic_load_max : Intrinsic<[llvm_anyint_ty,
310 LLVMPointerType>,
311 LLVMMatchType<0>],
312 [IntrWriteArgMem]>,
313 GCCBuiltin<"__sync_fetch_and_max">;
314 def int_atomic_load_umin : Intrinsic<[llvm_anyint_ty,
315 LLVMPointerType>,
316 LLVMMatchType<0>],
317 [IntrWriteArgMem]>,
318 GCCBuiltin<"__sync_fetch_and_umin">;
319 def int_atomic_load_umax : Intrinsic<[llvm_anyint_ty,
320 LLVMPointerType>,
321 LLVMMatchType<0>],
322 [IntrWriteArgMem]>,
323 GCCBuiltin<"__sync_fetch_and_umax">;
324
285325 //===-------------------------- Other Intrinsics --------------------------===//
286326 //
287327 def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
13391339 /// by the system, this holds the same type (e.g. i32 -> i32).
13401340 MVT::ValueType TransformToType[MVT::LAST_VALUETYPE];
13411341
1342 // Defines the capacity of the TargetLowering::OpActions table
1343 static const int OpActionsCapacity = 173;
1344
13421345 /// OpActions - For each operation and each value type, keep a LegalizeAction
13431346 /// that indicates how instruction selection should deal with the operation.
13441347 /// Most operations are Legal (aka, supported natively by the target), but
13451348 /// operations that are not should be described. Note that operations on
13461349 /// non-legal value types are not described here.
1347 uint64_t OpActions[156];
1350 uint64_t OpActions[OpActionsCapacity];
13481351
13491352 /// LoadXActions - For each load of load extension type and each value type,
13501353 /// keep a LegalizeAction that indicates how instruction selection should deal
13771380 /// TargetDAGCombineArray - Targets can specify ISD nodes that they would
13781381 /// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(),
13791382 /// which sets a bit in this array.
1380 unsigned char TargetDAGCombineArray[160/(sizeof(unsigned char)*8)];
1383 unsigned char TargetDAGCombineArray[168/(sizeof(unsigned char)*8)];
13811384
13821385 /// PromoteToType - For operations that must be promoted to a specific type,
13831386 /// this holds the destination type. This map should be sparse, so don't hold
249249 std::find(Predecessors.begin(), Predecessors.end(), pred);
250250 assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
251251 Predecessors.erase(I);
252 }
253
254 void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB)
255 {
256 if (this == fromMBB)
257 return;
258
259 for(MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(),
260 end = fromMBB->succ_end(); iter != end; ++iter) {
261 addSuccessor(*iter);
262 }
263 while(!fromMBB->succ_empty())
264 fromMBB->removeSuccessor(fromMBB->succ_begin());
252265 }
253266
254267 bool MachineBasicBlock::isSuccessor(MachineBasicBlock *MBB) const {
12341234 break;
12351235 }
12361236
1237 case ISD::ATOMIC_LCS:
1237 case ISD::ATOMIC_LCS: {
1238 unsigned int num_operands = 4;
1239 assert(Node->getNumOperands() == num_operands && "Invalid Atomic node!");
1240 SDOperand Ops[4];
1241 for (unsigned int x = 0; x < num_operands; ++x)
1242 Ops[x] = LegalizeOp(Node->getOperand(x));
1243 Result = DAG.UpdateNodeOperands(Result, &Ops[0], num_operands);
1244
1245 switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
1246 default: assert(0 && "This action is not supported yet!");
1247 case TargetLowering::Custom:
1248 Result = TLI.LowerOperation(Result, DAG);
1249 break;
1250 case TargetLowering::Legal:
1251 break;
1252 }
1253 AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
1254 AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
1255 return Result.getValue(Op.ResNo);
1256 }
12381257 case ISD::ATOMIC_LAS:
1258 case ISD::ATOMIC_LSS:
1259 case ISD::ATOMIC_LOAD_AND:
1260 case ISD::ATOMIC_LOAD_OR:
1261 case ISD::ATOMIC_LOAD_XOR:
1262 case ISD::ATOMIC_LOAD_MIN:
1263 case ISD::ATOMIC_LOAD_MAX:
1264 case ISD::ATOMIC_LOAD_UMIN:
1265 case ISD::ATOMIC_LOAD_UMAX:
12391266 case ISD::ATOMIC_SWAP: {
1240 assert(((Node->getNumOperands() == 4 && Node->getOpcode() == ISD::ATOMIC_LCS) ||
1241 (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_LAS) ||
1242 (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_SWAP)) &&
1243 "Invalid Atomic node!");
1244 int num = Node->getOpcode() == ISD::ATOMIC_LCS ? 4 : 3;
1245 SDOperand Ops[4];
1246 for (int x = 0; x < num; ++x)
1267 unsigned int num_operands = 3;
1268 assert(Node->getNumOperands() == num_operands && "Invalid Atomic node!");
1269 SDOperand Ops[3];
1270 for (unsigned int x = 0; x < num_operands; ++x)
12471271 Ops[x] = LegalizeOp(Node->getOperand(x));
1248 Result = DAG.UpdateNodeOperands(Result, &Ops[0], num);
1272 Result = DAG.UpdateNodeOperands(Result, &Ops[0], num_operands);
12491273
12501274 switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
12511275 default: assert(0 && "This action is not supported yet!");
12521276 case TargetLowering::Custom:
12531277 Result = TLI.LowerOperation(Result, DAG);
12541278 break;
1279 case TargetLowering::Expand:
1280 Result = SDOperand(TLI.ExpandOperationResult(Op.Val, DAG),0);
1281 break;
12551282 case TargetLowering::Legal:
12561283 break;
12571284 }
12581285 AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
12591286 AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
12601287 return Result.getValue(Op.ResNo);
1261 }
1262
1288 }
12631289 case ISD::Constant: {
12641290 ConstantSDNode *CN = cast(Node);
12651291 unsigned opAction =
42414267 break;
42424268 }
42434269 case ISD::ATOMIC_LAS:
4270 case ISD::ATOMIC_LSS:
4271 case ISD::ATOMIC_LOAD_AND:
4272 case ISD::ATOMIC_LOAD_OR:
4273 case ISD::ATOMIC_LOAD_XOR:
4274 case ISD::ATOMIC_LOAD_MIN:
4275 case ISD::ATOMIC_LOAD_MAX:
4276 case ISD::ATOMIC_LOAD_UMIN:
4277 case ISD::ATOMIC_LOAD_UMAX:
42444278 case ISD::ATOMIC_SWAP: {
42454279 Tmp2 = PromoteOp(Node->getOperand(2));
42464280 Result = DAG.getAtomic(Node->getOpcode(), Node->getOperand(0),
28542854 SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain,
28552855 SDOperand Ptr, SDOperand Val,
28562856 MVT::ValueType VT) {
2857 assert((Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_SWAP)
2857 assert(( Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_LSS
2858 || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_LOAD_AND
2859 || Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR
2860 || Opcode == ISD::ATOMIC_LOAD_MIN || Opcode == ISD::ATOMIC_LOAD_MAX
2861 || Opcode == ISD::ATOMIC_LOAD_UMIN || Opcode == ISD::ATOMIC_LOAD_UMAX)
28582862 && "Invalid Atomic Op");
28592863 SDVTList VTs = getVTList(Val.getValueType(), MVT::Other);
28602864 FoldingSetNodeID ID;
42684272 case ISD::MEMBARRIER: return "MemBarrier";
42694273 case ISD::ATOMIC_LCS: return "AtomicLCS";
42704274 case ISD::ATOMIC_LAS: return "AtomicLAS";
4271 case ISD::ATOMIC_SWAP: return "AtomicSWAP";
4275 case ISD::ATOMIC_LSS: return "AtomicLSS";
4276 case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
4277 case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
4278 case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
4279 case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
4280 case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
4281 case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
4282 case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
4283 case ISD::ATOMIC_SWAP: return "AtomicSWAP";
42724284 case ISD::PCMARKER: return "PCMarker";
42734285 case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
42744286 case ISD::SRCVALUE: return "SrcValue";
731731 assert(0 && "UserOp2 should not exist at instruction selection time!");
732732 abort();
733733 }
734
735 private:
736 inline const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
737
734738 };
735739 } // end namespace llvm
736740
27682772 }
27692773 }
27702774
2775
2776 /// Inlined utility function to implement binary input atomic intrinsics for
2777 // visitIntrinsicCall: I is a call instruction
2778 // Op is the associated NodeType for I
2779 const char *
2780 SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
2781 SDOperand Root = getRoot();
2782 SDOperand O2 = getValue(I.getOperand(2));
2783 SDOperand L = DAG.getAtomic(Op, Root,
2784 getValue(I.getOperand(1)),
2785 O2, O2.getValueType());
2786 setValue(&I, L);
2787 DAG.setRoot(L.getValue(1));
2788 return 0;
2789 }
2790
27712791 /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
27722792 /// we want to emit this as a call to a named external function, return the name
27732793 /// otherwise lower it and return null.
32043224 DAG.setRoot(L.getValue(1));
32053225 return 0;
32063226 }
3207 case Intrinsic::atomic_las: {
3208 SDOperand Root = getRoot();
3209 SDOperand O2 = getValue(I.getOperand(2));
3210 SDOperand L = DAG.getAtomic(ISD::ATOMIC_LAS, Root,
3211 getValue(I.getOperand(1)),
3212 O2, O2.getValueType());
3213 setValue(&I, L);
3214 DAG.setRoot(L.getValue(1));
3215 return 0;
3216 }
3217 case Intrinsic::atomic_swap: {
3218 SDOperand Root = getRoot();
3219 SDOperand O2 = getValue(I.getOperand(2));
3220 SDOperand L = DAG.getAtomic(ISD::ATOMIC_SWAP, Root,
3221 getValue(I.getOperand(1)),
3222 O2, O2.getValueType());
3223 setValue(&I, L);
3224 DAG.setRoot(L.getValue(1));
3225 return 0;
3226 }
3227
3227 case Intrinsic::atomic_las:
3228 return implVisitBinaryAtomic(I, ISD::ATOMIC_LAS);
3229 case Intrinsic::atomic_lss:
3230 return implVisitBinaryAtomic(I, ISD::ATOMIC_LSS);
3231 case Intrinsic::atomic_load_and:
3232 return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
3233 case Intrinsic::atomic_load_or:
3234 return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
3235 case Intrinsic::atomic_load_xor:
3236 return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
3237 case Intrinsic::atomic_load_min:
3238 return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
3239 case Intrinsic::atomic_load_max:
3240 return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
3241 case Intrinsic::atomic_load_umin:
3242 return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
3243 case Intrinsic::atomic_load_umax:
3244 return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
3245 case Intrinsic::atomic_swap:
3246 return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
32283247 }
32293248 }
32303249
45184537 AU.setPreservesAll();
45194538 }
45204539
4521
4522
45234540 bool SelectionDAGISel::runOnFunction(Function &Fn) {
45244541 // Get alias analysis for load/store combining.
45254542 AA = &getAnalysis();
164164
165165 TargetLowering::TargetLowering(TargetMachine &tm)
166166 : TM(tm), TD(TM.getTargetData()) {
167 assert(ISD::BUILTIN_OP_END <= 156 &&
167 assert(ISD::BUILTIN_OP_END <= OpActionsCapacity &&
168168 "Fixed size array in TargetLowering is not large enough!");
169169 // All operations default to being supported.
170170 memset(OpActions, 0, sizeof(OpActions));
357357 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
358358 def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2,
359359 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
360 def atomic_lss : SDNode<"ISD::ATOMIC_LSS" , STDAtomic2,
361 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
362 def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , STDAtomic2,
363 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
364 def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , STDAtomic2,
365 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
366 def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , STDAtomic2,
367 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
368 def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", STDAtomic2,
369 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
370 def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", STDAtomic2,
371 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
372 def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", STDAtomic2,
373 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
374 def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", STDAtomic2,
375 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
360376
361377 // Do not use ld, st directly. Use load, extload, sextload, zextload, store,
362378 // and truncst (see below).
291291 if (!Subtarget->hasSSE2())
292292 setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
293293
294 // Expand certain atomics
294295 setOperationAction(ISD::ATOMIC_LCS , MVT::i8, Custom);
295296 setOperationAction(ISD::ATOMIC_LCS , MVT::i16, Custom);
296297 setOperationAction(ISD::ATOMIC_LCS , MVT::i32, Custom);
297298 setOperationAction(ISD::ATOMIC_LCS , MVT::i64, Custom);
299 setOperationAction(ISD::ATOMIC_LSS , MVT::i32, Expand);
298300
299301 // Use the default ISD::LOCATION, ISD::DECLARE expansion.
300302 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
55105512 return DAG.getNode(ISD::MERGE_VALUES, Tys, ResultVal, cpOutH.getValue(1)).Val;
55115513 }
55125514
5515 SDNode* X86TargetLowering::ExpandATOMIC_LSS(SDNode* Op, SelectionDAG &DAG) {
5516 MVT::ValueType T = cast(Op)->getVT();
5517 assert (T == MVT::i32 && "Only know how to expand i32 LSS");
5518 SDOperand negOp = DAG.getNode(ISD::SUB, T,
5519 DAG.getConstant(0, T), Op->getOperand(2));
5520 return DAG.getAtomic(ISD::ATOMIC_LAS, Op->getOperand(0),
5521 Op->getOperand(1), negOp, T).Val;
5522 }
5523
55135524 /// LowerOperation - Provide custom lowering hooks for some operations.
55145525 ///
55155526 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
55675578 case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
55685579 case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
55695580 case ISD::ATOMIC_LCS: return ExpandATOMIC_LCS(N, DAG);
5581 case ISD::ATOMIC_LSS: return ExpandATOMIC_LSS(N,DAG);
55705582 }
55715583 }
55725584
57315743 // X86 Scheduler Hooks
57325744 //===----------------------------------------------------------------------===//
57335745
5746 // private utility function
5747 MachineBasicBlock *
5748 X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
5749 MachineBasicBlock *MBB,
5750 unsigned regOpc,
5751 unsigned immOpc) {
5752 // For the atomic bitwise operator, we generate
5753 // thisMBB:
5754 // newMBB:
5755 // ld EAX = [bitinstr.addr]
5756 // mov t1 = EAX
5757 // op t2 = t1, [bitinstr.val]
5758 // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
5759 // bz newMBB
5760 // fallthrough -->nextMBB
5761 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5762 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
5763 ilist::iterator MBBIter = MBB;
5764 ++MBBIter;
5765
5766 /// First build the CFG
5767 MachineFunction *F = MBB->getParent();
5768 MachineBasicBlock *thisMBB = MBB;
5769 MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
5770 MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
5771 F->getBasicBlockList().insert(MBBIter, newMBB);
5772 F->getBasicBlockList().insert(MBBIter, nextMBB);
5773
5774 // Move all successors to thisMBB to nextMBB
5775 nextMBB->transferSuccessors(thisMBB);
5776
5777 // Update thisMBB to fall through to newMBB
5778 thisMBB->addSuccessor(newMBB);
5779
5780 // newMBB jumps to itself and fall through to nextMBB
5781 newMBB->addSuccessor(nextMBB);
5782 newMBB->addSuccessor(newMBB);
5783
5784 // Insert instructions into newMBB based on incoming instruction
5785 assert(bInstr->getNumOperands() < 8 && "unexpected number of operands");
5786 MachineOperand& destOper = bInstr->getOperand(0);
5787 MachineOperand* argOpers[6];
5788 int numArgs = bInstr->getNumOperands() - 1;
5789 for (int i=0; i < numArgs; ++i)
5790 argOpers[i] = &bInstr->getOperand(i+1);
5791
5792 // x86 address has 4 operands: base, index, scale, and displacement
5793 int lastAddrIndx = 3; // [0,3]
5794 int valArgIndx = 4;
5795
5796 MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), X86::EAX);
5797 for (int i=0; i <= lastAddrIndx; ++i)
5798 (*MIB).addOperand(*argOpers[i]);
5799
5800 unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
5801 MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t1);
5802 MIB.addReg(X86::EAX);
5803
5804 unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
5805 assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
5806 && "invalid operand");
5807 if (argOpers[valArgIndx]->isReg())
5808 MIB = BuildMI(newMBB, TII->get(regOpc), t2);
5809 else
5810 MIB = BuildMI(newMBB, TII->get(immOpc), t2);
5811 MIB.addReg(t1);
5812 (*MIB).addOperand(*argOpers[valArgIndx]);
5813
5814 MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
5815 for (int i=0; i <= lastAddrIndx; ++i)
5816 (*MIB).addOperand(*argOpers[i]);
5817 MIB.addReg(t2);
5818
5819 MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
5820 MIB.addReg(X86::EAX);
5821
5822 // insert branch
5823 BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
5824
5825 delete bInstr; // The pseudo instruction is gone now.
5826 return nextMBB;
5827 }
5828
5829 // private utility function
5830 MachineBasicBlock *
5831 X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
5832 MachineBasicBlock *MBB,
5833 unsigned cmovOpc) {
5834 // For the atomic min/max operator, we generate
5835 // thisMBB:
5836 // newMBB:
5837 // ld EAX = [min/max.addr]
5838 // mov t1 = EAX
5839 // mov t2 = [min/max.val]
5840 // cmp t1, t2
5841 // cmov[cond] t2 = t1
5842 // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
5843 // bz newMBB
5844 // fallthrough -->nextMBB
5845 //
5846 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5847 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
5848 ilist::iterator MBBIter = MBB;
5849 ++MBBIter;
5850
5851 /// First build the CFG
5852 MachineFunction *F = MBB->getParent();
5853 MachineBasicBlock *thisMBB = MBB;
5854 MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
5855 MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
5856 F->getBasicBlockList().insert(MBBIter, newMBB);
5857 F->getBasicBlockList().insert(MBBIter, nextMBB);
5858
5859 // Move all successors to thisMBB to nextMBB
5860 nextMBB->transferSuccessors(thisMBB);
5861
5862 // Update thisMBB to fall through to newMBB
5863 thisMBB->addSuccessor(newMBB);
5864
5865 // newMBB jumps to newMBB and fall through to nextMBB
5866 newMBB->addSuccessor(nextMBB);
5867 newMBB->addSuccessor(newMBB);
5868
5869 // Insert instructions into newMBB based on incoming instruction
5870 assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
5871 MachineOperand& destOper = mInstr->getOperand(0);
5872 MachineOperand* argOpers[6];
5873 int numArgs = mInstr->getNumOperands() - 1;
5874 for (int i=0; i < numArgs; ++i)
5875 argOpers[i] = &mInstr->getOperand(i+1);
5876
5877 // x86 address has 4 operands: base, index, scale, and displacement
5878 int lastAddrIndx = 3; // [0,3]
5879 int valArgIndx = 4;
5880
5881 MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), X86::EAX);
5882 for (int i=0; i <= lastAddrIndx; ++i)
5883 (*MIB).addOperand(*argOpers[i]);
5884
5885 unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
5886 MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t1);
5887 MIB.addReg(X86::EAX);
5888
5889 // We only support register and immediate values
5890 assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
5891 && "invalid operand");
5892
5893 unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
5894 if (argOpers[valArgIndx]->isReg())
5895 MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
5896 else
5897 MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
5898 (*MIB).addOperand(*argOpers[valArgIndx]);
5899
5900 MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
5901 MIB.addReg(t1);
5902 MIB.addReg(t2);
5903
5904 // Generate movc
5905 unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
5906 MIB = BuildMI(newMBB, TII->get(cmovOpc),t3);
5907 MIB.addReg(t2);
5908 MIB.addReg(t1);
5909
5910 // Cmp and exchange if none has modified the memory location
5911 MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
5912 for (int i=0; i <= lastAddrIndx; ++i)
5913 (*MIB).addOperand(*argOpers[i]);
5914 MIB.addReg(t3);
5915
5916 MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
5917 MIB.addReg(X86::EAX);
5918
5919 // insert branch
5920 BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
5921
5922 delete mInstr; // The pseudo instruction is gone now.
5923 return nextMBB;
5924 }
5925
5926
57345927 MachineBasicBlock *
57355928 X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
57365929 MachineBasicBlock *BB) {
57655958 MachineFunction *F = BB->getParent();
57665959 F->getBasicBlockList().insert(It, copy0MBB);
57675960 F->getBasicBlockList().insert(It, sinkMBB);
5768 // Update machine-CFG edges by first adding all successors of the current
5961 // Update machine-CFG edges by transferring all successors of the current
57695962 // block to the new block which will contain the Phi node for the select.
5770 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
5771 e = BB->succ_end(); i != e; ++i)
5772 sinkMBB->addSuccessor(*i);
5773 // Next, remove all successors of the current block, and add the true
5774 // and fallthrough blocks as its successors.
5775 while(!BB->succ_empty())
5776 BB->removeSuccessor(BB->succ_begin());
5963 sinkMBB->transferSuccessors(BB);
5964
5965 // Add the true and fallthrough blocks as its successors.
57775966 BB->addSuccessor(copy0MBB);
57785967 BB->addSuccessor(sinkMBB);
57795968
58736062 delete MI; // The pseudo instruction is gone now.
58746063 return BB;
58756064 }
6065 case X86::ATOMAND32:
6066 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
6067 X86::AND32ri);
6068 case X86::ATOMOR32:
6069 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
6070 X86::OR32ri);
6071 case X86::ATOMXOR32:
6072 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
6073 X86::XOR32ri);
6074 case X86::ATOMMIN32:
6075 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
6076 case X86::ATOMMAX32:
6077 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
6078 case X86::ATOMUMIN32:
6079 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
6080 case X86::ATOMUMAX32:
6081 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
58766082 }
58776083 }
58786084
345345 virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
346346 MachineBasicBlock *MBB);
347347
348
348349 /// getTargetNodeName - This method returns the name of a target specific
349350 /// DAG node.
350351 virtual const char *getTargetNodeName(unsigned Opcode) const;
523524 SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG);
524525 SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG);
525526 SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG);
526
527 SDNode *ExpandATOMIC_LSS(SDNode *N, SelectionDAG &DAG);
528
527529 SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG,
528530 SDOperand Chain,
529531 SDOperand Dst, SDOperand Src,
536538 bool AlwaysInline,
537539 const Value *DstSV, uint64_t DstSVOff,
538540 const Value *SrcSV, uint64_t SrcSVOff);
541
542 /// Utility function to emit atomic bitwise operations (and, or, xor).
543 // It takes the bitwise instruction to expand, the associated machine basic
544 // block, and the associated X86 opcodes for reg/reg and reg/imm.
545 MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
546 MachineInstr *BInstr,
547 MachineBasicBlock *BB,
548 unsigned regOpc,
549 unsigned immOpc);
550
551 /// Utility function to emit atomic min and max. It takes the min/max
552 // instruction to expand, the associated basic block, and the associated
553 // cmov opcode for moving the min or max value.
554 MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
555 MachineBasicBlock *BB,
556 unsigned cmovOpc);
557
539558 };
540559 }
541560
25972597 TB, LOCK;
25982598 }
25992599
2600 // Atomic exchange and and, or, xor
2601 let Constraints = "$val = $dst", Defs = [EFLAGS],
2602 usesCustomDAGSchedInserter = 1 in {
2603 def ATOMAND32 : I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
2604 "#ATOMAND32 PSUEDO!",
2605 [(set GR32:$dst, (atomic_load_and addr:$ptr, GR32:$val))]>,
2606 TB, LOCK;
2607 }
2608
2609 let Constraints = "$val = $dst", Defs = [EFLAGS],
2610 usesCustomDAGSchedInserter = 1 in {
2611 def ATOMOR32 : I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
2612 "#ATOMOR32 PSUEDO!",
2613 [(set GR32:$dst, (atomic_load_or addr:$ptr, GR32:$val))]>,
2614 TB, LOCK;
2615 }
2616
2617 let Constraints = "$val = $dst", Defs = [EFLAGS],
2618 usesCustomDAGSchedInserter = 1 in {
2619 def ATOMXOR32 : I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
2620 "#ATOMXOR32 PSUEDO!",
2621 [(set GR32:$dst, (atomic_load_xor addr:$ptr, GR32:$val))]>,
2622 TB, LOCK;
2623 }
2624
2625 let Constraints = "$val = $dst", Defs = [EFLAGS],
2626 usesCustomDAGSchedInserter = 1 in {
2627 def ATOMMIN32: I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
2628 "#ATOMMIN32 PSUEDO!",
2629 [(set GR32:$dst, (atomic_load_min addr:$ptr, GR32:$val))]>,
2630 TB, LOCK;
2631 }
2632
2633 let Constraints = "$val = $dst", Defs = [EFLAGS],
2634 usesCustomDAGSchedInserter = 1 in {
2635 def ATOMMAX32: I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
2636 "#ATOMMAX32 PSUEDO!",
2637 [(set GR32:$dst, (atomic_load_max addr:$ptr, GR32:$val))]>,
2638 TB, LOCK;
2639 }
2640
2641 let Constraints = "$val = $dst", Defs = [EFLAGS],
2642 usesCustomDAGSchedInserter = 1 in {
2643 def ATOMUMIN32: I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
2644 "#ATOMUMIN32 PSUEDO!",
2645 [(set GR32:$dst, (atomic_load_umin addr:$ptr, GR32:$val))]>,
2646 TB, LOCK;
2647 }
2648
2649 let Constraints = "$val = $dst", Defs = [EFLAGS],
2650 usesCustomDAGSchedInserter = 1 in {
2651 def ATOMUMAX32: I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
2652 "#ATOMUMAX32 PSUEDO!",
2653 [(set GR32:$dst, (atomic_load_umax addr:$ptr, GR32:$val))]>,
2654 TB, LOCK;
2655 }
2656
26002657 //===----------------------------------------------------------------------===//
26012658 // Non-Instruction Patterns
26022659 //===----------------------------------------------------------------------===//
246246 , MaxInlineSizeThreshold(128)
247247 , Is64Bit(is64Bit)
248248 , TargetType(isELF) { // Default to ELF unless otherwise specified.
249
249
250250 // Determine default and user specified characteristics
251251 if (!FS.empty()) {
252252 // If feature string is not empty, parse features string.
0 ; RUN: llvm-as < %s | llc -march=x86 -o %t1 -f
1 ; RUN: grep "lock xaddl" %t1 | count 4
2 ; RUN: grep "lock cmpxchgl" %t1 | count 13
3 ; RUN: grep "xchgl" %t1 | count 14
4 ; RUN: grep "cmova" %t1 | count 2
5 ; RUN: grep "cmovb" %t1 | count 2
6 ; RUN: grep "cmovg" %t1 | count 2
7 ; RUN: grep "cmovl" %t1 | count 2
8
9 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
10
11 define void @main(i32 %argc, i8** %argv) {
12 entry:
13 %argc.addr = alloca i32 ; [#uses=1]
14 %argv.addr = alloca i8** ; [#uses=1]
15 %val1 = alloca i32 ; [#uses=2]
16 %val2 = alloca i32 ; [#uses=15]
17 %andt = alloca i32 ; [#uses=2]
18 %ort = alloca i32 ; [#uses=2]
19 %xort = alloca i32 ; [#uses=2]
20 %old = alloca i32 ; [#uses=18]
21 %temp = alloca i32 ; [#uses=2]
22 store i32 %argc, i32* %argc.addr
23 store i8** %argv, i8*** %argv.addr
24 store i32 0, i32* %val1
25 store i32 31, i32* %val2
26 store i32 3855, i32* %andt
27 store i32 3855, i32* %ort
28 store i32 3855, i32* %xort
29 store i32 4, i32* %temp
30 %tmp = load i32* %temp ; [#uses=1]
31 call i32 @llvm.atomic.las.i32( i32* %val1, i32 %tmp ) ; :0 [#uses=1]
32 store i32 %0, i32* %old
33 call i32 @llvm.atomic.lss.i32( i32* %val2, i32 30 ) ; :1 [#uses=1]
34 store i32 %1, i32* %old
35 call i32 @llvm.atomic.las.i32( i32* %val2, i32 1 ) ; :2 [#uses=1]
36 store i32 %2, i32* %old
37 call i32 @llvm.atomic.lss.i32( i32* %val2, i32 1 ) ; :3 [#uses=1]
38 store i32 %3, i32* %old
39 call i32 @llvm.atomic.load.and.i32( i32* %andt, i32 4080 ) ; :4 [#uses=1]
40 store i32 %4, i32* %old
41 call i32 @llvm.atomic.load.or.i32( i32* %ort, i32 4080 ) ; :5 [#uses=1]
42 store i32 %5, i32* %old
43 call i32 @llvm.atomic.load.xor.i32( i32* %xort, i32 4080 ) ; :6 [#uses=1]
44 store i32 %6, i32* %old
45 call i32 @llvm.atomic.load.min.i32( i32* %val2, i32 16 ) ; :7 [#uses=1]
46 store i32 %7, i32* %old
47 %neg = sub i32 0, 1 ; [#uses=1]
48 call i32 @llvm.atomic.load.min.i32( i32* %val2, i32 %neg ) ; :8 [#uses=1]
49 store i32 %8, i32* %old
50 call i32 @llvm.atomic.load.max.i32( i32* %val2, i32 1 ) ; :9 [#uses=1]
51 store i32 %9, i32* %old
52 call i32 @llvm.atomic.load.max.i32( i32* %val2, i32 0 ) ; :10 [#uses=1]
53 store i32 %10, i32* %old
54 call i32 @llvm.atomic.load.umax.i32( i32* %val2, i32 65535 ) ; :11 [#uses=1]
55 store i32 %11, i32* %old
56 call i32 @llvm.atomic.load.umax.i32( i32* %val2, i32 10 ) ; :12 [#uses=1]
57 store i32 %12, i32* %old
58 call i32 @llvm.atomic.load.umin.i32( i32* %val2, i32 1 ) ; :13 [#uses=1]
59 store i32 %13, i32* %old
60 call i32 @llvm.atomic.load.umin.i32( i32* %val2, i32 10 ) ; :14 [#uses=1]
61 store i32 %14, i32* %old
62 call i32 @llvm.atomic.swap.i32( i32* %val2, i32 1976 ) ; :15 [#uses=1]
63 store i32 %15, i32* %old
64 %neg1 = sub i32 0, 10 ; [#uses=1]
65 call i32 @llvm.atomic.lcs.i32( i32* %val2, i32 %neg1, i32 1 ) ; :16 [#uses=1]
66 store i32 %16, i32* %old
67 call i32 @llvm.atomic.lcs.i32( i32* %val2, i32 1976, i32 1 ) ; :17 [#uses=1]
68 store i32 %17, i32* %old
69 ret void
70 }
71
72 declare i32 @llvm.atomic.las.i32(i32*, i32) nounwind
73
74 declare i32 @llvm.atomic.lss.i32(i32*, i32) nounwind
75
76 declare i32 @llvm.atomic.load.and.i32(i32*, i32) nounwind
77
78 declare i32 @llvm.atomic.load.or.i32(i32*, i32) nounwind
79
80 declare i32 @llvm.atomic.load.xor.i32(i32*, i32) nounwind
81
82 declare i32 @llvm.atomic.load.min.i32(i32*, i32) nounwind
83
84 declare i32 @llvm.atomic.load.max.i32(i32*, i32) nounwind
85
86 declare i32 @llvm.atomic.load.umax.i32(i32*, i32) nounwind
87
88 declare i32 @llvm.atomic.load.umin.i32(i32*, i32) nounwind
89
90 declare i32 @llvm.atomic.swap.i32(i32*, i32) nounwind
91
92 declare i32 @llvm.atomic.lcs.i32(i32*, i32, i32) nounwind