llvm.org GIT mirror llvm / 24f934d
Update CreateMalloc so that its callers specify the size to allocate: MallocInst-autoupgrade users use non-TargetData-computed allocation sizes. Optimization users use TargetData to compute the allocation size. Now that malloc calls can have constant sizes, update isArrayMallocHelper() to use TargetData to determine the size of the malloced type and the size of malloced arrays. Extend getMallocType() to support malloc calls that have non-bitcast uses. Update OptimizeGlobalAddressOfMalloc() to optimize malloc calls that have non-bitcast uses. The bitcast use of a malloc call has to be treated specially here because the uses of the bitcast need to be replaced and the bitcast needs to be erased (just like the malloc call) for OptimizeGlobalAddressOfMalloc() to work correctly. Update PerformHeapAllocSRoA() to optimize malloc calls that have non-bitcast uses. The bitcast use of the malloc is not handled specially here because ReplaceUsesOfMallocWithGlobal replaces through the bitcast use. Update OptimizeOnceStoredGlobal() to not care about the malloc call's bitcast use. Update all globalopt malloc tests to not rely on autoupgraded-MallocInsts, but instead use explicit malloc calls with correct allocation sizes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@86077 91177308-0d34-0410-b5e6-96231b3b80d8 Victor Hernandez 9 years ago
19 changed file(s) with 245 addition(s) and 192 deletion(s). Raw diff Collapse all Expand all
8080 ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal));
8181 BasicBlock* BB = builder->GetInsertBlock();
8282 const Type* IntPtrTy = IntegerType::getInt32Ty(C);
83 ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, IntegerType::getInt8Ty(C),
84 val_mem, NULL, "arr");
83 const Type* Int8Ty = IntegerType::getInt8Ty(C);
84 Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty);
85 allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy);
86 ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem,
87 NULL, "arr");
8588 BB->getInstList().push_back(cast(ptr_arr));
8689
8790 //call void @llvm.memset.i32(i8 *%arr, i8 0, i32 %d, i32 1)
4949 const TargetData* TD);
5050
5151 /// getMallocType - Returns the PointerType resulting from the malloc call.
52 /// This PointerType is the result type of the call's only bitcast use.
53 /// If there is no unique bitcast use, then return NULL.
52 /// The PointerType depends on the number of bitcast uses of the malloc call:
53 /// 0: PointerType is the malloc call's return type.
54 /// 1: PointerType is the bitcast's result type.
55 /// >1: Unique PointerType cannot be determined, return NULL.
5456 const PointerType* getMallocType(const CallInst* CI);
5557
56 /// getMallocAllocatedType - Returns the Type allocated by malloc call. This
57 /// Type is the result type of the call's only bitcast use. If there is no
58 /// unique bitcast use, then return NULL.
58 /// getMallocAllocatedType - Returns the Type allocated by malloc call.
59 /// The Type depends on the number of bitcast uses of the malloc call:
60 /// 0: Type is the element type of the malloc call's return type.
61 /// 1: Type is the element type of the bitcast's result type.
62 /// >1: Unique Type cannot be determined, return NULL.
5963 const Type* getMallocAllocatedType(const CallInst* CI);
6064
6165 /// getMallocArraySize - Returns the array size of a malloc call. If the
898898 /// 3. Bitcast the result of the malloc call to the specified type.
899899 static Instruction *CreateMalloc(Instruction *InsertBefore,
900900 const Type *IntPtrTy, const Type *AllocTy,
901 Value *ArraySize = 0,
901 Value *AllocSize, Value *ArraySize = 0,
902902 const Twine &Name = "");
903903 static Instruction *CreateMalloc(BasicBlock *InsertAtEnd,
904904 const Type *IntPtrTy, const Type *AllocTy,
905 Value *ArraySize = 0, Function* MallocF = 0,
905 Value *AllocSize, Value *ArraySize = 0,
906 Function* MallocF = 0,
906907 const Twine &Name = "");
907908 /// CreateFree - Generate the IR for a call to the builtin free function.
908909 static void CreateFree(Value* Source, Instruction *InsertBefore);
1616 #include "llvm/Instructions.h"
1717 #include "llvm/Module.h"
1818 #include "llvm/Analysis/ConstantFolding.h"
19 #include "llvm/Target/TargetData.h"
1920 using namespace llvm;
2021
2122 //===----------------------------------------------------------------------===//
9596 if (!CI)
9697 return NULL;
9798
98 // Type must be known to determine array size.
99 // The size of the malloc's result type must be known to determine array size.
99100 const Type *T = getMallocAllocatedType(CI);
100 if (!T)
101 if (!T || !T->isSized() || !TD)
101102 return NULL;
102103
103104 Value *MallocArg = CI->getOperand(1);
105 const Type *ArgType = MallocArg->getType();
104106 ConstantExpr *CO = dyn_cast(MallocArg);
105107 BinaryOperator *BO = dyn_cast(MallocArg);
106108
107 Constant *ElementSize = ConstantExpr::getSizeOf(T);
108 ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
109 MallocArg->getType());
110 Constant *FoldedElementSize =
111 ConstantFoldConstantExpression(cast(ElementSize), Context, TD);
109 unsigned ElementSizeInt = TD->getTypeAllocSize(T);
110 if (const StructType *ST = dyn_cast(T))
111 ElementSizeInt = TD->getStructLayout(ST)->getSizeInBytes();
112 Constant *ElementSize = ConstantInt::get(ArgType, ElementSizeInt);
112113
113114 // First, check if CI is a non-array malloc.
114 if (CO && ((CO == ElementSize) ||
115 (FoldedElementSize && (CO == FoldedElementSize))))
115 if (CO && CO == ElementSize)
116116 // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
117 return ConstantInt::get(MallocArg->getType(), 1);
117 return ConstantInt::get(ArgType, 1);
118118
119119 // Second, check if CI is an array malloc whose array size can be determined.
120 if (isConstantOne(ElementSize) ||
121 (FoldedElementSize && isConstantOne(FoldedElementSize)))
120 if (isConstantOne(ElementSize))
122121 return MallocArg;
122
123 if (ConstantInt *CInt = dyn_cast(MallocArg))
124 if (CInt->getZExtValue() % ElementSizeInt == 0)
125 return ConstantInt::get(ArgType, CInt->getZExtValue() / ElementSizeInt);
123126
124127 if (!CO && !BO)
125128 return NULL;
127130 Value *Op0 = NULL;
128131 Value *Op1 = NULL;
129132 unsigned Opcode = 0;
130 if (CO && ((CO->getOpcode() == Instruction::Mul) ||
133 if (CO && ((CO->getOpcode() == Instruction::Mul) ||
131134 (CO->getOpcode() == Instruction::Shl))) {
132135 Op0 = CO->getOperand(0);
133136 Op1 = CO->getOperand(1);
134137 Opcode = CO->getOpcode();
135138 }
136 if (BO && ((BO->getOpcode() == Instruction::Mul) ||
139 if (BO && ((BO->getOpcode() == Instruction::Mul) ||
137140 (BO->getOpcode() == Instruction::Shl))) {
138141 Op0 = BO->getOperand(0);
139142 Op1 = BO->getOperand(1);
143146 // Determine array size if malloc's argument is the product of a mul or shl.
144147 if (Op0) {
145148 if (Opcode == Instruction::Mul) {
146 if ((Op1 == ElementSize) ||
147 (FoldedElementSize && (Op1 == FoldedElementSize)))
149 if (Op1 == ElementSize)
148150 // ArraySize * ElementSize
149151 return Op0;
150 if ((Op0 == ElementSize) ||
151 (FoldedElementSize && (Op0 == FoldedElementSize)))
152 if (Op0 == ElementSize)
152153 // ElementSize * ArraySize
153154 return Op1;
154155 }
160161 uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
161162 Value *Op1Pow = ConstantInt::get(Context,
162163 APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
163 if (Op0 == ElementSize || (FoldedElementSize && Op0 == FoldedElementSize))
164 if (Op0 == ElementSize)
164165 // ArraySize << log2(ElementSize)
165166 return Op1Pow;
166 if (Op1Pow == ElementSize ||
167 (FoldedElementSize && Op1Pow == FoldedElementSize))
167 if (Op1Pow == ElementSize)
168168 // ElementSize << log2(ArraySize)
169169 return Op0;
170170 }
204204 }
205205
206206 /// getMallocType - Returns the PointerType resulting from the malloc call.
207 /// This PointerType is the result type of the call's only bitcast use.
208 /// If there is no unique bitcast use, then return NULL.
207 /// The PointerType depends on the number of bitcast uses of the malloc call:
208 /// 0: PointerType is the call's return type.
209 /// 1: PointerType is the bitcast's result type.
210 /// >1: Unique PointerType cannot be determined, return NULL.
209211 const PointerType *llvm::getMallocType(const CallInst *CI) {
210212 assert(isMalloc(CI) && "GetMallocType and not malloc call");
211213
212 const BitCastInst *BCI = NULL;
213
214 const PointerType *MallocType = NULL;
215 unsigned NumOfBitCastUses = 0;
216
214217 // Determine if CallInst has a bitcast use.
215218 for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
216219 UI != E; )
217 if ((BCI = dyn_cast(cast(*UI++))))
218 break;
219
220 // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's
221 // destination type.
222 if (BCI && CI->hasOneUse())
223 return cast(BCI->getDestTy());
220 if (const BitCastInst *BCI = dyn_cast(*UI++)) {
221 MallocType = cast(BCI->getDestTy());
222 NumOfBitCastUses++;
223 }
224
225 // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
226 if (NumOfBitCastUses == 1)
227 return MallocType;
224228
225229 // Malloc call was not bitcast, so type is the malloc function's return type.
226 if (!BCI)
230 if (NumOfBitCastUses == 0)
227231 return cast(CI->getType());
228232
229233 // Type could not be determined.
230234 return NULL;
231235 }
232236
233 /// getMallocAllocatedType - Returns the Type allocated by malloc call. This
234 /// Type is the result type of the call's only bitcast use. If there is no
235 /// unique bitcast use, then return NULL.
237 /// getMallocAllocatedType - Returns the Type allocated by malloc call.
238 /// The Type depends on the number of bitcast uses of the malloc call:
239 /// 0: Type is the element type of the malloc call's return type.
240 /// 1: Type is the element type of the bitcast's result type.
241 /// >1: Unique Type cannot be determined, return NULL.
236242 const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
237243 const PointerType *PT = getMallocType(CI);
238244 return PT ? PT->getElementType() : NULL;
36183618 // Autoupgrade old malloc instruction to malloc call.
36193619 // FIXME: Remove in LLVM 3.0.
36203620 const Type *IntPtrTy = Type::getInt32Ty(Context);
3621 Constant *AllocSize = ConstantExpr::getSizeOf(Ty);
3622 AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
36213623 if (!MallocF)
36223624 // Prototype malloc as "void *(int32)".
36233625 // This function is renamed as "malloc" in ValidateEndOfModule().
36243626 MallocF = cast(
36253627 M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL));
3626 Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, Size, MallocF);
3628 Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, AllocSize, Size, MallocF);
36273629 return false;
36283630 }
36293631
21002100 if (!Ty || !Size) return Error("Invalid MALLOC record");
21012101 if (!CurBB) return Error("Invalid malloc instruction with no BB");
21022102 const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext());
2103 Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType());
2104 AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty);
21032105 I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(),
2104 Size, NULL);
2106 AllocSize, Size, NULL);
21052107 InstructionList.push_back(I);
21062108 break;
21072109 }
821821 /// malloc into a global, and any loads of GV as uses of the new global.
822822 static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
823823 CallInst *CI,
824 BitCastInst *BCI,
824 const Type *AllocTy,
825825 Value* NElems,
826826 LLVMContext &Context,
827827 TargetData* TD) {
828 DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV
829 << " CALL = " << *CI << " BCI = " << *BCI << '\n');
828 DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
830829
831830 const Type *IntPtrTy = TD->getIntPtrType(Context);
832831
832 // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
833 // returned NULL and we would not be here).
834 BitCastInst *BCI = NULL;
835 for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; )
836 if ((BCI = dyn_cast(cast(*UI++))))
837 break;
838
833839 ConstantInt *NElements = cast(NElems);
834840 if (NElements->getZExtValue() != 1) {
835841 // If we have an array allocation, transform it to a single element
836842 // allocation to make the code below simpler.
837 Type *NewTy = ArrayType::get(getMallocAllocatedType(CI),
838 NElements->getZExtValue());
839 Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy);
840 Instruction* NewMI = cast(NewM);
843 Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue());
844 unsigned TypeSize = TD->getTypeAllocSize(NewTy);
845 if (const StructType *ST = dyn_cast(NewTy))
846 TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
847 Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy,
848 ConstantInt::get(IntPtrTy, TypeSize));
841849 Value* Indices[2];
842850 Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
843 Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
844 NewMI->getName()+".el0", CI);
845 BCI->replaceAllUsesWith(NewGEP);
846 BCI->eraseFromParent();
851 Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2,
852 NewCI->getName()+".el0", CI);
853 Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI);
854 if (BCI) BCI->replaceAllUsesWith(NewGEP);
855 CI->replaceAllUsesWith(Cast);
856 if (BCI) BCI->eraseFromParent();
847857 CI->eraseFromParent();
848 BCI = cast(NewMI);
849 CI = extractMallocCallFromBitCast(NewMI);
858 BCI = dyn_cast(NewCI);
859 CI = BCI ? extractMallocCallFromBitCast(BCI) : cast(NewCI);
850860 }
851861
852862 // Create the new global variable. The contents of the malloc'd memory is
860870 GV,
861871 GV->isThreadLocal());
862872
863 // Anything that used the malloc now uses the global directly.
864 BCI->replaceAllUsesWith(NewGV);
873 // Anything that used the malloc or its bitcast now uses the global directly.
874 if (BCI) BCI->replaceAllUsesWith(NewGV);
875 CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI));
865876
866877 Constant *RepValue = NewGV;
867878 if (NewGV->getType() != GV->getType()->getElementType())
929940 GV->getParent()->getGlobalList().insert(GV, InitBool);
930941
931942
932 // Now the GV is dead, nuke it and the malloc.
943 // Now the GV is dead, nuke it and the malloc (both CI and BCI).
933944 GV->eraseFromParent();
934 BCI->eraseFromParent();
945 if (BCI) BCI->eraseFromParent();
935946 CI->eraseFromParent();
936947
937948 // To further other optimizations, loop over all users of NewGV and try to
12721283
12731284 /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
12741285 /// it up into multiple allocations of arrays of the fields.
1275 static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
1276 CallInst *CI, BitCastInst* BCI,
1277 Value* NElems,
1278 LLVMContext &Context,
1286 static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
1287 Value* NElems, LLVMContext &Context,
12791288 TargetData *TD) {
1280 DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI
1281 << " BITCAST = " << *BCI << '\n');
1289 DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
12821290 const Type* MAT = getMallocAllocatedType(CI);
12831291 const StructType *STy = cast(MAT);
12841292
12861294 // it into GV). If there are other uses, change them to be uses of
12871295 // the global to simplify later code. This also deletes the store
12881296 // into GV.
1289 ReplaceUsesOfMallocWithGlobal(BCI, GV);
1290
1297 ReplaceUsesOfMallocWithGlobal(CI, GV);
1298
12911299 // Okay, at this point, there are no users of the malloc. Insert N
12921300 // new mallocs at the same place as CI, and N globals.
12931301 std::vector FieldGlobals;
13051313 GV->isThreadLocal());
13061314 FieldGlobals.push_back(NGV);
13071315
1308 Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
1309 FieldTy, NElems,
1310 BCI->getName() + ".f" + Twine(FieldNo));
1316 unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
1317 if (const StructType* ST = dyn_cast(FieldTy))
1318 TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
1319 const Type* IntPtrTy = TD->getIntPtrType(Context);
1320 Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
1321 ConstantInt::get(IntPtrTy, TypeSize),
1322 NElems,
1323 CI->getName() + ".f" + Twine(FieldNo));
13111324 FieldMallocs.push_back(NMI);
1312 new StoreInst(NMI, NGV, BCI);
1325 new StoreInst(NMI, NGV, CI);
13131326 }
13141327
13151328 // The tricky aspect of this transformation is handling the case when malloc
13261339 // }
13271340 Value *RunningOr = 0;
13281341 for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
1329 Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i],
1330 Constant::getNullValue(FieldMallocs[i]->getType()),
1331 "isnull");
1342 Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
1343 Constant::getNullValue(FieldMallocs[i]->getType()),
1344 "isnull");
13321345 if (!RunningOr)
13331346 RunningOr = Cond; // First seteq
13341347 else
1335 RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI);
1348 RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
13361349 }
13371350
13381351 // Split the basic block at the old malloc.
1339 BasicBlock *OrigBB = BCI->getParent();
1340 BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont");
1352 BasicBlock *OrigBB = CI->getParent();
1353 BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
13411354
13421355 // Create the block to check the first condition. Put all these blocks at the
13431356 // end of the function as they are unlikely to be executed.
13731386 }
13741387
13751388 BranchInst::Create(ContBB, NullPtrBlock);
1376
1377 // CI and BCI are no longer needed, remove them.
1378 BCI->eraseFromParent();
1389
1390 // CI is no longer needed, remove it.
13791391 CI->eraseFromParent();
13801392
13811393 /// InsertedScalarizedLoads - As we process loads, if we can't immediately
14621474 /// cast of malloc.
14631475 static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
14641476 CallInst *CI,
1465 BitCastInst *BCI,
1477 const Type *AllocTy,
14661478 Module::global_iterator &GVI,
14671479 TargetData *TD,
14681480 LLVMContext &Context) {
1469 // If we can't figure out the type being malloced, then we can't optimize.
1470 const Type *AllocTy = getMallocAllocatedType(CI);
1471 assert(AllocTy);
1472
14731481 // If this is a malloc of an abstract type, don't touch it.
14741482 if (!AllocTy->isSized())
14751483 return false;
14901498 // for.
14911499 {
14921500 SmallPtrSet PHIs;
1493 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
1501 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
14941502 return false;
14951503 }
14961504
14981506 // transform the program to use global memory instead of malloc'd memory.
14991507 // This eliminates dynamic allocation, avoids an indirection accessing the
15001508 // data, and exposes the resultant global to further GlobalOpt.
1501 Value *NElems = getMallocArraySize(CI, Context, TD);
15021509 // We cannot optimize the malloc if we cannot determine malloc array size.
1503 if (NElems) {
1510 if (Value *NElems = getMallocArraySize(CI, Context, TD)) {
15041511 if (ConstantInt *NElements = dyn_cast(NElems))
15051512 // Restrict this transformation to only working on small allocations
15061513 // (2048 bytes currently), as we don't want to introduce a 16M global or
15071514 // something.
15081515 if (TD &&
15091516 NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
1510 GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, Context, TD);
1517 GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems,
1518 Context, TD);
15111519 return true;
15121520 }
15131521
15251533 // If the structure has an unreasonable number of fields, leave it
15261534 // alone.
15271535 if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
1528 AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
1536 AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
15291537
15301538 // If this is a fixed size array, transform the Malloc to be an alloc of
15311539 // structs. malloc [100 x struct],1 -> malloc struct, 100
15321540 if (const ArrayType *AT =
15331541 dyn_cast(getMallocAllocatedType(CI))) {
1534 Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
1535 AT->getNumElements());
1536 Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
1537 AllocSTy, NumElements,
1538 BCI->getName());
1539 Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
1540 BCI->replaceAllUsesWith(Cast);
1541 BCI->eraseFromParent();
1542 const Type *IntPtrTy = TD->getIntPtrType(Context);
1543 unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
1544 Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
1545 Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
1546 Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
1547 AllocSize, NumElements,
1548 CI->getName());
1549 Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
1550 CI->replaceAllUsesWith(Cast);
15421551 CI->eraseFromParent();
1543 BCI = cast(NewMI);
1544 CI = extractMallocCallFromBitCast(NewMI);
1552 CI = dyn_cast(Malloc) ?
1553 extractMallocCallFromBitCast(Malloc):
1554 cast(Malloc);
15451555 }
15461556
1547 GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, Context, TD);
1557 GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD),
1558 Context, TD);
15481559 return true;
15491560 }
15501561 }
15761587 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
15771588 return true;
15781589 } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
1579 if (getMallocAllocatedType(CI)) {
1580 BitCastInst* BCI = NULL;
1581 for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
1582 UI != E; )
1583 BCI = dyn_cast(cast(*UI++));
1584 if (BCI &&
1585 TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context))
1586 return true;
1587 }
1590 const Type* MallocType = getMallocAllocatedType(CI);
1591 if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
1592 GVI, TD, Context))
1593 return true;
15881594 }
15891595 }
15901596
16981698
16991699 LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
17001700 const char *Name) {
1701 const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
1702 return wrap(unwrap(B)->Insert(CallInst::CreateMalloc(
1703 unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), 0, 0, ""),
1704 Twine(Name)));
1701 const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
1702 Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
1703 AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
1704 Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
1705 ITy, unwrap(Ty), AllocSize,
1706 0, 0, "");
1707 return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
17051708 }
17061709
17071710 LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
17081711 LLVMValueRef Val, const char *Name) {
1709 const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
1710 return wrap(unwrap(B)->Insert(CallInst::CreateMalloc(
1711 unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), unwrap(Val), 0, ""),
1712 Twine(Name)));
1712 const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
1713 Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
1714 AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
1715 Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
1716 ITy, unwrap(Ty), AllocSize,
1717 unwrap(Val), 0, "");
1718 return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
17131719 }
17141720
17151721 LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
2323 #include "llvm/Support/CallSite.h"
2424 #include "llvm/Support/ConstantRange.h"
2525 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Target/TargetData.h"
2627
2728 using namespace llvm;
2829
447448 return isa(val) && cast(val)->isOne();
448449 }
449450
450 static Value *checkArraySize(Value *Amt, const Type *IntPtrTy) {
451 if (!Amt)
452 Amt = ConstantInt::get(IntPtrTy, 1);
453 else {
454 assert(!isa(Amt) &&
455 "Passed basic block into malloc size parameter! Use other ctor");
456 assert(Amt->getType() == IntPtrTy &&
457 "Malloc array size is not an intptr!");
458 }
459 return Amt;
460 }
461
462451 static Instruction *createMalloc(Instruction *InsertBefore,
463452 BasicBlock *InsertAtEnd, const Type *IntPtrTy,
464 const Type *AllocTy, Value *ArraySize,
465 Function *MallocF, const Twine &NameStr) {
453 const Type *AllocTy, Value *AllocSize,
454 Value *ArraySize, Function *MallocF,
455 const Twine &Name) {
466456 assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
467457 "createMalloc needs either InsertBefore or InsertAtEnd");
468458
470460 // bitcast (i8* malloc(typeSize)) to type*
471461 // malloc(type, arraySize) becomes:
472462 // bitcast (i8 *malloc(typeSize*arraySize)) to type*
473 Value *AllocSize = ConstantExpr::getSizeOf(AllocTy);
474 AllocSize = ConstantExpr::getTruncOrBitCast(cast(AllocSize),
475 IntPtrTy);
476 ArraySize = checkArraySize(ArraySize, IntPtrTy);
463 if (!ArraySize)
464 ArraySize = ConstantInt::get(IntPtrTy, 1);
465 else if (ArraySize->getType() != IntPtrTy) {
466 if (InsertBefore)
467 ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertBefore);
468 else
469 ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertAtEnd);
470 }
477471
478472 if (!IsConstantOne(ArraySize)) {
479473 if (IsConstantOne(AllocSize)) {
512506 Result = MCall;
513507 if (Result->getType() != AllocPtrType)
514508 // Create a cast instruction to convert to the right type...
515 Result = new BitCastInst(MCall, AllocPtrType, NameStr, InsertBefore);
509 Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
516510 } else {
517511 MCall = CallInst::Create(MallocF, AllocSize, "malloccall");
518512 Result = MCall;
519513 if (Result->getType() != AllocPtrType) {
520514 InsertAtEnd->getInstList().push_back(MCall);
521515 // Create a cast instruction to convert to the right type...
522 Result = new BitCastInst(MCall, AllocPtrType, NameStr);
516 Result = new BitCastInst(MCall, AllocPtrType, Name);
523517 }
524518 }
525519 MCall->setTailCall();
537531 /// 3. Bitcast the result of the malloc call to the specified type.
538532 Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
539533 const Type *IntPtrTy, const Type *AllocTy,
540 Value *ArraySize, const Twine &Name) {
541 return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy,
534 Value *AllocSize, Value *ArraySize,
535 const Twine &Name) {
536 return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
542537 ArraySize, NULL, Name);
543538 }
544539
552547 /// responsibility of the caller.
553548 Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
554549 const Type *IntPtrTy, const Type *AllocTy,
555 Value *ArraySize, Function* MallocF,
556 const Twine &Name) {
557 return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy,
550 Value *AllocSize, Value *ArraySize,
551 Function *MallocF, const Twine &Name) {
552 return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
558553 ArraySize, MallocF, Name);
559554 }
560555
3030 }
3131
3232 declare i32 @bar(i8*)
33 declare i32 @bar2(i64*)
3334
3435 define i32 @foo1(i32 %n) nounwind {
3536 entry:
5960 ret i32 %add16
6061 }
6162
62 define i32 @foo2(i32 %n) nounwind {
63 define i32 @foo2(i64 %n) nounwind {
6364 entry:
64 %call = malloc i8, i32 %n ; [#uses=1]
65 %call = tail call i8* @malloc(i64 %n) ; [#uses=1]
6566 ; CHECK: %call =
6667 ; CHECK: ==> %n elements, %n bytes allocated
68 %mallocsize = mul i64 %n, 8 ; [#uses=1]
69 %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1]
70 %call3 = bitcast i8* %malloccall to i64* ; [#uses=1]
71 ; CHECK: %malloccall =
72 ; CHECK: ==> (8 * %n) elements, (8 * %n) bytes allocated
6773 %call2 = tail call i8* @calloc(i64 2, i64 4) nounwind ; [#uses=1]
6874 ; CHECK: %call2 =
6975 ; CHECK: ==> 8 elements, 8 bytes allocated
7177 ; CHECK: %call4 =
7278 ; CHECK: ==> 16 elements, 16 bytes allocated
7379 %call6 = tail call i32 @bar(i8* %call) nounwind ; [#uses=1]
80 %call7 = tail call i32 @bar2(i64* %call3) nounwind ; [#uses=1]
7481 %call8 = tail call i32 @bar(i8* %call2) nounwind ; [#uses=1]
7582 %call10 = tail call i32 @bar(i8* %call4) nounwind ; [#uses=1]
76 %add = add i32 %call8, %call6 ; [#uses=1]
77 %add11 = add i32 %add, %call10 ; [#uses=1]
83 %add = add i32 %call8, %call6 ; [#uses=1]
84 %add10 = add i32 %add, %call7 ; [#uses=1]
85 %add11 = add i32 %add10, %call10 ; [#uses=1]
7886 ret i32 %add11
7987 }
88
89 declare noalias i8* @malloc(i64) nounwind
8090
8191 declare noalias i8* @calloc(i64, i64) nounwind
8292
0 ; RUN: opt < %s -globalopt
1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
12
23 %struct.s_annealing_sched = type { i32, float, float, float, float }
34 %struct.s_bb = type { i32, i32, i32, i32 }
9596 unreachable
9697
9798 bb1.i38: ; preds = %bb
98 %0 = malloc %struct.s_net, i32 undef ; <%struct.s_net*> [#uses=1]
99 %mallocsize = mul i64 28, undef ; > [#uses=1]
100 %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1]
101 %0 = bitcast i8* %malloccall to %struct.s_net* ; <%struct.s_net*> [#uses=1]
99102 br i1 undef, label %bb.i1.i39, label %my_malloc.exit2.i
100103
101104 bb.i1.i39: ; preds = %bb1.i38
114117 bb7: ; preds = %bb6.preheader
115118 unreachable
116119 }
120
121 declare noalias i8* @malloc(i64)
None ; RUN: opt < %s -globalopt -S | grep {@X.f0}
1 ; RUN: opt < %s -globalopt -S | grep {@X.f1}
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
3 target triple = "i386-apple-darwin7"
0 ; RUN: opt < %s -globalopt -S | FileCheck %s
1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
42
53 %struct.foo = type { i32, i32 }
64 @X = internal global %struct.foo* null
5 ; CHECK: @X.f0
6 ; CHECK: @X.f1
77
8 define void @bar(i32 %Size) nounwind noinline {
8 define void @bar(i64 %Size) nounwind noinline {
99 entry:
10 %.sub = malloc %struct.foo, i32 %Size
10 %mallocsize = mul i64 %Size, 8 ; [#uses=1]
11 %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1]
12 %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
1113 store %struct.foo* %.sub, %struct.foo** @X, align 4
1214 ret void
1315 }
16
17 declare noalias i8* @malloc(i64)
1418
1519 define i32 @baz() nounwind readonly noinline {
1620 bb1.thread:
None ; RUN: opt < %s -globalopt -S | grep {@X.f0}
1 ; RUN: opt < %s -globalopt -S | grep {@X.f1}
0 ; RUN: opt < %s -globalopt -S | FileCheck %s
1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
22
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
4 target triple = "i386-apple-darwin7"
53 %struct.foo = type { i32, i32 }
64 @X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2]
5 ; CHECK: @X.f0
6 ; CHECK: @X.f1
77
88 define void @bar(i32 %Size) nounwind noinline {
99 entry:
10 %0 = malloc [1000000 x %struct.foo]
11 ;%.sub = bitcast [1000000 x %struct.foo]* %0 to %struct.foo*
10 %malloccall = tail call i8* @malloc(i64 8000000) ; [#uses=1]
11 %0 = bitcast i8* %malloccall to [1000000 x %struct.foo]* ; <[1000000 x %struct.foo]*> [#uses=1]
1212 %.sub = getelementptr [1000000 x %struct.foo]* %0, i32 0, i32 0 ; <%struct.foo*> [#uses=1]
1313 store %struct.foo* %.sub, %struct.foo** @X, align 4
1414 ret void
1515 }
16
17 declare noalias i8* @malloc(i64)
1618
1719 define i32 @baz() nounwind readonly noinline {
1820 bb1.thread:
0 ; RUN: opt < %s -globalopt -S | FileCheck %s
1
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
3 target triple = "i386-apple-darwin10"
1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
42
53 %struct.foo = type { i32, i32 }
64 @X = internal global %struct.foo* null
75 ; CHECK: @X.f0
86 ; CHECK: @X.f1
97
10 define void @bar(i32 %Size) nounwind noinline {
8 define void @bar(i64 %Size) nounwind noinline {
119 entry:
12 %mallocsize = mul i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), %Size, ; [#uses=1]
13 ; CHECK: mul i32 %Size
14 %malloccall = tail call i8* @malloc(i32 %mallocsize) ; <i8*> [#uses=1]
10 %mallocsize = mul i64 8, %Size, ; <i64> [#uses=1]
11 ; CHECK: mul i64 %Size, 4
12 %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1]
1513 %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
1614 store %struct.foo* %.sub, %struct.foo** @X, align 4
1715 ret void
1816 }
1917
20 declare noalias i8* @malloc(i32)
18 declare noalias i8* @malloc(i64)
2119
2220 define i32 @baz() nounwind readonly noinline {
2321 bb1.thread:
0 ; RUN: opt < %s -globalopt -S | FileCheck %s
1
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
3 target triple = "i386-apple-darwin7"
1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
42
53 %struct.foo = type { i32, i32 }
64 @X = internal global %struct.foo* null
75 ; CHECK: @X.f0
86 ; CHECK: @X.f1
97
10 define void @bar(i32 %Size) nounwind noinline {
8 define void @bar(i64 %Size) nounwind noinline {
119 entry:
12 %mallocsize = shl i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), 9, ; [#uses=1]
13 %malloccall = tail call i8* @malloc(i32 %mallocsize) ; [#uses=1]
14 ; CHECK: @malloc(i32 mul (i32 512
10 %mallocsize = shl i64 %Size, 3 ; [#uses=1]
11 %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1]
12 ; CHECK: mul i64 %Size, 4
1513 %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
1614 store %struct.foo* %.sub, %struct.foo** @X, align 4
1715 ret void
1816 }
1917
20 declare noalias i8* @malloc(i32)
18 declare noalias i8* @malloc(i64)
2119
2220 define i32 @baz() nounwind readonly noinline {
2321 bb1.thread:
0 ; RUN: opt < %s -globalopt -S | grep {tmp.f1 = phi i32. }
11 ; RUN: opt < %s -globalopt -S | grep {tmp.f0 = phi i32. }
2 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
23
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
4 target triple = "i386-apple-darwin7"
54 %struct.foo = type { i32, i32 }
65 @X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2]
76
87 define void @bar(i32 %Size) nounwind noinline {
98 entry:
10 %tmp = malloc [1000000 x %struct.foo] ; <[1000000 x %struct.foo]*> [#uses=1]
9 %malloccall = tail call i8* @malloc(i64 8000000) ; <i8*> [#uses=1]
10 %tmp = bitcast i8* %malloccall to [1000000 x %struct.foo]* ; <[1000000 x %struct.foo]*> [#uses=1]
1111 %.sub = getelementptr [1000000 x %struct.foo]* %tmp, i32 0, i32 0 ; <%struct.foo*> [#uses=1]
1212 store %struct.foo* %.sub, %struct.foo** @X, align 4
1313 ret void
1414 }
15
16 declare noalias i8* @malloc(i64)
1517
1618 define i32 @baz() nounwind readonly noinline {
1719 bb1.thread:
None ; RUN: opt < %s -globalopt -S | not grep global
0 ; RUN: opt < %s -globalopt -S | FileCheck %s
11 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
22
33 @G = internal global i32* null ; [#uses=3]
4 ; CHECK-NOT: global
45
56 define void @init() {
6 %P = malloc i32 ; <i32*> [#uses=1]
7 %malloccall = tail call i8* @malloc(i64 4) ; <i8*> [#uses=1]
8 %P = bitcast i8* %malloccall to i32* ; [#uses=1]
79 store i32* %P, i32** @G
810 %GV = load i32** @G ; [#uses=1]
911 store i32 0, i32* %GV
1012 ret void
1113 }
1214
15 declare noalias i8* @malloc(i64)
16
1317 define i32 @get() {
1418 %GV = load i32** @G ; [#uses=1]
1519 %V = load i32* %GV ; [#uses=1]
1620 ret i32 %V
21 ; CHECK: ret i32 0
1722 }
1823
0 ; RUN: opt < %s -globalopt -globaldce -S | not grep malloc
1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
2 target triple = "i686-apple-darwin8"
1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
32
43 @G = internal global i32* null ; [#uses=3]
54
65 define void @init() {
7 %P = malloc i32, i32 100 ; <i32*> [#uses=1]
6 %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) ; <i8*> [#uses=1]
7 %P = bitcast i8* %malloccall to i32* ; [#uses=1]
88 store i32* %P, i32** @G
99 %GV = load i32** @G ; [#uses=1]
1010 %GVe = getelementptr i32* %GV, i32 40 ; [#uses=1]
1111 store i32 20, i32* %GVe
1212 ret void
1313 }
14
15 declare noalias i8* @malloc(i64)
1416
1517 define i32 @get() {
1618 %GV = load i32** @G ; [#uses=1]
0 ; RUN: opt < %s -globalopt -globaldce -S | not grep malloc
1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
2 target triple = "i686-apple-darwin8"
1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
32
43 @G = internal global i32* null ; [#uses=4]
54
65 define void @init() {
7 %P = malloc i32, i32 100 ; <i32*> [#uses=1]
6 %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) ; <i8*> [#uses=1]
7 %P = bitcast i8* %malloccall to i32* ; [#uses=1]
88 store i32* %P, i32** @G
99 %GV = load i32** @G ; [#uses=1]
1010 %GVe = getelementptr i32* %GV, i32 40 ; [#uses=1]
1111 store i32 20, i32* %GVe
1212 ret void
1313 }
14
15 declare noalias i8* @malloc(i64)
1416
1517 define i32 @get() {
1618 %GV = load i32** @G ; [#uses=1]