llvm.org GIT mirror: llvm / 7f04b36

SROA CBE Fix.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_25@63790 91177308-0d34-0410-b5e6-96231b3b80d8

Tanya Lattner, 11 years ago
6 changed files with 401 additions and 343 deletions.
@@ -124 +124 @@
     void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
 
-    bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
-                            uint64_t Offset, unsigned AllocaSize);
-    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+    const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial);
+    void ConvertToScalar(AllocationInst *AI, const Type *Ty);
+    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset);
     Value *ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI,
-                                     uint64_t Offset);
-    Value *ConvertUsesOfStoreToScalar(Value *StoredVal, AllocaInst *NewAI,
-                                      uint64_t Offset, Instruction *InsertPt);
+                                     unsigned Offset);
+    Value *ConvertUsesOfStoreToScalar(StoreInst *SI, AllocaInst *NewAI,
+                                      unsigned Offset);
     static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
   };
 }
@@ -222 +222 @@
       AI->eraseFromParent();
       continue;
     }
-
-    // If this alloca is impossible for us to promote, reject it early.
-    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
-      continue;
-
-    // Check to see if this allocation is only modified by a memcpy/memmove from
-    // a constant global.  If this is the case, we can change all users to use
-    // the constant global instead.  This is commonly produced by the CFE by
-    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
-    // is only subsequently read.
-    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
-      DOUT << "Found alloca equal to global: " << *AI;
-      DOUT << "  memcpy = " << *TheCopy;
-      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
-      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
-      TheCopy->eraseFromParent();  // Don't mutate the global.
-      AI->eraseFromParent();
-      ++NumGlobals;
-      Changed = true;
-      continue;
-    }
 
     // Check to see if we can perform the core SROA transformation.  We cannot
     // transform the allocation instruction if it is an array allocation
     // (allocations OF arrays are ok though), and an allocation of a scalar
     // value cannot be decomposed at all.
-    uint64_t AllocaSize = TD->getTypePaddedSize(AI->getAllocatedType());
-
-    if ((isa<StructType>(AI->getAllocatedType()) ||
+    if (!AI->isArrayAllocation() &&
+        (isa<StructType>(AI->getAllocatedType()) ||
          isa<ArrayType>(AI->getAllocatedType())) &&
-        // Do not promote any struct whose size is too big.
-        AllocaSize < SRThreshold &&
+        AI->getAllocatedType()->isSized() &&
+        // Do not promote any struct whose size is larger than "128" bytes.
+        TD->getTypePaddedSize(AI->getAllocatedType()) < SRThreshold &&
         // Do not promote any struct into more than "32" separate vars.
         getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) {
       // Check that all of the users of the allocation are capable of being
@@ -271 +250 @@
         continue;
       }
     }
+
+    // Check to see if this allocation is only modified by a memcpy/memmove from
+    // a constant global.  If this is the case, we can change all users to use
+    // the constant global instead.  This is commonly produced by the CFE by
+    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+    // is only subsequently read.
+    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
+      DOUT << "Found alloca equal to global: " << *AI;
+      DOUT << "  memcpy = " << *TheCopy;
+      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
+      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
+      TheCopy->eraseFromParent();  // Don't mutate the global.
+      AI->eraseFromParent();
+      ++NumGlobals;
+      Changed = true;
+      continue;
+    }
 
     // If we can turn this aggregate value (potentially with casts) into a
     // simple scalar value that can be mem2reg'd into a register value.
-    // IsNotTrivial tracks whether this is something that mem2reg could have
-    // promoted itself.  If so, we don't want to transform it needlessly.  Note
-    // that we can't just check based on the type: the alloca may be of an i32
-    // but that has pointer arithmetic to set byte 3 of it or something.
     bool IsNotTrivial = false;
-    const Type *VectorTy = 0;
-    if (CanConvertToScalar(AI, IsNotTrivial, VectorTy,
-                           0, unsigned(AllocaSize)) && IsNotTrivial) {
-      AllocaInst *NewAI;
-      if (VectorTy && isa<VectorType>(VectorTy)) {
-        DOUT << "CONVERT TO VECTOR: " << *AI << "  TYPE = " << *VectorTy << "\n";
-
-        // Create and insert the vector alloca.
-        NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
-        ConvertUsesToScalar(AI, NewAI, 0);
-      } else {
-        DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n";
-
-        // Create and insert the integer alloca.
-        const Type *NewTy = IntegerType::get(AllocaSize*8);
-        NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
-        ConvertUsesToScalar(AI, NewAI, 0);
-      }
-      NewAI->takeName(AI);
-      AI->eraseFromParent();
-      ++NumConverted;
-      Changed = true;
-      continue;
-    }
-
-    // Otherwise, couldn't process this alloca.
+    if (const Type *ActualType = CanConvertToScalar(AI, IsNotTrivial))
+      if (IsNotTrivial && ActualType != Type::VoidTy) {
+        ConvertToScalar(AI, ActualType);
+        Changed = true;
+        continue;
+      }
+
+    // Otherwise, couldn't process this.
   }
 
   return Changed;
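For context, the isOnlyCopiedFromConstantGlobal path kept on both sides of this hunk replaces an alloca whose only writer is a memcpy/memmove from a constant global with the global itself. A minimal sketch in 2.5-era IR; the names @C and @read_copy are made up for illustration:

@C = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]

declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)

define i32 @read_copy() {
	%A = alloca [4 x i32]		; only written by the memcpy below
	%dst = bitcast [4 x i32]* %A to i8*
	call void @llvm.memcpy.i32(i8* %dst, i8* bitcast ([4 x i32]* @C to i8*), i32 16, i32 4)
	%p = getelementptr [4 x i32]* %A, i32 0, i32 2
	%v = load i32* %p
	ret i32 %v
}

Since %A is never modified after the copy, every use is redirected to a bitcast of @C, and both the memcpy and the alloca are deleted (the ++NumGlobals case above).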
@@ -1170 +1144 @@
   }
 }
 
-/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at
-/// the offset specified by Offset (which is specified in bytes).
+/// MergeInType - Add the 'In' type to the accumulated type so far.  If the
+/// types are incompatible, return true, otherwise update Accum and return
+/// false.
 ///
-/// There are two cases we handle here:
-///   1) A union of vector types of the same size and potentially its elements.
+/// There are three cases we handle here:
+///   1) An effectively-integer union, where the pieces are stored into as
+///      smaller integers (common with byte swap and other idioms).
+///   2) A union of vector types of the same size and potentially its elements.
 ///      Here we turn element accesses into insert/extract element operations.
-///      This promotes a <4 x float> with a store of float to the third element
-///      into a <4 x float> that uses insert element.
-///   2) A fully general blob of memory, which we turn into some (potentially
-///      large) integer type with extract and insert operations where the loads
-///      and stores would mutate the memory.
-static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
-                        unsigned AllocaSize, const TargetData &TD) {
-  // If this could be contributing to a vector, analyze it.
-  if (VecTy != Type::VoidTy) { // either null or a vector type.
-
-    // If the In type is a vector that is the same size as the alloca, see if it
-    // matches the existing VecTy.
-    if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
-      if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
-        // If we're storing/loading a vector of the right size, allow it as a
-        // vector.  If this is the first vector we see, remember the type so that
-        // we know the element size.
-        if (VecTy == 0)
-          VecTy = VInTy;
-        return;
-      }
-    } else if (In == Type::FloatTy || In == Type::DoubleTy ||
-               (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
-                isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
-      // If we're accessing something that could be an element of a vector, see
-      // if the implied vector agrees with what we already have and if Offset is
-      // compatible with it.
-      unsigned EltSize = In->getPrimitiveSizeInBits()/8;
-      if (Offset % EltSize == 0 &&
-          AllocaSize % EltSize == 0 &&
-          (VecTy == 0 ||
-           cast<VectorType>(VecTy)->getElementType()
-                 ->getPrimitiveSizeInBits()/8 == EltSize)) {
-        if (VecTy == 0)
-          VecTy = VectorType::get(In, AllocaSize/EltSize);
-        return;
-      }
-    }
-  }
-
-  // Otherwise, we have a case that we can't handle with an optimized vector
-  // form.  We can still turn this into a large integer.
-  VecTy = Type::VoidTy;
-}
-
-/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
-/// its accesses to use a single vector type, return true, and set VecTy to
-/// the new type.  If we could convert the alloca into a single promotable
-/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
-/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
-/// is the current offset from the base of the alloca being analyzed.
+///   3) A union of scalar types, such as int/float or int/pointer.  Here we
+///      merge together into integers, allowing the xform to work with #1 as
+///      well.
+static bool MergeInType(const Type *In, const Type *&Accum,
+                        const TargetData &TD) {
+  // If this is our first type, just use it.
+  const VectorType *PTy;
+  if (Accum == Type::VoidTy || In == Accum) {
+    Accum = In;
+  } else if (In == Type::VoidTy) {
+    // Noop.
+  } else if (In->isInteger() && Accum->isInteger()) {   // integer union.
+    // Otherwise pick whichever type is larger.
+    if (cast<IntegerType>(In)->getBitWidth() >
+        cast<IntegerType>(Accum)->getBitWidth())
+      Accum = In;
+  } else if (isa<PointerType>(In) && isa<PointerType>(Accum)) {
+    // Pointer unions just stay as one of the pointers.
+  } else if (isa<VectorType>(In) || isa<VectorType>(Accum)) {
+    if ((PTy = dyn_cast<VectorType>(Accum)) &&
+        PTy->getElementType() == In) {
+      // Accum is a vector, and we are accessing an element: ok.
+    } else if ((PTy = dyn_cast<VectorType>(In)) &&
+               PTy->getElementType() == Accum) {
+      // In is a vector, and accum is an element: ok, remember In.
+      Accum = In;
+    } else if ((PTy = dyn_cast<VectorType>(In)) && isa<VectorType>(Accum) &&
+               PTy->getBitWidth() == cast<VectorType>(Accum)->getBitWidth()) {
+      // Two vectors of the same size: keep Accum.
+    } else {
+      // Cannot insert a short into a <4 x int> or handle
+      // <2 x int> -> <4 x int>
+      return true;
+    }
+  } else {
+    // Pointer/FP/Integer unions merge together as integers.
+    switch (Accum->getTypeID()) {
+    case Type::PointerTyID: Accum = TD.getIntPtrType(); break;
+    case Type::FloatTyID:   Accum = Type::Int32Ty; break;
+    case Type::DoubleTyID:  Accum = Type::Int64Ty; break;
+    case Type::X86_FP80TyID:  return true;
+    case Type::FP128TyID: return true;
+    case Type::PPC_FP128TyID: return true;
+    default:
+      assert(Accum->isInteger() && "Unknown FP type!");
+      break;
+    }
+
+    switch (In->getTypeID()) {
+    case Type::PointerTyID: In = TD.getIntPtrType(); break;
+    case Type::FloatTyID:   In = Type::Int32Ty; break;
+    case Type::DoubleTyID:  In = Type::Int64Ty; break;
+    case Type::X86_FP80TyID:  return true;
+    case Type::FP128TyID: return true;
+    case Type::PPC_FP128TyID: return true;
+    default:
+      assert(In->isInteger() && "Unknown FP type!");
+      break;
+    }
+    return MergeInType(In, Accum, TD);
+  }
+  return false;
+}
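A hedged illustration of the scalar-union case (#3) handled by the restored MergeInType: a double and an i64 stored through the same alloca resolve the accumulated type to the 64-bit integer. The function below is made up for illustration:

define i64 @int_fp_union(double %D, i64 %I) {
	%U = alloca double
	store double %D, double* %U	; double merges to Int64Ty
	%P = bitcast double* %U to i64*
	store i64 %I, i64* %P		; i64 then keeps Accum at i64
	%V = load i64* %P
	ret i64 %V
}

MergeInType maps FloatTy to Int32Ty, DoubleTy to Int64Ty, and pointers to the target's intptr type before recursing, which lets case #1's integer-union rule pick the wider type.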
+
+/// getIntAtLeastAsBigAs - Return an integer type that is at least as big as the
+/// specified number of bits.  If there is no suitable type, this returns null.
+const Type *getIntAtLeastAsBigAs(unsigned NumBits) {
+  if (NumBits > 64) return 0;
+  if (NumBits > 32) return Type::Int64Ty;
+  if (NumBits > 16) return Type::Int32Ty;
+  if (NumBits > 8) return Type::Int16Ty;
+  return Type::Int8Ty;
+}
+
+/// CanConvertToScalar - V is a pointer.  If we can convert the pointee to a
+/// single scalar integer type, return that type.  Further, if the use is not
+/// a completely trivial use that mem2reg could promote, set IsNotTrivial.  If
+/// there are no uses of this pointer, return Type::VoidTy to differentiate from
+/// failure.
 ///
-bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial,
-                              const Type *&VecTy, uint64_t Offset,
-                              unsigned AllocaSize) {
+const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) {
+  const Type *UsedType = Type::VoidTy; // No uses, no forced type.
+  const PointerType *PTy = cast<PointerType>(V->getType());
+
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
     Instruction *User = cast<Instruction>(*UI);
 
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      // Don't break volatile loads.
       if (LI->isVolatile())
-        return false;
-      MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD);
+        return 0;
+
+      // FIXME: Loads of a first class aggregate value could be converted to a
+      // series of loads and insertvalues
+      if (!LI->getType()->isSingleValueType())
+        return 0;
+
+      if (MergeInType(LI->getType(), UsedType, *TD))
+        return 0;
       continue;
     }
 
     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       // Storing the pointer, not into the value?
       if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
-      MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD);
-      continue;
-    }
-
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
-      if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, Offset, AllocaSize))
-        return false;
+
+      // FIXME: Stores of a first class aggregate value could be converted to a
+      // series of extractvalues and stores
+      if (!SI->getOperand(0)->getType()->isSingleValueType())
+        return 0;
+
+      // NOTE: We could handle storing of FP imms into integers here!
+
+      if (MergeInType(SI->getOperand(0)->getType(), UsedType, *TD))
+        return 0;
+      continue;
+    }
+    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
       IsNotTrivial = true;
+      const Type *SubTy = CanConvertToScalar(CI, IsNotTrivial);
+      if (!SubTy || MergeInType(SubTy, UsedType, *TD)) return 0;
       continue;
     }
 
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
-      // If this is a GEP with variable indices, we can't handle it.
-      if (!GEP->hasAllConstantIndices())
-        return false;
-
-      // Compute the offset that this GEP adds to the pointer.
-      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
-      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
-                                                &Indices[0], Indices.size());
-      // See if all uses can be converted.
-      if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, Offset+GEPOffset,
-                              AllocaSize))
-        return false;
-      IsNotTrivial = true;
-      continue;
-    }
-
-    // If this is a constant sized memset of a constant value (e.g. 0) we can
-    // handle it.
-    if (isa<MemSetInst>(User) &&
-        // Store of constant value.
-        isa<ConstantInt>(User->getOperand(2)) &&
-        // Store with constant size.
-        isa<ConstantInt>(User->getOperand(3))) {
-      VecTy = Type::VoidTy;
-      IsNotTrivial = true;
-      continue;
-    }
-
-    // Otherwise, we cannot handle this!
-    return false;
-  }
-
-  return true;
+      // Check to see if this is stepping over an element: GEP Ptr, int C
+      if (GEP->getNumOperands() == 2 && isa<ConstantInt>(GEP->getOperand(1))) {
+        unsigned Idx = cast<ConstantInt>(GEP->getOperand(1))->getZExtValue();
+        unsigned ElSize = TD->getTypePaddedSize(PTy->getElementType());
+        unsigned BitOffset = Idx*ElSize*8;
+        if (BitOffset > 64 || !isPowerOf2_32(ElSize)) return 0;
+
+        IsNotTrivial = true;
+        const Type *SubElt = CanConvertToScalar(GEP, IsNotTrivial);
+        if (SubElt == 0) return 0;
+        if (SubElt != Type::VoidTy && SubElt->isInteger()) {
+          const Type *NewTy =
+            getIntAtLeastAsBigAs(TD->getTypePaddedSizeInBits(SubElt)+BitOffset);
+          if (NewTy == 0 || MergeInType(NewTy, UsedType, *TD)) return 0;
+          continue;
+        }
+        // Cannot handle this!
+        return 0;
+      }
+
+      if (GEP->getNumOperands() == 3 &&
+          isa<ConstantInt>(GEP->getOperand(1)) &&
+          isa<ConstantInt>(GEP->getOperand(2)) &&
+          cast<ConstantInt>(GEP->getOperand(1))->isZero()) {
+        // We are stepping into an element, e.g. a structure or an array:
+        // GEP Ptr, i32 0, i32 Cst
+        const Type *AggTy = PTy->getElementType();
+        unsigned Idx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+
+        if (const ArrayType *ATy = dyn_cast<ArrayType>(AggTy)) {
+          if (Idx >= ATy->getNumElements()) return 0;  // Out of range.
+        } else if (const VectorType *VectorTy = dyn_cast<VectorType>(AggTy)) {
+          // Getting an element of the vector.
+          if (Idx >= VectorTy->getNumElements()) return 0;  // Out of range.
+
+          // Merge in the vector type.
+          if (MergeInType(VectorTy, UsedType, *TD)) return 0;
+
+          const Type *SubTy = CanConvertToScalar(GEP, IsNotTrivial);
+          if (SubTy == 0) return 0;
+
+          if (SubTy != Type::VoidTy && MergeInType(SubTy, UsedType, *TD))
+            return 0;
+
+          // We'll need to change this to an insert/extract element operation.
+          IsNotTrivial = true;
+          continue;    // Everything looks ok
+
+        } else if (isa<StructType>(AggTy)) {
+          // Structs are always ok.
+        } else {
+          return 0;
+        }
+        const Type *NTy =
+          getIntAtLeastAsBigAs(TD->getTypePaddedSizeInBits(AggTy));
+        if (NTy == 0 || MergeInType(NTy, UsedType, *TD)) return 0;
+        const Type *SubTy = CanConvertToScalar(GEP, IsNotTrivial);
+        if (SubTy == 0) return 0;
+        if (SubTy != Type::VoidTy && MergeInType(SubTy, UsedType, *TD))
+          return 0;
+        continue;    // Everything looks ok
+      }
+      return 0;
+    }
+
+    // Cannot handle this!
+    return 0;
+  }
+
+  return UsedType;
+}
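As a worked example of the restored analysis (a sketch, not part of the commit): an i32 alloca accessed through an i16 bitcast is neither a struct nor an array, so the core SROA transformation skips it, but CanConvertToScalar accepts it. The bitcast sets IsNotTrivial, the i16 load and i32 store merge to UsedType = i32, and the alloca is rebuilt as a plain i32 that mem2reg can promote:

define i16 @low_half(i32 %x) {
	%U = alloca i32
	store i32 %x, i32* %U
	%H = bitcast i32* %U to i16*
	%lo = load i16* %H		; becomes a trunc of the merged i32
	ret i16 %lo
}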
+
+/// ConvertToScalar - The specified alloca passes the CanConvertToScalar
+/// predicate and is non-trivial.  Convert it to something that can be trivially
+/// promoted into a register by mem2reg.
+void SROA::ConvertToScalar(AllocationInst *AI, const Type *ActualTy) {
+  DOUT << "CONVERT TO SCALAR: " << *AI << "  TYPE = "
+       << *ActualTy << "\n";
+  ++NumConverted;
+
+  BasicBlock *EntryBlock = AI->getParent();
+  assert(EntryBlock == &EntryBlock->getParent()->getEntryBlock() &&
+         "Not in the entry block!");
+  EntryBlock->getInstList().remove(AI);  // Take the alloca out of the program.
+
+  // Create and insert the alloca.
+  AllocaInst *NewAI = new AllocaInst(ActualTy, 0, AI->getName(),
+                                     EntryBlock->begin());
+  ConvertUsesToScalar(AI, NewAI, 0);
+  delete AI;
 }
 
 
@@ -1300 +1377 @@
 ///
 /// Offset is an offset from the original alloca, in bits that need to be
 /// shifted to the right.  By the end of this, there should be no uses of Ptr.
-void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
+void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
   while (!Ptr->use_empty()) {
     Instruction *User = cast<Instruction>(Ptr->use_back());
 
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      LI->replaceAllUsesWith(ConvertUsesOfLoadToScalar(LI, NewAI, Offset));
+      Value *NV = ConvertUsesOfLoadToScalar(LI, NewAI, Offset);
+      LI->replaceAllUsesWith(NV);
       LI->eraseFromParent();
       continue;
     }
 
     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       assert(SI->getOperand(0) != Ptr && "Consistency error!");
-      new StoreInst(ConvertUsesOfStoreToScalar(SI->getOperand(0), NewAI,
-                                               Offset, SI), NewAI, SI);
+
+      Value *SV = ConvertUsesOfStoreToScalar(SI, NewAI, Offset);
+      new StoreInst(SV, NewAI, SI);
       SI->eraseFromParent();
       continue;
     }
 
     if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
       ConvertUsesToScalar(CI, NewAI, Offset);
       CI->eraseFromParent();
       continue;
     }
 
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
-      // Compute the offset that this GEP adds to the pointer.
-      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
-      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
-                                                &Indices[0], Indices.size());
-      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+      const PointerType *AggPtrTy =
+        cast<PointerType>(GEP->getOperand(0)->getType());
+      unsigned AggSizeInBits =
+        TD->getTypePaddedSizeInBits(AggPtrTy->getElementType());
+
+      // Check to see if this is stepping over an element: GEP Ptr, int C
+      unsigned NewOffset = Offset;
+      if (GEP->getNumOperands() == 2) {
+        unsigned Idx = cast<ConstantInt>(GEP->getOperand(1))->getZExtValue();
+        unsigned BitOffset = Idx*AggSizeInBits;
+
+        NewOffset += BitOffset;
+        ConvertUsesToScalar(GEP, NewAI, NewOffset);
+        GEP->eraseFromParent();
+        continue;
+      }
+
+      assert(GEP->getNumOperands() == 3 && "Unsupported operation");
+
+      // We know that operand #2 is zero.
+      unsigned Idx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+      const Type *AggTy = AggPtrTy->getElementType();
+      if (const SequentialType *SeqTy = dyn_cast<SequentialType>(AggTy)) {
+        unsigned ElSizeBits =
+          TD->getTypePaddedSizeInBits(SeqTy->getElementType());
+
+        NewOffset += ElSizeBits*Idx;
+      } else {
+        const StructType *STy = cast<StructType>(AggTy);
+        unsigned EltBitOffset =
+          TD->getStructLayout(STy)->getElementOffsetInBits(Idx);
+
+        NewOffset += EltBitOffset;
+      }
+      ConvertUsesToScalar(GEP, NewAI, NewOffset);
       GEP->eraseFromParent();
       continue;
     }
-
-    // If this is a constant sized memset of a constant value (e.g. 0) we can
-    // transform it into a store of the expanded constant value.
-    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
-      assert(MSI->getRawDest() == Ptr && "Consistency error!");
-      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
-      unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
-
-      // Compute the value replicated the right number of times.
-      APInt APVal(NumBytes*8, Val);
-
-      // Splat the value if non-zero.
-      if (Val)
-        for (unsigned i = 1; i != NumBytes; ++i)
-          APVal |= APVal << 8;
-
-      new StoreInst(ConvertUsesOfStoreToScalar(ConstantInt::get(APVal), NewAI,
-                                               Offset, MSI), NewAI, MSI);
-      MSI->eraseFromParent();
-      continue;
-    }
-
 
     assert(0 && "Unsupported operation!");
     abort();
   }
 }
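The memset handling deleted from ConvertUsesToScalar above (trunk-only code) splats the constant byte into an integer and reuses the store path; the PR1226 test further down greps for exactly this kind of result. A sketch with made-up names:

declare void @llvm.memset.i32(i8*, i8, i32, i32)

define i32 @splat() {
	%A = alloca i32
	%p = bitcast i32* %A to i8*
	call void @llvm.memset.i32(i8* %p, i8 1, i32 4, i32 4)
	%v = load i32* %A
	ret i32 %v		; trunk SROA folds this to ret i32 16843009
}

Here NumBytes = 4 and Val = 1, so APVal |= APVal << 8 three times yields 0x01010101 = 16843009, which is then stored into the replacement alloca.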
 
-/// ConvertUsesOfLoadToScalar - Convert all of the users of the specified load
-/// to use the new alloca directly, returning the value that should replace the
-/// load.  This happens when we are converting an "integer union" to a single
-/// integer scalar, or when we are converting a "vector union" to a vector with
-/// insert/extractelement instructions.
+/// ConvertUsesOfLoadToScalar - Convert all of the users of the specified load to
+/// use the new alloca directly, returning the value that should replace the
+/// load.  This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
 ///
 /// Offset is an offset from the original alloca, in bits that need to be
 /// shifted to the right.  By the end of this, there should be no uses of Ptr.
-Value *SROA::ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI,
-                                       uint64_t Offset) {
+Value *SROA::ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI,
+                                       unsigned Offset) {
   // The load is a bit extract from NewAI shifted right by Offset bits.
   Value *NV = new LoadInst(NewAI, LI->getName(), LI);
 
-  // If the load is of the whole new alloca, no conversion is needed.
-  if (NV->getType() == LI->getType() && Offset == 0)
+  if (NV->getType() == LI->getType() && Offset == 0) {
+    // We win, no conversion needed.
     return NV;
-
-  // If the result alloca is a vector type, this is either an element
-  // access or a bitcast to another vector type of the same size.
+  }
+
+  // If the result type of the 'union' is a pointer, then this must be ptr->ptr
+  // cast.  Anything else would result in NV being an integer.
+  if (isa<PointerType>(NV->getType())) {
+    assert(isa<PointerType>(LI->getType()));
+    return new BitCastInst(NV, LI->getType(), LI->getName(), LI);
+  }
+
   if (const VectorType *VTy = dyn_cast<VectorType>(NV->getType())) {
+    // If the result alloca is a vector type, this is either an element
+    // access or a bitcast to another vector type.
     if (isa<VectorType>(LI->getType()))
       return new BitCastInst(NV, LI->getType(), LI->getName(), LI);
 
@@ -1389 +1484 @@
     if (Offset) {
       unsigned EltSize = TD->getTypePaddedSizeInBits(VTy->getElementType());
       Elt = Offset/EltSize;
-      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
-    }
-    // Return the element extracted out of it.
-    Value *V = new ExtractElementInst(NV, ConstantInt::get(Type::Int32Ty, Elt),
-                                      "tmp", LI);
-    if (V->getType() != LI->getType())
-      V = new BitCastInst(V, LI->getType(), "tmp", LI);
-    return V;
-  }
-
-  // Otherwise, this must be a union that was converted to an integer value.
+      Offset -= EltSize*Elt;
+    }
+    NV = new ExtractElementInst(NV, ConstantInt::get(Type::Int32Ty, Elt),
+                                "tmp", LI);
+
+    // If we're done, return this element.
+    if (NV->getType() == LI->getType() && Offset == 0)
+      return NV;
+  }
+
   const IntegerType *NTy = cast<IntegerType>(NV->getType());
 
   // If this is a big-endian system and the load is narrower than the
   // full alloca type, we need to do a shift to get the right bits.
   int ShAmt = 0;
@@ -1414 +1508 @@
   } else {
     ShAmt = Offset;
   }
 
   // Note: we support negative bitwidths (with shl) which are not defined.
   // We do this to support (f.e.) loads off the end of a structure where
   // only some bits are used.
   if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
-    NV = BinaryOperator::CreateLShr(NV,
-                                    ConstantInt::get(NV->getType(), ShAmt),
+    NV = BinaryOperator::CreateLShr(NV,
+                                    ConstantInt::get(NV->getType(),ShAmt),
                                     LI->getName(), LI);
   else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
-    NV = BinaryOperator::CreateShl(NV,
-                                   ConstantInt::get(NV->getType(), -ShAmt),
+    NV = BinaryOperator::CreateShl(NV,
+                                   ConstantInt::get(NV->getType(),-ShAmt),
                                    LI->getName(), LI);
 
   // Finally, unconditionally truncate the integer to the right width.
   unsigned LIBitWidth = TD->getTypeSizeInBits(LI->getType());
   if (LIBitWidth < NTy->getBitWidth())
     NV = new TruncInst(NV, IntegerType::get(LIBitWidth),
                        LI->getName(), LI);
 
   // If the result is an integer, this is a trunc or bitcast.
   if (isa<IntegerType>(LI->getType())) {
     // Should be done.
-  } else if (LI->getType()->isFloatingPoint() ||
-             isa<VectorType>(LI->getType())) {
+  } else if (LI->getType()->isFloatingPoint()) {
     // Just do a bitcast, we know the sizes match up.
     NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
   } else {
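On a little-endian target, the code above lowers a narrow load out of the merged integer to a right shift by the bit offset followed by a truncate. A hypothetical sketch of the emitted sequence for an i16 that lived at byte offset 4 of an i64 union (so ShAmt = Offset = 32):

define i16 @piece(i64* %NewAI) {
	%nv = load i64* %NewAI		; NV: load the whole replacement alloca
	%sh = lshr i64 %nv, 32		; shift the wanted bits down
	%lo = trunc i64 %sh to i16	; unconditionally truncate to the load width
	ret i16 %lo
}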
@@ -1457 +1550 @@
 ///
 /// Offset is an offset from the original alloca, in bits that need to be
 /// shifted to the right.  By the end of this, there should be no uses of Ptr.
-Value *SROA::ConvertUsesOfStoreToScalar(Value *SV, AllocaInst *NewAI,
-                                        uint64_t Offset, Instruction *IP) {
-
+Value *SROA::ConvertUsesOfStoreToScalar(StoreInst *SI, AllocaInst *NewAI,
+                                        unsigned Offset) {
+
   // Convert the stored type to the actual type, shift it left to insert
   // then 'or' into place.
+  Value *SV = SI->getOperand(0);
   const Type *AllocaType = NewAI->getType()->getElementType();
-  if (SV->getType() == AllocaType && Offset == 0)
-    return SV;
-
-  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
-    Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", IP);
-
+  if (SV->getType() == AllocaType && Offset == 0) {
+    // All is well.
+  } else if (const VectorType *PTy = dyn_cast<VectorType>(AllocaType)) {
+    Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", SI);
+
     // If the result alloca is a vector type, this is either an element
     // access or a bitcast to another vector type.
     if (isa<VectorType>(SV->getType())) {
-      SV = new BitCastInst(SV, AllocaType, SV->getName(), IP);
+      SV = new BitCastInst(SV, AllocaType, SV->getName(), SI);
     } else {
       // Must be an element insertion.
-      unsigned Elt = Offset/TD->getTypePaddedSizeInBits(VTy->getElementType());
-
-      if (SV->getType() != VTy->getElementType())
-        SV = new BitCastInst(SV, VTy->getElementType(), "tmp", IP);
-
+      unsigned Elt = Offset/TD->getTypePaddedSizeInBits(PTy->getElementType());
       SV = InsertElementInst::Create(Old, SV,
                                      ConstantInt::get(Type::Int32Ty, Elt),
-                                     "tmp", IP);
-    }
-    return SV;
-  }
-
-
-  Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", IP);
-
-  // If SV is a float, convert it to the appropriate integer type.
-  // If it is a pointer, do the same, and also handle ptr->ptr casts
-  // here.
-  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
-  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
-  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
-  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
-  if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
-    SV = new BitCastInst(SV, IntegerType::get(SrcWidth), SV->getName(), IP);
-  else if (isa<PointerType>(SV->getType()))
-    SV = new PtrToIntInst(SV, TD->getIntPtrType(), SV->getName(), IP);
-
-  // Zero extend or truncate the value if needed.
-  if (SV->getType() != AllocaType) {
-    if (SV->getType()->getPrimitiveSizeInBits() <
-        AllocaType->getPrimitiveSizeInBits())
-      SV = new ZExtInst(SV, AllocaType, SV->getName(), IP);
-    else {
-      // Truncation may be needed if storing more than the alloca can hold
-      // (undefined behavior).
-      SV = new TruncInst(SV, AllocaType, SV->getName(), IP);
-      SrcWidth = DestWidth;
-      SrcStoreWidth = DestStoreWidth;
-    }
-  }
-
-  // If this is a big-endian system and the store is narrower than the
-  // full alloca type, we need to do a shift to get the right bits.
-  int ShAmt = 0;
-  if (TD->isBigEndian()) {
-    // On big-endian machines, the lowest bit is stored at the bit offset
-    // from the pointer given by getTypeStoreSizeInBits.  This matters for
-    // integers with a bitwidth that is not a multiple of 8.
-    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+                                     "tmp", SI);
+    }
+  } else if (isa<PointerType>(AllocaType)) {
+    // If the alloca type is a pointer, then all the elements must be
+    // pointers.
+    if (SV->getType() != AllocaType)
+      SV = new BitCastInst(SV, AllocaType, SV->getName(), SI);
   } else {
-    ShAmt = Offset;
-  }
-
-  // Note: we support negative bitwidths (with shr) which are not defined.
-  // We do this to support (f.e.) stores off the end of a structure where
-  // only some bits in the structure are set.
-  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
-  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
-    SV = BinaryOperator::CreateShl(SV,
-                                   ConstantInt::get(SV->getType(), ShAmt),
-                                   SV->getName(), IP);
-    Mask <<= ShAmt;
-  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
-    SV = BinaryOperator::CreateLShr(SV,
-                                    ConstantInt::get(SV->getType(), -ShAmt),
-                                    SV->getName(), IP);
-    Mask = Mask.lshr(-ShAmt);
-  }
-
-  // Mask out the bits we are about to insert from the old value, and or
-  // in the new bits.
-  if (SrcWidth != DestWidth) {
-    assert(DestWidth > SrcWidth);
-    Old = BinaryOperator::CreateAnd(Old, ConstantInt::get(~Mask),
-                                    Old->getName()+".mask", IP);
-    SV = BinaryOperator::CreateOr(Old, SV, SV->getName()+".ins", IP);
+    Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", SI);
+
+    // If SV is a float, convert it to the appropriate integer type.
+    // If it is a pointer, do the same, and also handle ptr->ptr casts
+    // here.
+    unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
+    unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
+    unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
+    unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
+    if (SV->getType()->isFloatingPoint())
+      SV = new BitCastInst(SV, IntegerType::get(SrcWidth),
+                           SV->getName(), SI);
+    else if (isa<PointerType>(SV->getType()))
+      SV = new PtrToIntInst(SV, TD->getIntPtrType(), SV->getName(), SI);
+
+    // Always zero extend the value if needed.
+    if (SV->getType() != AllocaType)
+      SV = new ZExtInst(SV, AllocaType, SV->getName(), SI);
+
+    // If this is a big-endian system and the store is narrower than the
+    // full alloca type, we need to do a shift to get the right bits.
+    int ShAmt = 0;
+    if (TD->isBigEndian()) {
+      // On big-endian machines, the lowest bit is stored at the bit offset
+      // from the pointer given by getTypeStoreSizeInBits.  This matters for
+      // integers with a bitwidth that is not a multiple of 8.
+      ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+    } else {
+      ShAmt = Offset;
+    }
+
+    // Note: we support negative bitwidths (with shr) which are not defined.
+    // We do this to support (f.e.) stores off the end of a structure where
+    // only some bits in the structure are set.
+    APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
+    if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
+      SV = BinaryOperator::CreateShl(SV,
+                                     ConstantInt::get(SV->getType(), ShAmt),
+                                     SV->getName(), SI);
+      Mask <<= ShAmt;
+    } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
+      SV = BinaryOperator::CreateLShr(SV,
+                                      ConstantInt::get(SV->getType(),-ShAmt),
+                                      SV->getName(), SI);
+      Mask = Mask.lshr(ShAmt);
+    }
+
+    // Mask out the bits we are about to insert from the old value, and or
+    // in the new bits.
+    if (SrcWidth != DestWidth) {
+      assert(DestWidth > SrcWidth);
+      Old = BinaryOperator::CreateAnd(Old, ConstantInt::get(~Mask),
+                                      Old->getName()+".mask", SI);
+      SV = BinaryOperator::CreateOr(Old, SV, SV->getName()+".ins", SI);
+    }
   }
   return SV;
 }
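Correspondingly, the restored store path above zero extends the small value, shifts it into position, masks the stale bits out of the old contents, and ors the new bits in. A little-endian sketch for storing an i16 at byte offset 4 of an i64 union (names are hypothetical; the mask constant is ~(0xFFFF << 32)):

define void @insert(i64* %NewAI, i16 %v) {
	%old = load i64* %NewAI			; Old = NewAI.in
	%ext = zext i16 %v to i64		; always zero extend to the alloca type
	%shl = shl i64 %ext, 32			; ShAmt = Offset = 32
	%msk = and i64 %old, -281470681743361	; %old & ~Mask keeps the other bits
	%ins = or i64 %msk, %shl		; merge the new bits in (.ins)
	store i64 %ins, i64* %NewAI
	ret void
}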
@@ -1 +1 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | llvm-dis | grep {ret i32 undef}
+; RUN: echo disabled
 
 ; Test that an array is not incorrectly deconstructed.
 
@@ -1 +1 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; RUN: echo disabled
 
 define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
 	%vsiidx = alloca [2 x <4 x i32>], align 16		; <[2 x <4 x i32>]*> [#uses=3]
@@ -1 +1 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; RUN: echo disabled
 ; rdar://6532315
 %t = type { { i32, i16, i8, i8 } }
 
@@ -1 +1 @@
+; PR1226
 ; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret i32 16843009}
 ; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | llvm-dis | grep {ret i16 514}
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
@@ -48 +47 @@
 }
 
 
-%struct.f = type { i32, i32, i32, i32, i32, i32 }
-
-define i16 @test4() nounwind {
-entry:
-	%A = alloca %struct.f, align 8		; <%struct.f*> [#uses=3]
-	%0 = getelementptr %struct.f* %A, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 1, i32* %0, align 8
-	%1 = getelementptr %struct.f* %A, i32 0, i32 1		; <i32*> [#uses=1]
-	%2 = bitcast i32* %1 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32(i8* %2, i8 2, i32 12, i32 4)
-	%3 = getelementptr %struct.f* %A, i32 0, i32 2		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 8		; <i32> [#uses=1]
-	%retval12 = trunc i32 %4 to i16		; <i16> [#uses=1]
-	ret i16 %retval12
-}
@@ -52 +52 @@
 	ret void
 }
 
-define i32 @test5(float %X) {  ;; should turn into bitcast.
-	%X_addr = alloca [4 x float]
-	%X1 = getelementptr [4 x float]* %X_addr, i32 0, i32 2
-	store float %X, float* %X1
-	%a = bitcast float* %X1 to i32*
-	%tmp = load i32* %a
-	ret i32 %tmp
-}
-