llvm.org GIT mirror llvm / d59be50
[DAGCombine] Match a pattern where a wide type scalar value is stored by several narrow stores. This opportunity is found from spec 2017 557.xz_r. And it is used by the sha encrypt/decrypt. See sha-2/sha512.c static void store64(u64 x, unsigned char* y) { for(int i = 0; i != 8; ++i) y[i] = (x >> ((7-i) * 8)) & 255; } static u64 load64(const unsigned char* y) { u64 res = 0; for(int i = 0; i != 8; ++i) res |= (u64)(y[i]) << ((7-i) * 8); return res; } The load64 has been implemented by https://reviews.llvm.org/D26149 This patch is trying to implement the store pattern. Match a pattern where a wide type scalar value is stored by several narrow stores. Fold it into a single store or a BSWAP and a store if the target supports it. Assuming little endian target: i8 *p = ... i32 val = ... p[0] = (val >> 0) & 0xFF; p[1] = (val >> 8) & 0xFF; p[2] = (val >> 16) & 0xFF; p[3] = (val >> 24) & 0xFF; => *((i32)p) = val; i8 *p = ... i32 val = ... p[0] = (val >> 24) & 0xFF; p[1] = (val >> 16) & 0xFF; p[2] = (val >> 8) & 0xFF; p[3] = (val >> 0) & 0xFF; => *((i32)p) = BSWAP(val); Differential Revision: https://reviews.llvm.org/D61843 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362472 91177308-0d34-0410-b5e6-96231b3b80d8 QingShan Zhang 3 months ago
2 changed file(s) with 228 addition(s) and 266 deletion(s). Raw diff Collapse all Expand all
523523 const SDLoc &DL);
524524 SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
525525 SDValue MatchLoadCombine(SDNode *N);
526 SDValue MatchStoreCombine(StoreSDNode *N);
526527 SDValue ReduceLoadWidth(SDNode *N);
527528 SDValue ReduceLoadOpStoreWidth(SDNode *N);
528529 SDValue splitMergedValStore(StoreSDNode *ST);
62616262 return BigEndian;
62626263 }
62636264
6265 static SDValue stripTruncAndExt(SDValue Value) {
6266 switch (Value.getOpcode()) {
6267 case ISD::TRUNCATE:
6268 case ISD::ZERO_EXTEND:
6269 case ISD::SIGN_EXTEND:
6270 case ISD::ANY_EXTEND:
6271 return stripTruncAndExt(Value.getOperand(0));
6272 }
6273 return Value;
6274 }
6275
6276 /// Match a pattern where a wide type scalar value is stored by several narrow
6277 /// stores. Fold it into a single store or a BSWAP and a store if the targets
6278 /// supports it.
6279 ///
6280 /// Assuming little endian target:
6281 /// i8 *p = ...
6282 /// i32 val = ...
6283 /// p[0] = (val >> 0) & 0xFF;
6284 /// p[1] = (val >> 8) & 0xFF;
6285 /// p[2] = (val >> 16) & 0xFF;
6286 /// p[3] = (val >> 24) & 0xFF;
6287 /// =>
6288 /// *((i32)p) = val;
6289 ///
6290 /// i8 *p = ...
6291 /// i32 val = ...
6292 /// p[0] = (val >> 24) & 0xFF;
6293 /// p[1] = (val >> 16) & 0xFF;
6294 /// p[2] = (val >> 8) & 0xFF;
6295 /// p[3] = (val >> 0) & 0xFF;
6296 /// =>
6297 /// *((i32)p) = BSWAP(val);
6298 SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6299 // Collect all the stores in the chain.
6300 SDValue Chain;
6301 SmallVector Stores;
6302 for (StoreSDNode *Store = N; Store; Store = dyn_cast(Chain)) {
6303 if (Store->getMemoryVT() != MVT::i8 ||
6304 Store->isVolatile() || Store->isIndexed())
6305 return SDValue();
6306 Stores.push_back(Store);
6307 Chain = Store->getChain();
6308 }
6309 // Handle the simple type only.
6310 unsigned Width = Stores.size();
6311 EVT VT = EVT::getIntegerVT(
6312 *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6313 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6314 return SDValue();
6315
6316 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6317 if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6318 return SDValue();
6319
6320 // Check if all the bytes of the combined value we are looking at are stored
6321 // to the same base address. Collect bytes offsets from Base address into
6322 // ByteOffsets.
6323 SDValue CombinedValue;
6324 SmallVector ByteOffsets(Width);
6325 int64_t FirstOffset = INT64_MAX;
6326 StoreSDNode *FirstStore = nullptr;
6327 Optional Base;
6328 for (auto Store : Stores) {
6329 // All the stores store different byte of the CombinedValue. A truncate is
6330 // required to get that byte value.
6331 SDValue Trunc = Store->getValue();
6332 if (Trunc.getOpcode() != ISD::TRUNCATE)
6333 return SDValue();
6334 // A shift operation is required to get the right byte offset, except the
6335 // first byte.
6336 int64_t Offset = 0;
6337 SDValue Value = Trunc.getOperand(0);
6338 if (Value.getOpcode() == ISD::SRL ||
6339 Value.getOpcode() == ISD::SRA) {
6340 ConstantSDNode *ShiftOffset =
6341 dyn_cast(Value.getOperand(1));
6342 // Trying to match the following pattern. The shift offset must be
6343 // a constant and a multiple of 8. It is the byte offset in "y".
6344 //
6345 // x = srl y, offset
6346 // i8 z = trunc x
6347 // store z, ...
6348 if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6349 return SDValue();
6350
6351 Offset = ShiftOffset->getSExtValue()/8;
6352 Value = Value.getOperand(0);
6353 }
6354
6355 // Stores must share the same combined value with different offsets.
6356 if (!CombinedValue)
6357 CombinedValue = Value;
6358 else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6359 return SDValue();
6360
6361 // The trunc and all the extend operation should be stripped to get the
6362 // real value we are stored.
6363 else if (CombinedValue.getValueType() != VT) {
6364 if (Value.getValueType() == VT ||
6365 Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6366 CombinedValue = Value;
6367 // Give up if the combined value type is smaller than the store size.
6368 if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6369 return SDValue();
6370 }
6371
6372 // Stores must share the same base address
6373 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6374 int64_t ByteOffsetFromBase = 0;
6375 if (!Base)
6376 Base = Ptr;
6377 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6378 return SDValue();
6379
6380 // Remember the first byte store
6381 if (ByteOffsetFromBase < FirstOffset) {
6382 FirstStore = Store;
6383 FirstOffset = ByteOffsetFromBase;
6384 }
6385 // Map the offset in the store and the offset in the combined value.
6386 if (Offset < 0 || Offset >= Width)
6387 return SDValue();
6388 ByteOffsets[Offset] = ByteOffsetFromBase;
6389 }
6390
6391 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6392 assert(FirstStore && "First store must be set");
6393
6394 // Check if the bytes of the combined value we are looking at match with
6395 // either big or little endian value store.
6396 Optional IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6397 if (!IsBigEndian.hasValue())
6398 return SDValue();
6399
6400 // The node we are looking at matches with the pattern, check if we can
6401 // replace it with a single bswap if needed and store.
6402
6403 // If the store needs byte swap check if the target supports it
6404 bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6405
6406 // Before legalize we can introduce illegal bswaps which will be later
6407 // converted to an explicit bswap sequence. This way we end up with a single
6408 // store and byte shuffling instead of several stores and byte shuffling.
6409 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6410 return SDValue();
6411
6412 // Check that a store of the wide type is both allowed and fast on the target
6413 bool Fast = false;
6414 bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6415 VT, FirstStore->getAddressSpace(),
6416 FirstStore->getAlignment(), &Fast);
6417 if (!Allowed || !Fast)
6418 return SDValue();
6419
6420 if (VT != CombinedValue.getValueType()) {
6421 assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6422 "Get unexpected store value to combine");
6423 CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6424 CombinedValue);
6425 }
6426
6427 if (NeedsBswap)
6428 CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6429
6430 SDValue NewStore =
6431 DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
6432 FirstStore->getPointerInfo(), FirstStore->getAlignment());
6433
6434 // Rely on other DAG combine rules to remove the other individual stores.
6435 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6436 return NewStore;
6437 }
6438
62646439 /// Match a pattern where a wide type scalar value is loaded by several narrow
62656440 /// loads and combined by shifts and ors. Fold it into a single load or a load
62666441 /// and a BSWAP if the targets supports it.
1576115936 if (SDValue NewST = TransformFPLoadStorePair(N))
1576215937 return NewST;
1576315938
15939 // Try transforming several stores into STORE (BSWAP).
15940 if (SDValue Store = MatchStoreCombine(ST))
15941 return Store;
15942
1576415943 if (ST->isUnindexed()) {
1576515944 // Walk up chain skipping non-aliasing memory nodes, on this store and any
1576615945 // adjacent stores.
99 define void @store_i32_by_i8(i32 signext %m, i8* %p) {
1010 ; CHECK-PPC64LE-LABEL: store_i32_by_i8:
1111 ; CHECK-PPC64LE: # %bb.0: # %entry
12 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
13 ; CHECK-PPC64LE-NEXT: stb 3, 0(4)
14 ; CHECK-PPC64LE-NEXT: stb 5, 1(4)
15 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
16 ; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
17 ; CHECK-PPC64LE-NEXT: stb 5, 2(4)
18 ; CHECK-PPC64LE-NEXT: stb 3, 3(4)
12 ; CHECK-PPC64LE-NEXT: stw 3, 0(4)
1913 ; CHECK-PPC64LE-NEXT: blr
2014 ;
2115 ; CHECK-PPC64-LABEL: store_i32_by_i8:
2216 ; CHECK-PPC64: # %bb.0: # %entry
23 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
24 ; CHECK-PPC64-NEXT: stb 3, 0(4)
25 ; CHECK-PPC64-NEXT: stb 5, 1(4)
26 ; CHECK-PPC64-NEXT: srwi 5, 3, 16
27 ; CHECK-PPC64-NEXT: srwi 3, 3, 24
28 ; CHECK-PPC64-NEXT: stb 5, 2(4)
29 ; CHECK-PPC64-NEXT: stb 3, 3(4)
17 ; CHECK-PPC64-NEXT: stwbrx 3, 0, 4
3018 ; CHECK-PPC64-NEXT: blr
3119 entry:
3220 %conv = trunc i32 %m to i8
5442 define void @store_i32_by_i8_bswap(i32 signext %m, i8* %p) {
5543 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap:
5644 ; CHECK-PPC64LE: # %bb.0: # %entry
57 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 24
58 ; CHECK-PPC64LE-NEXT: stb 5, 0(4)
59 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
60 ; CHECK-PPC64LE-NEXT: stb 5, 1(4)
61 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
62 ; CHECK-PPC64LE-NEXT: stb 5, 2(4)
63 ; CHECK-PPC64LE-NEXT: stb 3, 3(4)
45 ; CHECK-PPC64LE-NEXT: stwbrx 3, 0, 4
6446 ; CHECK-PPC64LE-NEXT: blr
6547 ;
6648 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap:
6749 ; CHECK-PPC64: # %bb.0: # %entry
68 ; CHECK-PPC64-NEXT: srwi 5, 3, 24
69 ; CHECK-PPC64-NEXT: srwi 6, 3, 16
70 ; CHECK-PPC64-NEXT: stb 5, 0(4)
71 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
72 ; CHECK-PPC64-NEXT: stb 6, 1(4)
73 ; CHECK-PPC64-NEXT: stb 5, 2(4)
74 ; CHECK-PPC64-NEXT: stb 3, 3(4)
50 ; CHECK-PPC64-NEXT: stw 3, 0(4)
7551 ; CHECK-PPC64-NEXT: blr
7652 entry:
7753 %0 = lshr i32 %m, 24
10379 define void @store_i64_by_i8(i64 %m, i8* %p) {
10480 ; CHECK-PPC64LE-LABEL: store_i64_by_i8:
10581 ; CHECK-PPC64LE: # %bb.0: # %entry
106 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8
107 ; CHECK-PPC64LE-NEXT: stb 3, 0(4)
108 ; CHECK-PPC64LE-NEXT: stb 5, 1(4)
109 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16
110 ; CHECK-PPC64LE-NEXT: stb 5, 2(4)
111 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24
112 ; CHECK-PPC64LE-NEXT: stb 5, 3(4)
113 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32
114 ; CHECK-PPC64LE-NEXT: stb 5, 4(4)
115 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40
116 ; CHECK-PPC64LE-NEXT: stb 5, 5(4)
117 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48
118 ; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56
119 ; CHECK-PPC64LE-NEXT: stb 5, 6(4)
120 ; CHECK-PPC64LE-NEXT: stb 3, 7(4)
82 ; CHECK-PPC64LE-NEXT: stdx 3, 0, 4
12183 ; CHECK-PPC64LE-NEXT: blr
12284 ;
12385 ; CHECK-PPC64-LABEL: store_i64_by_i8:
12486 ; CHECK-PPC64: # %bb.0: # %entry
125 ; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8
126 ; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16
127 ; CHECK-PPC64-NEXT: stb 5, 1(4)
128 ; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24
129 ; CHECK-PPC64-NEXT: stb 6, 2(4)
130 ; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32
131 ; CHECK-PPC64-NEXT: stb 5, 3(4)
132 ; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40
133 ; CHECK-PPC64-NEXT: stb 6, 4(4)
134 ; CHECK-PPC64-NEXT: stb 3, 0(4)
135 ; CHECK-PPC64-NEXT: stb 5, 5(4)
136 ; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48
137 ; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56
138 ; CHECK-PPC64-NEXT: stb 5, 6(4)
139 ; CHECK-PPC64-NEXT: stb 3, 7(4)
87 ; CHECK-PPC64-NEXT: stdbrx 3, 0, 4
14088 ; CHECK-PPC64-NEXT: blr
14189 entry:
14290 %conv = trunc i64 %m to i8
184132 define void @store_i64_by_i8_bswap(i64 %m, i8* %p) {
185133 ; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap:
186134 ; CHECK-PPC64LE: # %bb.0: # %entry
187 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8
188 ; CHECK-PPC64LE-NEXT: stb 3, 7(4)
189 ; CHECK-PPC64LE-NEXT: stb 5, 6(4)
190 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16
191 ; CHECK-PPC64LE-NEXT: stb 5, 5(4)
192 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24
193 ; CHECK-PPC64LE-NEXT: stb 5, 4(4)
194 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32
195 ; CHECK-PPC64LE-NEXT: stb 5, 3(4)
196 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40
197 ; CHECK-PPC64LE-NEXT: stb 5, 2(4)
198 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48
199 ; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56
200 ; CHECK-PPC64LE-NEXT: stb 5, 1(4)
201 ; CHECK-PPC64LE-NEXT: stb 3, 0(4)
135 ; CHECK-PPC64LE-NEXT: stdbrx 3, 0, 4
202136 ; CHECK-PPC64LE-NEXT: blr
203137 ;
204138 ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap:
205139 ; CHECK-PPC64: # %bb.0: # %entry
206 ; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8
207 ; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16
208 ; CHECK-PPC64-NEXT: stb 5, 6(4)
209 ; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24
210 ; CHECK-PPC64-NEXT: stb 6, 5(4)
211 ; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32
212 ; CHECK-PPC64-NEXT: stb 5, 4(4)
213 ; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40
214 ; CHECK-PPC64-NEXT: stb 6, 3(4)
215 ; CHECK-PPC64-NEXT: stb 3, 7(4)
216 ; CHECK-PPC64-NEXT: stb 5, 2(4)
217 ; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48
218 ; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56
219 ; CHECK-PPC64-NEXT: stb 5, 1(4)
220 ; CHECK-PPC64-NEXT: stb 3, 0(4)
140 ; CHECK-PPC64-NEXT: stdx 3, 0, 4
221141 ; CHECK-PPC64-NEXT: blr
222142 entry:
223143 %conv = trunc i64 %m to i8
266186 define void @store_i64_by_i8_bswap_uses(i32 signext %t, i8* %p) {
267187 ; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap_uses:
268188 ; CHECK-PPC64LE: # %bb.0: # %entry
269 ; CHECK-PPC64LE-NEXT: slwi 5, 3, 3
270 ; CHECK-PPC64LE-NEXT: subf 3, 3, 5
271 ; CHECK-PPC64LE-NEXT: extsw 3, 3
272 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8
273 ; CHECK-PPC64LE-NEXT: stb 3, 7(4)
274 ; CHECK-PPC64LE-NEXT: stb 5, 6(4)
275 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16
276 ; CHECK-PPC64LE-NEXT: stb 5, 5(4)
277 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24
278 ; CHECK-PPC64LE-NEXT: stb 5, 4(4)
279 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32
280 ; CHECK-PPC64LE-NEXT: stb 5, 3(4)
281 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40
282 ; CHECK-PPC64LE-NEXT: stb 5, 2(4)
283 ; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48
284 ; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56
285 ; CHECK-PPC64LE-NEXT: stb 5, 1(4)
286 ; CHECK-PPC64LE-NEXT: stb 3, 0(4)
189 ; CHECK-PPC64LE-NEXT: slwi [[REG:[0-9]+]], 3, 3
190 ; CHECK-PPC64LE-NEXT: subf [[REG1:[0-9]+]], 3, [[REG]]
191 ; CHECK-PPC64LE-NEXT: extsw [[REG2:[0-9]+]], [[REG1]]
192 ; CHECK-PPC64LE-NEXT: stdbrx [[REG2]], 0, 4
287193 ; CHECK-PPC64LE-NEXT: blr
288194 ;
289195 ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap_uses:
290196 ; CHECK-PPC64: # %bb.0: # %entry
291 ; CHECK-PPC64-NEXT: slwi 5, 3, 3
292 ; CHECK-PPC64-NEXT: subf 3, 3, 5
293 ; CHECK-PPC64-NEXT: extsw 3, 3
294 ; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8
295 ; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16
296 ; CHECK-PPC64-NEXT: stb 5, 6(4)
297 ; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24
298 ; CHECK-PPC64-NEXT: stb 6, 5(4)
299 ; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32
300 ; CHECK-PPC64-NEXT: stb 5, 4(4)
301 ; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40
302 ; CHECK-PPC64-NEXT: stb 6, 3(4)
303 ; CHECK-PPC64-NEXT: stb 3, 7(4)
304 ; CHECK-PPC64-NEXT: stb 5, 2(4)
305 ; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48
306 ; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56
307 ; CHECK-PPC64-NEXT: stb 5, 1(4)
308 ; CHECK-PPC64-NEXT: stb 3, 0(4)
197 ; CHECK-PPC64-NEXT: slwi [[REG:[0-9]+]], 3, 3
198 ; CHECK-PPC64-NEXT: subf [[REG1:[0-9]+]], 3, [[REG]]
199 ; CHECK-PPC64-NEXT: extsw [[REG2:[0-9]+]], [[REG1]]
200 ; CHECK-PPC64-NEXT: stdx [[REG2]], 0, 4
309201 ; CHECK-PPC64-NEXT: blr
310202 entry:
311203 %mul = mul nsw i32 %t, 7
355247 define void @store_i32_by_i8_bswap_volatile(i32 signext %m, i8* %p) {
356248 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_volatile:
357249 ; CHECK-PPC64LE: # %bb.0: # %entry
358 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
359 ; CHECK-PPC64LE-NEXT: stb 3, 3(4)
360 ; CHECK-PPC64LE-NEXT: stb 5, 2(4)
361 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
362 ; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
363 ; CHECK-PPC64LE-NEXT: stb 5, 1(4)
364 ; CHECK-PPC64LE-NEXT: stb 3, 0(4)
365 ; CHECK-PPC64LE-NEXT: blr
250 ; CHECK-PPC64LE-NOT: stwbrx
366251 ;
367252 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_volatile:
368253 ; CHECK-PPC64: # %bb.0: # %entry
369 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
370 ; CHECK-PPC64-NEXT: stb 3, 3(4)
371 ; CHECK-PPC64-NEXT: stb 5, 2(4)
372 ; CHECK-PPC64-NEXT: srwi 5, 3, 16
373 ; CHECK-PPC64-NEXT: srwi 3, 3, 24
374 ; CHECK-PPC64-NEXT: stb 5, 1(4)
375 ; CHECK-PPC64-NEXT: stb 3, 0(4)
376 ; CHECK-PPC64-NEXT: blr
254 ; CHECK-PPC64-NOT: stw
377255 entry:
378256 %conv = trunc i32 %m to i8
379257 %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
402280 define void @store_i32_by_i8_bswap_store_in_between(i32 signext %m, i8* %p, i8* %q) {
403281 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_store_in_between:
404282 ; CHECK-PPC64LE: # %bb.0: # %entry
405 ; CHECK-PPC64LE-NEXT: srwi 6, 3, 8
406 ; CHECK-PPC64LE-NEXT: stb 3, 3(4)
407 ; CHECK-PPC64LE-NEXT: stb 6, 2(4)
408 ; CHECK-PPC64LE-NEXT: li 6, 3
409 ; CHECK-PPC64LE-NEXT: stb 6, 0(5)
410 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
411 ; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
412 ; CHECK-PPC64LE-NEXT: stb 5, 1(4)
413 ; CHECK-PPC64LE-NEXT: stb 3, 0(4)
414 ; CHECK-PPC64LE-NEXT: blr
283 ; CHECK-PPC64LE-NOT: stwbrx
415284 ;
416285 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_store_in_between:
417286 ; CHECK-PPC64: # %bb.0: # %entry
418 ; CHECK-PPC64-NEXT: li 6, 3
419 ; CHECK-PPC64-NEXT: srwi 7, 3, 8
420 ; CHECK-PPC64-NEXT: stb 7, 2(4)
421 ; CHECK-PPC64-NEXT: stb 3, 3(4)
422 ; CHECK-PPC64-NEXT: stb 6, 0(5)
423 ; CHECK-PPC64-NEXT: srwi 5, 3, 16
424 ; CHECK-PPC64-NEXT: srwi 3, 3, 24
425 ; CHECK-PPC64-NEXT: stb 5, 1(4)
426 ; CHECK-PPC64-NEXT: stb 3, 0(4)
427 ; CHECK-PPC64-NEXT: blr
287 ; CHECK-PPC64-NOT: stw
428288 entry:
429289 %conv = trunc i32 %m to i8
430290 %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
447307 define void @store_i32_by_i8_bswap_unrelated_store(i32 signext %m, i8* %p, i8* %q) {
448308 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_unrelated_store:
449309 ; CHECK-PPC64LE: # %bb.0: # %entry
450 ; CHECK-PPC64LE-NEXT: srwi 6, 3, 8
451 ; CHECK-PPC64LE-NEXT: stb 3, 3(4)
452 ; CHECK-PPC64LE-NEXT: stb 6, 2(5)
453 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
454 ; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
455 ; CHECK-PPC64LE-NEXT: stb 5, 1(4)
456 ; CHECK-PPC64LE-NEXT: stb 3, 0(4)
457 ; CHECK-PPC64LE-NEXT: blr
310 ; CHECK-PPC64LE-NOT: stwbrx
458311 ;
459312 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_unrelated_store:
460313 ; CHECK-PPC64: # %bb.0: # %entry
461 ; CHECK-PPC64-NEXT: srwi 6, 3, 8
462 ; CHECK-PPC64-NEXT: stb 3, 3(4)
463 ; CHECK-PPC64-NEXT: stb 6, 2(5)
464 ; CHECK-PPC64-NEXT: srwi 5, 3, 16
465 ; CHECK-PPC64-NEXT: srwi 3, 3, 24
466 ; CHECK-PPC64-NEXT: stb 5, 1(4)
467 ; CHECK-PPC64-NEXT: stb 3, 0(4)
468 ; CHECK-PPC64-NEXT: blr
314 ; CHECK-PPC64-NOT: stw
469315 entry:
470316 %conv = trunc i32 %m to i8
471317 %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
492338 define void @store_i32_by_i8_bswap_nonzero_offset(i32 signext %m, i8* %p) {
493339 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_nonzero_offset:
494340 ; CHECK-PPC64LE: # %bb.0: # %entry
495 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
496 ; CHECK-PPC64LE-NEXT: stb 5, 3(4)
497 ; CHECK-PPC64LE-NEXT: stb 3, 4(4)
498 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
499 ; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
500 ; CHECK-PPC64LE-NEXT: stb 5, 2(4)
501 ; CHECK-PPC64LE-NEXT: stb 3, 1(4)
341 ; CHECK-PPC64LE-NEXT: addi [[REG1:[0-9]+]], 4, 1
342 ; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG1]]
502343 ; CHECK-PPC64LE-NEXT: blr
503344 ;
504345 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_nonzero_offset:
505346 ; CHECK-PPC64: # %bb.0: # %entry
506 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
507 ; CHECK-PPC64-NEXT: stb 3, 4(4)
508 ; CHECK-PPC64-NEXT: stb 5, 3(4)
509 ; CHECK-PPC64-NEXT: srwi 5, 3, 16
510 ; CHECK-PPC64-NEXT: srwi 3, 3, 24
511 ; CHECK-PPC64-NEXT: stb 5, 2(4)
512 ; CHECK-PPC64-NEXT: stb 3, 1(4)
347 ; CHECK-PPC64-NEXT: stw 3, 1(4)
513348 ; CHECK-PPC64-NEXT: blr
514349 entry:
515350 %0 = lshr i32 %m, 8
538373 define void @store_i32_by_i8_neg_offset(i32 signext %m, i8* %p) {
539374 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_neg_offset:
540375 ; CHECK-PPC64LE: # %bb.0: # %entry
541 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
542 ; CHECK-PPC64LE-NEXT: stb 5, -3(4)
543 ; CHECK-PPC64LE-NEXT: stb 3, -4(4)
544 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
545 ; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
546 ; CHECK-PPC64LE-NEXT: stb 5, -2(4)
547 ; CHECK-PPC64LE-NEXT: stb 3, -1(4)
376 ; CHECK-PPC64LE-NEXT: stw 3, -4(4)
548377 ; CHECK-PPC64LE-NEXT: blr
549378 ;
550379 ; CHECK-PPC64-LABEL: store_i32_by_i8_neg_offset:
551380 ; CHECK-PPC64: # %bb.0: # %entry
552 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
553 ; CHECK-PPC64-NEXT: stb 3, -4(4)
554 ; CHECK-PPC64-NEXT: stb 5, -3(4)
555 ; CHECK-PPC64-NEXT: srwi 5, 3, 16
556 ; CHECK-PPC64-NEXT: srwi 3, 3, 24
557 ; CHECK-PPC64-NEXT: stb 5, -2(4)
558 ; CHECK-PPC64-NEXT: stb 3, -1(4)
381 ; CHECK-PPC64-NEXT: addi [[REG1:[0-9]+]], 4, -4
382 ; CHECK-PPC64-NEXT: stwbrx 3, 0, [[REG1]]
559383 ; CHECK-PPC64-NEXT: blr
560384 entry:
561385 %0 = lshr i32 %m, 8
584408 define void @store_i32_by_i8_bswap_neg_offset(i32 signext %m, i8* %p) {
585409 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_neg_offset:
586410 ; CHECK-PPC64LE: # %bb.0: # %entry
587 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
588 ; CHECK-PPC64LE-NEXT: stb 5, -3(4)
589 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 24
590 ; CHECK-PPC64LE-NEXT: stb 5, -4(4)
591 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
592 ; CHECK-PPC64LE-NEXT: stb 5, -2(4)
593 ; CHECK-PPC64LE-NEXT: stb 3, -1(4)
411 ; CHECK-PPC64LE-NEXT: addi [[REG1:[0-9]+]], 4, -4
412 ; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG1]]
594413 ; CHECK-PPC64LE-NEXT: blr
595414 ;
596415 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_neg_offset:
597416 ; CHECK-PPC64: # %bb.0: # %entry
598 ; CHECK-PPC64-NEXT: srwi 5, 3, 16
599 ; CHECK-PPC64-NEXT: srwi 6, 3, 24
600 ; CHECK-PPC64-NEXT: stb 5, -3(4)
601 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
602 ; CHECK-PPC64-NEXT: stb 6, -4(4)
603 ; CHECK-PPC64-NEXT: stb 5, -2(4)
604 ; CHECK-PPC64-NEXT: stb 3, -1(4)
417 ; CHECK-PPC64-NEXT: stw 3, -4(4)
605418 ; CHECK-PPC64-NEXT: blr
606419 entry:
607420 %0 = lshr i32 %m, 16
630443 define void @store_i32_by_i8_bswap_base_index_offset(i32 %m, i32 %i, i8* %p) {
631444 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_base_index_offset:
632445 ; CHECK-PPC64LE: # %bb.0: # %entry
633 ; CHECK-PPC64LE-NEXT: extsw 4, 4
634 ; CHECK-PPC64LE-NEXT: srwi 6, 3, 16
635 ; CHECK-PPC64LE-NEXT: add 4, 5, 4
636 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 24
637 ; CHECK-PPC64LE-NEXT: stb 6, -3(4)
638 ; CHECK-PPC64LE-NEXT: stb 5, -4(4)
639 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
640 ; CHECK-PPC64LE-NEXT: stb 5, -2(4)
641 ; CHECK-PPC64LE-NEXT: stb 3, -1(4)
446 ; CHECK-PPC64LE-NEXT: extsw [[REG1:[0-9]+]], 4
447 ; CHECK-PPC64LE-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]]
448 ; CHECK-PPC64LE-NEXT: addi [[REG3:[0-9]+]], [[REG2]], -4
449 ; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG3]]
642450 ; CHECK-PPC64LE-NEXT: blr
643451 ;
644452 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_base_index_offset:
645453 ; CHECK-PPC64: # %bb.0: # %entry
646 ; CHECK-PPC64-NEXT: extsw 4, 4
647 ; CHECK-PPC64-NEXT: srwi 6, 3, 16
648 ; CHECK-PPC64-NEXT: add 4, 5, 4
649 ; CHECK-PPC64-NEXT: srwi 5, 3, 24
650 ; CHECK-PPC64-NEXT: stb 6, -3(4)
651 ; CHECK-PPC64-NEXT: srwi 6, 3, 8
652 ; CHECK-PPC64-NEXT: stb 5, -4(4)
653 ; CHECK-PPC64-NEXT: stb 6, -2(4)
654 ; CHECK-PPC64-NEXT: stb 3, -1(4)
454 ; CHECK-PPC64-NEXT: extsw [[REG1:[0-9]+]], 4
455 ; CHECK-PPC64-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]]
456 ; CHECK-PPC64-NEXT: stw 3, -4([[REG2]])
655457 ; CHECK-PPC64-NEXT: blr
656458 entry:
657459 %0 = lshr i32 %m, 16
693495 define void @store_i32_by_i8_bswap_complicated(i32 %m, i32 %i, i8* %p) {
694496 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_complicated:
695497 ; CHECK-PPC64LE: # %bb.0: # %entry
696 ; CHECK-PPC64LE-NEXT: extsw 4, 4
697 ; CHECK-PPC64LE-NEXT: add 4, 5, 4
698 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 24
699 ; CHECK-PPC64LE-NEXT: stb 5, 3(4)
700 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
701 ; CHECK-PPC64LE-NEXT: stb 5, 4(4)
702 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
703 ; CHECK-PPC64LE-NEXT: stb 5, 5(4)
704 ; CHECK-PPC64LE-NEXT: stb 3, 6(4)
498 ; CHECK-PPC64LE-NEXT: extsw [[REG1:[0-9]+]], 4
499 ; CHECK-PPC64LE-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]]
500 ; CHECK-PPC64LE-NEXT: addi [[REG3:[0-9]+]], [[REG2]], 3
501 ; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG3]]
705502 ; CHECK-PPC64LE-NEXT: blr
706503 ;
707504 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_complicated:
708505 ; CHECK-PPC64: # %bb.0: # %entry
709 ; CHECK-PPC64-NEXT: extsw 4, 4
710 ; CHECK-PPC64-NEXT: srwi 6, 3, 24
711 ; CHECK-PPC64-NEXT: add 4, 5, 4
712 ; CHECK-PPC64-NEXT: srwi 5, 3, 16
713 ; CHECK-PPC64-NEXT: stb 6, 3(4)
714 ; CHECK-PPC64-NEXT: stb 5, 4(4)
715 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
716 ; CHECK-PPC64-NEXT: stb 5, 5(4)
717 ; CHECK-PPC64-NEXT: stb 3, 6(4)
506 ; CHECK-PPC64-NEXT: extsw [[REG1:[0-9]+]], 4
507 ; CHECK-PPC64-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]]
508 ; CHECK-PPC64-NEXT: stw 3, 3([[REG2]])
718509 ; CHECK-PPC64-NEXT: blr
719510 entry:
720511 %idx.ext = sext i32 %i to i64
744535 define void @store_i16_by_i8_bswap(i16 %m, i8* %p) {
745536 ; CHECK-PPC64LE-LABEL: store_i16_by_i8_bswap:
746537 ; CHECK-PPC64LE: # %bb.0: # %entry
747 ; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
748 ; CHECK-PPC64LE-NEXT: stb 5, 0(4)
749 ; CHECK-PPC64LE-NEXT: stb 3, 1(4)
538 ; CHECK-PPC64LE-NEXT: sthbrx 3, 0, 4
750539 ; CHECK-PPC64LE-NEXT: blr
751540 ;
752541 ; CHECK-PPC64-LABEL: store_i16_by_i8_bswap:
753542 ; CHECK-PPC64: # %bb.0: # %entry
754 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
755 ; CHECK-PPC64-NEXT: stb 5, 0(4)
756 ; CHECK-PPC64-NEXT: stb 3, 1(4)
543 ; CHECK-PPC64-NEXT: sth 3, 0(4)
757544 ; CHECK-PPC64-NEXT: blr
758545 entry:
759546 %0 = lshr i16 %m, 8
770557 define void @store_16_by_i8(i16 %m, i8* %p) {
771558 ; CHECK-PPC64LE-LABEL: store_16_by_i8:
772559 ; CHECK-PPC64LE: # %bb.0: # %entry
773 ; CHECK-PPC64LE-NEXT: stb 3, 0(4)
774 ; CHECK-PPC64LE-NEXT: srwi 3, 3, 8
775 ; CHECK-PPC64LE-NEXT: stb 3, 1(4)
560 ; CHECK-PPC64LE-NEXT: sth 3, 0(4)
776561 ; CHECK-PPC64LE-NEXT: blr
777562 ;
778563 ; CHECK-PPC64-LABEL: store_16_by_i8:
779564 ; CHECK-PPC64: # %bb.0: # %entry
780 ; CHECK-PPC64-NEXT: srwi 5, 3, 8
781 ; CHECK-PPC64-NEXT: stb 3, 0(4)
782 ; CHECK-PPC64-NEXT: stb 5, 1(4)
565 ; CHECK-PPC64-NEXT: sthbrx 3, 0, 4
783566 ; CHECK-PPC64-NEXT: blr
784567 entry:
785568 %conv1 = trunc i16 %m to i8