llvm.org GIT mirror llvm / e9359f4
Fix alignment checks in MergeConsecutiveStores. 1) check whether the alignment of the memory is sufficient for the *merged* store or load to be efficient. Not doing so can result in some ridiculously poor code generation, if merging creates a vector operation which must be aligned but isn't. 2) DON'T check that the alignment of each load/store is equal. If you're merging 2 4-byte stores, the first *might* have 8-byte alignment, but the second certainly will have 4-byte alignment. We do want to allow those to be merged. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236850 91177308-0d34-0410-b5e6-96231b3b80d8 James Y Knight 4 years ago
3 changed file(s) with 123 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
1065210652 return true;
1065310653 }
1065410654
// Returns true when a merged memory access of type EVTTy, in address space AS,
// with alignment Align would be acceptable: either the target reports that it
// handles the misaligned access efficiently, or Align already meets the type's
// preferred alignment from the data layout.
// NOTE(review): this queries getPrefTypeAlignment (preferred alignment), not
// the minimum ABI alignment — confirm that is the intended threshold.
10655 static bool allowableAlignment(const SelectionDAG &DAG,
10656 const TargetLowering &TLI, EVT EVTTy,
10657 unsigned AS, unsigned Align) {
10658 if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
10659 return true;
10660 
10661 Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
10662 unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
10663 return (Align >= ABIAlignment);
10664 }
10665
1065510666 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
1065610667 if (OptLevel == CodeGenOpt::None)
1065710668 return false;
1071810729 if (!Ptr.equalBaseIndex(BasePtr))
1071910730 break;
1072010731
10721 // Check that the alignment is the same.
10722 if (Index->getAlignment() != St->getAlignment())
10723 break;
10724
1072510732 // The memory operands must not be volatile.
1072610733 if (Index->isVolatile() || Index->isIndexed())
1072710734 break;
1073310740
1073410741 // The stored memory type must be the same.
1073510742 if (Index->getMemoryVT() != MemVT)
10736 break;
10737
10738 // We do not allow unaligned stores because we want to prevent overriding
10739 // stores.
10740 if (Index->getAlignment()*8 != MemVT.getSizeInBits())
1074110743 break;
1074210744
1074310745 // We found a potential memory operand to merge.
1081310815
1081410816 // The node with the lowest store address.
1081510817 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
10818 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
10819 unsigned FirstStoreAlign = FirstInChain->getAlignment();
1081610820
1081710821 // Store the constants into memory as one consecutive store.
1081810822 if (IsConstantSrc) {
1083510839 // Find a legal type for the constant store.
1083610840 unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
1083710841 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
10838 if (TLI.isTypeLegal(StoreTy))
10842 if (TLI.isTypeLegal(StoreTy) &&
10843 allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
10844 FirstStoreAlign)) {
1083910845 LastLegalType = i+1;
1084010846 // Or check whether a truncstore is legal.
10841 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
10842 TargetLowering::TypePromoteInteger) {
10847 } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
10848 TargetLowering::TypePromoteInteger) {
1084310849 EVT LegalizedStoredValueTy =
1084410850 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
10845 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
10846 LastLegalType = i+1;
10851 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
10852 allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
10853 FirstStoreAlign)) {
10854 LastLegalType = i + 1;
10855 }
1084710856 }
1084810857
1084910858 // Find a legal type for the vector store.
1085010859 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
10851 if (TLI.isTypeLegal(Ty))
10860 if (TLI.isTypeLegal(Ty) &&
10861 allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
1085210862 LastLegalVectorType = i + 1;
10863 }
1085310864 }
1085410865
1085510866 // We only use vectors if the constant is known to be zero and the
1088510896
1088610897 // Find a legal type for the vector store.
1088710898 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
10888 if (TLI.isTypeLegal(Ty))
10899 if (TLI.isTypeLegal(Ty) &&
10900 allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
1088910901 NumElem = i + 1;
1089010902 }
1089110903
1091010922
1091110923 // Loads must only have one use.
1091210924 if (!Ld->hasNUsesOfValue(1, 0))
10913 break;
10914
10915 // Check that the alignment is the same as the stores.
10916 if (Ld->getAlignment() != St->getAlignment())
1091710925 break;
1091810926
1091910927 // The memory operands must not be volatile.
1095310961 St->getAlignment() >= RequiredAlignment)
1095410962 return false;
1095510963
10964 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
10965 unsigned FirstLoadAS = FirstLoad->getAddressSpace();
10966 unsigned FirstLoadAlign = FirstLoad->getAlignment();
10967
1095610968 // Scan the memory operations on the chain and find the first non-consecutive
1095710969 // load memory address. These variables hold the index in the store node
1095810970 // array.
1096110973 unsigned LastLegalVectorType = 0;
1096210974 unsigned LastLegalIntegerType = 0;
1096310975 StartAddress = LoadNodes[0].OffsetFromBase;
10964 SDValue FirstChain = LoadNodes[0].MemNode->getChain();
10976 SDValue FirstChain = FirstLoad->getChain();
1096510977 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
1096610978 // All loads must share the same chain.
1096710979 if (LoadNodes[i].MemNode->getChain() != FirstChain)
1097410986
1097510987 // Find a legal type for the vector store.
1097610988 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
10977 if (TLI.isTypeLegal(StoreTy))
10989 if (TLI.isTypeLegal(StoreTy) &&
10990 allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
10991 allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
1097810992 LastLegalVectorType = i + 1;
10993 }
1097910994
1098010995 // Find a legal type for the integer store.
1098110996 unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
1098210997 StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
10983 if (TLI.isTypeLegal(StoreTy))
10998 if (TLI.isTypeLegal(StoreTy) &&
10999 allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
11000 allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
1098411001 LastLegalIntegerType = i + 1;
1098511002 // Or check whether a truncstore and extload is legal.
1098611003 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
1099011007 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
1099111008 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
1099211009 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
10993 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy))
11010 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11011 allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
11012 FirstStoreAlign) &&
11013 allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
11014 FirstLoadAlign))
1099411015 LastLegalIntegerType = i+1;
1099511016 }
1099611017 }
1103411055 SDLoc LoadDL(LoadNodes[0].MemNode);
1103511056 SDLoc StoreDL(StoreNodes[0].MemNode);
1103611057
11037 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
11038 SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
11039 FirstLoad->getChain(),
11040 FirstLoad->getBasePtr(),
11041 FirstLoad->getPointerInfo(),
11042 false, false, false,
11043 FirstLoad->getAlignment());
11044
11045 SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad,
11046 FirstInChain->getBasePtr(),
11047 FirstInChain->getPointerInfo(), false, false,
11048 FirstInChain->getAlignment());
11058 SDValue NewLoad = DAG.getLoad(
11059 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
11060 FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
11061
11062 SDValue NewStore = DAG.getStore(
11063 LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
11064 FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
1104911065
1105011066 // Replace one of the loads with the new load.
1105111067 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
0 ; RUN: llc -march=ppc32 -mattr=+altivec < %s | FileCheck %s
1
2 ;; This test ensures that MergeConsecutiveStores does not attempt to
3 ;; merge stores or loads when doing so would result in unaligned
4 ;; memory operations (unless the target supports those, e.g. X86).
5
6 ;; This issue happens in other situations for other targets, but PPC
7 ;; with Altivec extensions was chosen for the test because it does not
8 ;; support unaligned access with AltiVec instructions. If the 4
9 ;; load/stores get merged to an v4i32 vector type severely bad code
10 ;; gets generated: it painstakingly copies the values to a temporary
11 ;; location on the stack, with vector ops, in order to then use
12 ;; integer ops to load from the temporary stack location and store to
13 ;; the final location. Yuck!
14
15 %struct.X = type { i32, i32, i32, i32 }
16
17 @fx = common global %struct.X zeroinitializer, align 4
18 @fy = common global %struct.X zeroinitializer, align 4
19
20 ;; In this test case, lvx and stvx instructions should NOT be
21 ;; generated, as the alignment is not sufficient for it to be
22 ;; worthwhile.
23
24 ;; CHECK-LABEL: f:
25 ;; CHECK: lwzu
26 ;; CHECK-NEXT: lwz
27 ;; CHECK-NEXT: lwz
28 ;; CHECK-NEXT: lwz
29 ;; CHECK-NEXT: stwu
30 ;; CHECK-NEXT: stw
31 ;; CHECK-NEXT: stw
32 ;; CHECK-NEXT: stw
33 ;; CHECK-NEXT: blr
;; Every load/store here is only 4-byte aligned, so merging them into a
;; 16-byte vector access must be rejected on this target.
34 define void @f() {
35 entry:
36 %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
37 %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
38 %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
39 %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
40 store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
41 store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
42 store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
43 store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
44 ret void
45 }
46
47 @gx = common global %struct.X zeroinitializer, align 16
48 @gy = common global %struct.X zeroinitializer, align 16
49
50 ;; In this test, lvx and stvx instructions SHOULD be generated, as
51 ;; the 16-byte alignment of the new load/store is acceptable.
52 ;; CHECK-LABEL: g:
53 ;; CHECK: lvx
54 ;; CHECK: stvx
55 ;; CHECK: blr
;; The first access in each group is 16-byte aligned (and note the later
;; accesses are only 4-byte aligned — unequal alignments must still merge),
;; so the four i32 operations may be combined into one aligned vector op.
56 define void @g() {
57 entry:
58 %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 16
59 %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
60 %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
61 %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
62 store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 16
63 store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
64 store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
65 store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
66 ret void
67 }
290290 ret void
291291 }
292292
293 ;; On x86, even unaligned copies can be merged to vector ops.
293294 ; CHECK-LABEL: merge_loads_no_align:
294295 ; load:
295 ; CHECK: movl
296 ; CHECK: movl
297 ; CHECK: movl
298 ; CHECK: movl
296 ; CHECK: vmovups
299297 ; store:
300 ; CHECK: movl
301 ; CHECK: movl
302 ; CHECK: movl
303 ; CHECK: movl
298 ; CHECK: vmovups
304299 ; CHECK: ret
305300 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
306301 %a1 = icmp sgt i32 %count, 0