llvm.org GIT mirror llvm / ef867d4
[3.9.1] Merging r283070 - [X86][AVX] Ensure broadcast loads respect dependencies
To allow broadcast loads of a non-zero'th vector element, lowerVectorShuffleAsBroadcast can replace a load with a new load with an adjusted address, but unfortunately we weren't ensuring that the new load respected the same dependencies.
This patch adds a TokenFactor and updates all dependencies of the old load to reference the new load instead.
Bug found during internal testing.
Differential Revision: https://reviews.llvm.org/D25039
As discussed on PR30596
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@286251 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim 2 years ago
2 changed file(s) with 24 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
86558655 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
86568656 DAG.getMachineFunction().getMachineMemOperand(
86578657 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
8658
8659 // Make sure the newly-created LOAD is in the same position as Ld in
8660 // terms of dependency. We create a TokenFactor for Ld and V,
8661 // and update uses of Ld's output chain to use the TokenFactor.
8662 if (Ld->hasAnyUseOfValue(1)) {
8663 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
8664 SDValue(Ld, 1), SDValue(V.getNode(), 1));
8665 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
8666 DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
8667 SDValue(V.getNode(), 1));
8668 }
86588669 } else if (!BroadcastFromReg) {
86598670 // We can't broadcast from a vector register.
86608671 return SDValue();
547547 }
548548
549549 ;
550 ; FIXME: When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
550 ; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
551551 ;
552552 define float @broadcast_lifetime() nounwind {
553553 ; X32-LABEL: broadcast_lifetime:
554554 ; X32: ## BB#0:
555555 ; X32-NEXT: pushl %esi
556 ; X32-NEXT: subl $40, %esp
556 ; X32-NEXT: subl $56, %esp
557557 ; X32-NEXT: leal {{[0-9]+}}(%esp), %esi
558558 ; X32-NEXT: movl %esi, (%esp)
559559 ; X32-NEXT: calll _gfunc
560 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
561 ; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) ## 16-byte Spill
560562 ; X32-NEXT: movl %esi, (%esp)
561563 ; X32-NEXT: calll _gfunc
562564 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
563 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
564 ; X32-NEXT: vsubss %xmm0, %xmm1, %xmm0
565 ; X32-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## 16-byte Folded Reload
565566 ; X32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
566567 ; X32-NEXT: flds {{[0-9]+}}(%esp)
567 ; X32-NEXT: addl $40, %esp
568 ; X32-NEXT: addl $56, %esp
568569 ; X32-NEXT: popl %esi
569570 ; X32-NEXT: retl
570571 ;
571572 ; X64-LABEL: broadcast_lifetime:
572573 ; X64: ## BB#0:
573 ; X64-NEXT: subq $24, %rsp
574 ; X64-NEXT: leaq (%rsp), %rdi
575 ; X64-NEXT: callq _gfunc
574 ; X64-NEXT: subq $40, %rsp
576575 ; X64-NEXT: leaq (%rsp), %rdi
577576 ; X64-NEXT: callq _gfunc
578577 ; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0
579 ; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm1
580 ; X64-NEXT: vsubss %xmm0, %xmm1, %xmm0
581 ; X64-NEXT: addq $24, %rsp
578 ; X64-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill
579 ; X64-NEXT: leaq (%rsp), %rdi
580 ; X64-NEXT: callq _gfunc
581 ; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0
582 ; X64-NEXT: vsubss {{[0-9]+}}(%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload
583 ; X64-NEXT: addq $40, %rsp
582584 ; X64-NEXT: retq
583585 %1 = alloca <4 x float>, align 16
584586 %2 = alloca <4 x float>, align 16