llvm.org GIT mirror llvm / 2a0c417
PR 18466: Fix ARM Pseudo Expansion. When expanding NEON pseudo stores, the expansion may miss the implicit uses of sub-registers, which may cause the post-RA scheduler to reorder instructions in a way that breaks anti-dependencies. For example: VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg will be expanded to VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg; an instruction that defines %D20 may then be scheduled before the store by mistake. This patch adds implicit uses for such cases. For the example above, it emits: VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199282 91177308-0d34-0410-b5e6-96231b3b80d8 Weiming Zhao 6 years ago
2 changed file(s) with 59 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
478478
479479 if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
480480 MIB->addRegisterKilled(SrcReg, TRI, true);
481 else if (!SrcIsUndef)
482 MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
481483 TransferImpOps(MI, MIB, MIB);
482484
483485 // Transfer memoperands.
603605 MIB.addOperand(MI.getOperand(OpIdx++));
604606 MIB.addOperand(MI.getOperand(OpIdx++));
605607
606 if (SrcIsKill) // Add an implicit kill for the super-reg.
607 MIB->addRegisterKilled(SrcReg, TRI, true);
608 // Add an implicit kill and use for the super-reg.
609 MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
608610 TransferImpOps(MI, MIB, MIB);
609611 MI.eraseFromParent();
610612 }
0 ; RUN: llc < %s -march=arm -mattr=+neon -print-before=post-RA-sched > %t 2>&1 && FileCheck < %t %s
1
2 define void @vst(i8* %m, [4 x i64] %v) {
3 entry:
4 ; CHECK: vst:
5 ; CHECK: VST1d64Q %R{{[0-9]+}}, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}
6
7 %v0 = extractvalue [4 x i64] %v, 0
8 %v1 = extractvalue [4 x i64] %v, 1
9 %v2 = extractvalue [4 x i64] %v, 2
10 %v3 = extractvalue [4 x i64] %v, 3
11
12 %t0 = bitcast i64 %v0 to <8 x i8>
13 %t1 = bitcast i64 %v1 to <8 x i8>
14 %t2 = bitcast i64 %v2 to <8 x i8>
15 %t3 = bitcast i64 %v3 to <8 x i8>
16
17 %s0 = bitcast <8 x i8> %t0 to <1 x i64>
18 %s1 = bitcast <8 x i8> %t1 to <1 x i64>
19 %s2 = bitcast <8 x i8> %t2 to <1 x i64>
20 %s3 = bitcast <8 x i8> %t3 to <1 x i64>
21
22 %tmp0 = bitcast <1 x i64> %s2 to i64
23 %tmp1 = bitcast <1 x i64> %s3 to i64
24
25 %n0 = insertelement <2 x i64> undef, i64 %tmp0, i32 0
26 %n1 = insertelement <2 x i64> %n0, i64 %tmp1, i32 1
27
28 call void @llvm.arm.neon.vst4.v1i64(i8* %m, <1 x i64> %s0, <1 x i64> %s1, <1 x i64> %s2, <1 x i64> %s3, i32 8)
29
30 call void @bar(<2 x i64> %n1)
31
32 ret void
33 }
34
35 %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
36 define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
37 ; CHECK: vtbx4:
38 ; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}
39 %tmp1 = load <8 x i8>* %A
40 %tmp2 = load %struct.__neon_int8x8x4_t* %B
41 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
42 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
43 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
44 %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
45 %tmp7 = load <8 x i8>* %C
46 %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
47 call void @bar2(%struct.__neon_int8x8x4_t %tmp2, <8 x i8> %tmp8)
48 ret <8 x i8> %tmp8
49 }
50
51 declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
52 declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
53 declare void @bar2(%struct.__neon_int8x8x4_t, <8 x i8>)
54 declare void @bar(<2 x i64> %arg)