llvm.org GIT mirror llvm / 99d3814
[ARM] MVE VPT Block Pass. Initial commit of a new pass to create vector predication blocks, called VPT blocks, that are supported by the Armv8.1-M MVE architecture. This is a first naive implementation, i.e., for 2 consecutive predicated instructions I1 and I2, for example, it will generate 2 VPT blocks: "VPST; I1; VPST; I2". A more optimal implementation would obviously put instructions in the same VPT block when they are predicated on the same condition and when it is allowed to do this: "VPTT; I1; I2". We will address this optimisation with follow-up patches when the groundwork is in. Creating VPT blocks is very similar to creating IT blocks, which is the reason I added this to Thumb2ITBlocks.cpp. This allows reuse of the def-use analysis that we need for the more optimal implementation. VPT blocks cannot be nested in IT blocks, and vice versa, and so these 2 passes cannot interact with each other. Instructions allowed in VPT blocks must be MVE instructions that are marked as VPT compatible. Differential Revision: https://reviews.llvm.org/D63247 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363370 91177308-0d34-0410-b5e6-96231b3b80d8 Sjoerd Meijer 3 months ago
7 changed file(s) with 227 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
4545 FunctionPass *createARMConstantIslandPass();
4646 FunctionPass *createMLxExpansionPass();
4747 FunctionPass *createThumb2ITBlockPass();
48 FunctionPass *createMVEVPTBlockPass();
4849 FunctionPass *createARMOptimizeBarriersPass();
4950 FunctionPass *createThumb2SizeReductionPass(
5051 std::function Ftor = nullptr);
6768 void initializeARMConstantIslandsPass(PassRegistry &);
6869 void initializeARMExpandPseudoPass(PassRegistry &);
6970 void initializeThumb2SizeReducePass(PassRegistry &);
71 void initializeMVEVPTBlockPass(PassRegistry &);
7072
7173 } // end namespace llvm
7274
9494 initializeARMExecutionDomainFixPass(Registry);
9595 initializeARMExpandPseudoPass(Registry);
9696 initializeThumb2SizeReducePass(Registry);
97 initializeMVEVPTBlockPass(Registry);
9798 }
9899
99100 static std::unique_ptr createTLOF(const Triple &TT) {
507508 return !MF.getSubtarget().isThumb1Only();
508509 }));
509510 }
511 addPass(createMVEVPTBlockPass());
510512 addPass(createThumb2ITBlockPass());
511513 }
512514
315315 FunctionPass *llvm::createThumb2ITBlockPass() {
316316 return new Thumb2ITBlockPass();
317317 }
318
319 #undef DEBUG_TYPE
320 #define DEBUG_TYPE "arm-mve-vpt"
321
322 namespace {
323 class MVEVPTBlock : public MachineFunctionPass {
324 public:
325 static char ID;
326 const Thumb2InstrInfo *TII;
327 const TargetRegisterInfo *TRI;
328
329 MVEVPTBlock() : MachineFunctionPass(ID) {}
330
331 bool runOnMachineFunction(MachineFunction &Fn) override;
332
333 MachineFunctionProperties getRequiredProperties() const override {
334 return MachineFunctionProperties().set(
335 MachineFunctionProperties::Property::NoVRegs);
336 }
337
338 StringRef getPassName() const override {
339 return "MVE VPT block insertion pass";
340 }
341
342 private:
343 bool InsertVPTBlocks(MachineBasicBlock &MBB);
344 };
345
346 char MVEVPTBlock::ID = 0;
347
348 } // end anonymous namespace
349
350 INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
351
// Mask immediates for VPT/VPST instructions. The pass in this file only uses
// T, the value it attaches to every VPST it creates.
// NOTE(review): the enumerator names presumably spell the Then/Else pattern of
// the instructions in the block -- confirm against the Armv8.1-M MVE
// documentation.
enum VPTMaskValue {
  // One-letter pattern.
  T = 0x8, // 0b1000

  // Two-letter patterns.
  TT = 0x4, // 0b0100
  TE = 0xC, // 0b1100

  // Three-letter patterns.
  TTT = 0x2, // 0b0010
  TTE = 0x6, // 0b0110
  TEE = 0xA, // 0b1010
  TET = 0xE, // 0b1110

  // Four-letter patterns.
  TTTT = 0x1, // 0b0001
  TTTE = 0x3, // 0b0011
  TTEE = 0x5, // 0b0101
  TTET = 0x7, // 0b0111
  TEEE = 0x9, // 0b1001
  TEET = 0xB, // 0b1011
  TETT = 0xD, // 0b1101
  TETE = 0xF  // 0b1111
};
369
370 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
371 bool Modified = false;
372 MachineBasicBlock::iterator MBIter = Block.begin();
373 MachineBasicBlock::iterator EndIter = Block.end();
374
375 while (MBIter != EndIter) {
376 MachineInstr *MI = &*MBIter;
377 unsigned PredReg = 0;
378 DebugLoc dl = MI->getDebugLoc();
379
380 ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
381
382 // The idea of the predicate is that None, Then and Else are for use when
383 // handling assembly language: they correspond to the three possible
384 // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
385 // from assembly source or disassembled from object code, you expect to see
386 // a mixture whenever there's a long VPT block. But in code generation, we
387 // hope we'll never generate an Else as input to this pass.
388
389 assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
390
391 if (Pred == ARMVCC::None) {
392 ++MBIter;
393 continue;
394 }
395
396 MachineInstrBuilder MIBuilder =
397 BuildMI(Block, MBIter, dl, TII->get(ARM::t2VPST));
398 MachineInstr *LastITMI = MI;
399 MachineBasicBlock::iterator InsertPos = MIBuilder.getInstr();
400
401 // The mask value for the VPST instruction is T = 0b1000 = 8
402 MIBuilder.addImm(VPTMaskValue::T);
403
404 finalizeBundle(Block, InsertPos.getInstrIterator(),
405 ++LastITMI->getIterator());
406 Modified = true;
407 LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump(););
408
409 ++MBIter;
410 }
411 return Modified;
412 }
413
414 bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
415 const ARMSubtarget &STI =
416 static_cast(Fn.getSubtarget());
417
418 if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
419 return false;
420
421 TII = static_cast(STI.getInstrInfo());
422 TRI = STI.getRegisterInfo();
423
424 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
425 << "********** Function: " << Fn.getName() << '\n');
426
427 bool Modified = false;
428 for (MachineBasicBlock &MBB : Fn)
429 Modified |= InsertVPTBlocks(MBB);
430
431 LLVM_DEBUG(dbgs() << "**************************************\n");
432 return Modified;
433 }
434
435 /// createMVEVPTBlock - Returns an instance of the MVE VPT block
436 /// insertion pass.
437 FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
684684 return ARMCC::AL;
685685 return getInstrPredicate(MI, PredReg);
686686 }
687
688 int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
689 const MCInstrDesc &MCID = MI.getDesc();
690
691 if (!MCID.OpInfo)
692 return -1;
693
694 for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
695 if (ARM::isVpred(MCID.OpInfo[i].OperandType))
696 return i;
697
698 return -1;
699 }
700
701 ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
702 unsigned &PredReg) {
703 int PIdx = findFirstVPTPredOperandIdx(MI);
704 if (PIdx == -1) {
705 PredReg = 0;
706 return ARMVCC::None;
707 }
708
709 PredReg = MI.getOperand(PIdx+1).getReg();
710 return (ARMVCC::VPTCodes)MI.getOperand(PIdx).getImm();
711 }
6767 /// to llvm::getInstrPredicate except it returns AL for conditional branch
6868 /// instructions which are "predicated", but are not in IT blocks.
6969 ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
70
// getVPTInstrPredicate: the VPT analogue of getITInstrPredicate, plus a helper
// function corresponding to MachineInstr::findFirstPredOperandIdx.
73 int findFirstVPTPredOperandIdx(const MachineInstr &MI);
74 ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI,
75 unsigned &PredReg);
7076 }
7177
7278 #endif
124124 ; CHECK-NEXT: Machine Natural Loop Construction
125125 ; CHECK-NEXT: Machine Block Frequency Analysis
126126 ; CHECK-NEXT: If Converter
127 ; CHECK-NEXT: MVE VPT block insertion pass
127128 ; CHECK-NEXT: Thumb IT blocks insertion pass
128129 ; CHECK-NEXT: MachineDominator Tree Construction
129130 ; CHECK-NEXT: Machine Natural Loop Construction
0 # RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s
1
2 --- |
3 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
4 target triple = "thumbv8.1m.main-arm-none-eabi"
5
6 define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 {
7 entry:
8 %conv.i = zext i16 %p to i32
9 %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2
10 ret <4 x float> %0
11 }
12
13 declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1
14
15 attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
16 attributes #1 = { nounwind readnone }
17 attributes #2 = { nounwind }
18
19
20 ...
21 ---
22 name: test_vminnmq_m_f32_v2
23 alignment: 2
24 exposesReturnsTwice: false
25 legalized: false
26 regBankSelected: false
27 selected: false
28 failedISel: false
29 tracksRegLiveness: true
30 hasWinCFI: false
31 registers: []
32 liveins:
33 - { reg: '$q0', virtual-reg: '' }
34 - { reg: '$q1', virtual-reg: '' }
35 - { reg: '$q2', virtual-reg: '' }
36 - { reg: '$r0', virtual-reg: '' }
37 frameInfo:
38 isFrameAddressTaken: false
39 isReturnAddressTaken: false
40 hasStackMap: false
41 hasPatchPoint: false
42 stackSize: 0
43 offsetAdjustment: 0
44 maxAlignment: 0
45 adjustsStack: false
46 hasCalls: false
47 stackProtector: ''
48 maxCallFrameSize: 0
49 cvBytesOfCalleeSavedRegisters: 0
50 hasOpaqueSPAdjustment: false
51 hasVAStart: false
52 hasMustTailInVarArgFunc: false
53 localFrameSize: 0
54 savePoint: ''
55 restorePoint: ''
56 fixedStack: []
57 stack: []
58 constants: []
59 body: |
60 bb.0.entry:
61 liveins: $q0, $q1, $q2, $r0
62
63 ; CHECK: VPST 8, implicit-def $p0
64 ; CHECK-NEXT: $q0 = nnan ninf nsz VMINNMf32 killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0
65
66 $vpr = VMSR_P0 killed $r0, 14, $noreg
67 renamable $q0 = nnan ninf nsz VMINNMf32 killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0
68 tBX_RET 14, $noreg, implicit $q0
69
70 ...