llvm.org GIT mirror llvm / e596ed0
[ARM] v8.3-a complex number support New instructions are added to AArch32 and AArch64 to aid floating-point multiplication and addition of complex numbers, where the complex numbers are packed in a vector register as a pair of elements. The Imaginary part of the number is placed in the more significant element, and the Real part of the number is placed in the less significant element. This patch adds assembler for the ARM target. Differential Revision: https://reviews.llvm.org/D36789 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314511 91177308-0d34-0410-b5e6-96231b3b80d8 Sam Parker 2 years ago
10 changed file(s) with 610 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
6868 def NVMulSLFrm : Format<40>;
6969 def NVTBLFrm : Format<41>;
7070 def DPSoRegImmFrm : Format<42>;
71 def N3RegCplxFrm : Format<43>;
7172
7273 // Misc flags.
7374
25122513 class NEONDataTypeAsmPseudoInst :
25132514 AsmPseudoInst, Requires<[HasNEON]>;
25142515
2516 // Extension of NEON 3-vector data processing instructions in coprocessor 8
2517 // encoding space, introduced in ARMv8.3-A.
2518 class N3VCP8 op24_23, bits<2> op21_20, bit op6, bit op4,
2519 dag oops, dag iops, InstrItinClass itin,
2520 string opc, string dt, string asm, string cstr, list pattern>
2521 : NeonInp
2522 dt, asm, cstr, pattern> {
2523 bits<5> Vd;
2524 bits<5> Vn;
2525 bits<5> Vm;
2526
2527 let DecoderNamespace = "VFPV8";
2528 // These have the same encodings in ARM and Thumb2
2529 let PostEncoderMethod = "";
2530
2531 let Inst{31-25} = 0b1111110;
2532 let Inst{24-23} = op24_23;
2533 let Inst{22} = Vd{4};
2534 let Inst{21-20} = op21_20;
2535 let Inst{19-16} = Vn{3-0};
2536 let Inst{15-12} = Vd{3-0};
2537 let Inst{11-8} = 0b1000;
2538 let Inst{7} = Vn{4};
2539 let Inst{6} = op6;
2540 let Inst{5} = Vm{4};
2541 let Inst{4} = op4;
2542 let Inst{3-0} = Vm{3-0};
2543 }
2544
2545 // Extension of NEON 2-vector-and-scalar data processing instructions in
2546 // coprocessor 8 encoding space, introduced in ARMv8.3-A.
2547 class N3VLaneCP8 op21_20, bit op6, bit op4,
2548 dag oops, dag iops, InstrItinClass itin,
2549 string opc, string dt, string asm, string cstr, list pattern>
2550 : NeonInp
2551 dt, asm, cstr, pattern> {
2552 bits<5> Vd;
2553 bits<5> Vn;
2554 bits<5> Vm;
2555
2556 let DecoderNamespace = "VFPV8";
2557 // These have the same encodings in ARM and Thumb2
2558 let PostEncoderMethod = "";
2559
2560 let Inst{31-24} = 0b11111110;
2561 let Inst{23} = op23;
2562 let Inst{22} = Vd{4};
2563 let Inst{21-20} = op21_20;
2564 let Inst{19-16} = Vn{3-0};
2565 let Inst{15-12} = Vd{3-0};
2566 let Inst{11-8} = 0b1000;
2567 let Inst{7} = Vn{4};
2568 let Inst{6} = op6;
2569 // Bit 5 set by sub-classes
2570 let Inst{4} = op4;
2571 let Inst{3-0} = Vm{3-0};
2572 }
2573
2574 // Operand types for complex instructions
// Assembler operand class for a complex-rotation immediate.
// The matcher predicate is the C++ template isComplexRotation instantiated
// with Angle and Remainder; Type is appended to both the diagnostic type and
// the operand class name (this change uses "Even" and "Odd").
// NOTE(review): the parser in this change handles
// Match_InvalidComplexRotationEven/Odd and defines
// addComplexRotationEven/OddOperands, which presumably are the names derived
// from DiagnosticType and Name below - confirm against the generated matcher.
2575 class ComplexRotationOperand
2576 : AsmOperandClass {
2577 let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">";
2578 let DiagnosticType = "InvalidComplexRotation" # Type;
2579 let Name = "ComplexRotation" # Type;
2580 }
// Rotation operand parsed with isComplexRotation<90, 0>: a constant that is a
// multiple of 90 and no larger than 270 (0, 90, 180 or 270). It is printed by
// printComplexRotationOp<90, 0>, i.e. "#" followed by the stored value * 90.
2581 def complexrotateop : Operand {
2582 let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
2583 let PrintMethod = "printComplexRotationOp<90, 0>";
2584 }
// Rotation operand parsed with isComplexRotation<180, 90>: a constant equal to
// 90 modulo 180 and no larger than 270 (90 or 270). It is printed by
// printComplexRotationOp<180, 90>, i.e. "#" followed by stored value * 180 + 90.
2585 def complexrotateopodd : Operand {
2586 let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
2587 let PrintMethod = "printComplexRotationOp<180, 90>";
2588 }
2589
25152590 // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
25162591 def : TokenAlias<".s8", ".i8">;
25172592 def : TokenAlias<".u8", ".i8">;
107107 def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
108108 def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
109109 def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
110 def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
110111 def VectorIndex8 : Operand, ImmLeaf
111112 return ((uint64_t)Imm) < 8;
112113 }]> {
125126 return ((uint64_t)Imm) < 2;
126127 }]> {
127128 let ParserMatchClass = VectorIndex32Operand;
129 let PrintMethod = "printVectorIndex";
130 let MIOperandInfo = (ops i32imm);
131 }
132 def VectorIndex64 : Operand, ImmLeaf
133 return ((uint64_t)Imm) < 1;
134 }]> {
135 let ParserMatchClass = VectorIndex64Operand;
128136 let PrintMethod = "printVectorIndex";
129137 let MIOperandInfo = (ops i32imm);
130138 }
47224730 def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>;
47234731
47244732 } // HasDotProd
4733
4734 // ARMv8.3 complex operations
4735 class BaseN3VCP8ComplexTied
4736 InstrItinClass itin, dag oops, dag iops,
4737 string opc, string dt, list pattern>
4738 : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
4739 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
4740 bits<2> rot;
4741 let Inst{24-23} = rot;
4742 }
4743
4744 class BaseN3VCP8ComplexOdd
4745 InstrItinClass itin, dag oops, dag iops, string opc,
4746 string dt, list pattern>
4747 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
4748 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
4749 bits<1> rot;
4750 let Inst{24} = rot;
4751 }
4752
4753 class BaseN3VCP8ComplexTiedLane32
4754 dag oops, dag iops, string opc, string dt,
4755 list pattern>
4756 : N3VLaneCP8
4757 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4758 bits<2> rot;
4759 bit lane;
4760
4761 let Inst{21-20} = rot;
4762 let Inst{5} = lane;
4763 }
4764
4765 class BaseN3VCP8ComplexTiedLane64
4766 dag oops, dag iops, string opc, string dt,
4767 list pattern>
4768 : N3VLaneCP8
4769 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4770 bits<2> rot;
4771 bit lane;
4772
4773 let Inst{21-20} = rot;
4774 let Inst{5} = Vm{4};
4775 // This is needed because the lane operand does not have any bits in the
4776 // encoding (it only has one possible value), so we need to manually set it
4777 // to its default value.
4778 let DecoderMethod = "DecodeNEONComplexLane64Instruction";
4779 }
4780
4781 multiclass N3VCP8ComplexTied
4782 string OpcodeStr, SDPatternOperator Op> {
4783 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4784 def v4f16 : BaseN3VCP8ComplexTied
4785 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4786 OpcodeStr, "f16", []>;
4787 def v8f16 : BaseN3VCP8ComplexTied
4788 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4789 OpcodeStr, "f16", []>;
4790 }
4791 let Predicates = [HasNEON,HasV8_3a] in {
4792 def v2f32 : BaseN3VCP8ComplexTied
4793 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4794 OpcodeStr, "f32", []>;
4795 def v4f32 : BaseN3VCP8ComplexTied
4796 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4797 OpcodeStr, "f32", []>;
4798 }
4799 }
4800
4801 multiclass N3VCP8ComplexOdd
4802 string OpcodeStr, SDPatternOperator Op> {
4803 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4804 def v4f16 : BaseN3VCP8ComplexOdd
4805 (outs DPR:$Vd),
4806 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4807 OpcodeStr, "f16", []>;
4808 def v8f16 : BaseN3VCP8ComplexOdd
4809 (outs QPR:$Vd),
4810 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4811 OpcodeStr, "f16", []>;
4812 }
4813 let Predicates = [HasNEON,HasV8_3a] in {
4814 def v2f32 : BaseN3VCP8ComplexOdd
4815 (outs DPR:$Vd),
4816 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4817 OpcodeStr, "f32", []>;
4818 def v4f32 : BaseN3VCP8ComplexOdd
4819 (outs QPR:$Vd),
4820 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4821 OpcodeStr, "f32", []>;
4822 }
4823 }
4824
4825 // These instructions index by pairs of lanes, so the VectorIndexes are twice
4826 // as wide as the data types.
4827 multiclass N3VCP8ComplexTiedLane
4828 SDPatternOperator Op> {
4829 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4830 def v4f16_indexed : BaseN3VCP8ComplexTiedLane32
4831 (outs DPR:$Vd),
4832 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4833 VectorIndex32:$lane, complexrotateop:$rot),
4834 OpcodeStr, "f16", []>;
4835 def v8f16_indexed : BaseN3VCP8ComplexTiedLane32
4836 (outs QPR:$Vd),
4837 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
4838 VectorIndex32:$lane, complexrotateop:$rot),
4839 OpcodeStr, "f16", []>;
4840 }
4841 let Predicates = [HasNEON,HasV8_3a] in {
4842 def v2f32_indexed : BaseN3VCP8ComplexTiedLane64
4843 (outs DPR:$Vd),
4844 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4845 complexrotateop:$rot),
4846 OpcodeStr, "f32", []>;
4847 def v4f32_indexed : BaseN3VCP8ComplexTiedLane64
4848 (outs QPR:$Vd),
4849 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4850 complexrotateop:$rot),
4851 OpcodeStr, "f32", []>;
4852 }
4853 }
4854
4855 defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
4856 defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
4857 defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
47254858
47264859 // Vector Subtract Operations.
47274860
17551755 if (Kind != k_VectorIndex) return false;
17561756 return VectorIndex.Val < 2;
17571757 }
// Returns true when this operand is a vector index whose value is below 1,
// i.e. lane 0 (assuming VectorIndex.Val is never negative - TODO confirm).
1758 bool isVectorIndex64() const {
1759 if (Kind != k_VectorIndex) return false;
1760 return VectorIndex.Val < 1;
1761 }
17581762
17591763 bool isNEONi8splat() const {
17601764 if (!isImm()) return false;
18841888 return true;
18851889 }
18861890
// Predicate for complex-rotation immediates. Angle and Remainder come from the
// instantiation; the operand class builds the predicate name as
// "isComplexRotation<Angle, Remainder>".
// Returns true only for a constant immediate whose value is congruent to
// Remainder modulo Angle and is at most 270.
1891 template
1892 bool isComplexRotation() const {
1893 if (!isImm()) return false;
1894
// Only a constant expression can be compared against the allowed rotations.
1895 const MCConstantExpr *CE = dyn_cast(getImm());
1896 if (!CE) return false;
// NOTE(review): the value is stored as unsigned, so a negative immediate
// presumably wraps to a very large number and is rejected by the <= 270
// bound below - confirm that getValue() returns a signed type.
1897 uint64_t Value = CE->getValue();
1898
1899 return (Value % Angle == Remainder && Value <= 270);
1900 }
1901
18871902 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
18881903 // Add as immediates when possible. Null MCExpr = 0.
18891904 if (!Expr)
26272642 Inst.addOperand(MCOperand::createImm(getVectorIndex()));
26282643 }
26292644
// Appends this operand's vector index to Inst as a single immediate operand.
// N is the number of MCInst operands expected for this operand and must be 1.
2645 void addVectorIndex64Operands(MCInst &Inst, unsigned N) const {
2646 assert(N == 1 && "Invalid number of operands!");
2647 Inst.addOperand(MCOperand::createImm(getVectorIndex()));
2648 }
2649
26302650 void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
26312651 assert(N == 1 && "Invalid number of operands!");
26322652 // The immediate encodes the type of constant as well as the value.
27372757 Imm |= (Value & 1) << i;
27382758 }
27392759 Inst.addOperand(MCOperand::createImm(Imm | 0x1e00));
2760 }
2761
// Appends the rotation to Inst as one immediate holding the constant divided
// by 90 (so 0/90/180/270 become 0/1/2/3).
// NOTE(review): CE is the result of dyn_cast and is dereferenced without a
// null check. This presumably relies on isComplexRotation having already
// accepted the operand (it rejects non-constant immediates) - confirm, and
// consider cast<>, which asserts instead of returning null.
2762 void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const {
2763 assert(N == 1 && "Invalid number of operands!");
2764 const MCConstantExpr *CE = dyn_cast(getImm());
2765 Inst.addOperand(MCOperand::createImm(CE->getValue() / 90));
2766 }
2767
// Appends the rotation to Inst as one immediate holding (constant - 90) / 180
// (so 90 becomes 0 and 270 becomes 1).
// NOTE(review): CE is the result of dyn_cast and is dereferenced without a
// null check. This presumably relies on isComplexRotation having already
// accepted the operand (it rejects non-constant immediates) - confirm, and
// consider cast<>, which asserts instead of returning null.
2768 void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const {
2769 assert(N == 1 && "Invalid number of operands!");
2770 const MCConstantExpr *CE = dyn_cast(getImm());
2771 Inst.addOperand(MCOperand::createImm((CE->getValue() - 90) / 180));
27402772 }
27412773
27422774 void print(raw_ostream &OS) const override;
54315463 Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
54325464 Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
54335465 Mnemonic == "bxns" || Mnemonic == "blxns" ||
5434 Mnemonic == "vudot" || Mnemonic == "vsdot")
5466 Mnemonic == "vudot" || Mnemonic == "vsdot" ||
5467 Mnemonic == "vcmla" || Mnemonic == "vcadd")
54355468 return Mnemonic;
54365469
54375470 // First, split out any predication code. Ignore mnemonics we know aren't
55205553 Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
55215554 (FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
55225555 Mnemonic == "vmovx" || Mnemonic == "vins" ||
5523 Mnemonic == "vudot" || Mnemonic == "vsdot") {
5556 Mnemonic == "vudot" || Mnemonic == "vsdot" ||
5557 Mnemonic == "vcmla" || Mnemonic == "vcadd") {
55245558 // These mnemonics are never predicable
55255559 CanAcceptPredicationCode = false;
55265560 } else if (!isThumb()) {
91549188 return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted");
91559189 }
91569190 }
9191 case Match_InvalidComplexRotationEven:
9192 return Error(IDLoc, "complex rotation must be 0, 90, 180 or 270");
9193 case Match_InvalidComplexRotationOdd:
9194 return Error(IDLoc, "complex rotation must be 90 or 270");
91579195 }
91589196
91599197 llvm_unreachable("Implement any new match types added!");
321321 uint64_t Address, const void *Decoder);
322322 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
323323 uint64_t Address, const void *Decoder);
324 static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
325 unsigned Val,
326 uint64_t Address,
327 const void *Decoder);
324328
325329 static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
326330 uint64_t Address, const void *Decoder);
52145218 return S;
52155219 }
52165220
// Custom decoder for the indexed complex instructions whose lane operand has
// no bits in the encoding (the BaseN3VCP8ComplexTiedLane64 TableGen class
// selects it through DecoderMethod).
// Adds, in order: the destination register, the same register again, Vn, Vm,
// a lane immediate of 0 and the rotation field. Returns the accumulated
// DecodeStatus, or Fail as soon as a register cannot be decoded.
5221 static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
5222 unsigned Insn,
5223 uint64_t Address,
5224 const void *Decoder) {
// Each register number is 5 bits: a 4-bit field plus one separate high bit
// (Vd: bits 15-12 and bit 22, Vn: bits 19-16 and bit 7, Vm: bits 3-0 and
// bit 5), matching the Inst{...} assignments in the TableGen classes.
5225 unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
5226 Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
5227 unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0);
5228 Vn |= (fieldFromInstruction(Insn, 7, 1) << 4);
5229 unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
5230 Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
// Bit 6 selects the Q-register form; bits 21-20 hold the rotation.
5231 unsigned q = (fieldFromInstruction(Insn, 6, 1) << 0);
5232 unsigned rotate = (fieldFromInstruction(Insn, 20, 2) << 0);
5233
5234 DecodeStatus S = MCDisassembler::Success;
5235
// Vd and Vn use the Q decoder when q is set, otherwise the D decoder; Vm is
// always decoded as a D register below.
5236 auto DestRegDecoder = q ? DecodeQPRRegisterClass : DecodeDPRRegisterClass;
5237
// Vd is added twice on purpose: once as the output and once as the tied
// input $src1 (the instruction uses the constraint "$src1 = $Vd").
5238 if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder)))
5239 return MCDisassembler::Fail;
5240 if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder)))
5241 return MCDisassembler::Fail;
5242 if (!Check(S, DestRegDecoder(Inst, Vn, Address, Decoder)))
5243 return MCDisassembler::Fail;
5244 if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
5245 return MCDisassembler::Fail;
5246 // The lane index does not have any bits in the encoding, because it can only
5247 // be 0.
5248 Inst.addOperand(MCOperand::createImm(0));
5249 Inst.addOperand(MCOperand::createImm(rotate));
5250
5251 return S;
5252 }
5253
52175254 static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
52185255 uint64_t Address, const void *Decoder) {
52195256 DecodeStatus S = MCDisassembler::Success;
15341534 printRegName(O, MI->getOperand(OpNum).getReg() + 6);
15351535 O << "}";
15361536 }
1537
// Prints the rotation operand at OpNo as "#" followed by the rotation in
// degrees, computed as immediate * Angle + Remainder. Angle and Remainder come
// from the instantiation named in the operand's PrintMethod
// (printComplexRotationOp<90, 0> or printComplexRotationOp<180, 90>).
// STI is not used in this body.
1538 template
1539 void ARMInstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
1540 const MCSubtargetInfo &STI,
1541 raw_ostream &O) {
1542 unsigned Val = MI->getOperand(OpNo).getImm();
1543 O << "#" << (Val * Angle) + Remainder;
1544 }
1545
230230 const MCSubtargetInfo &STI, raw_ostream &O);
231231 void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
232232 const MCSubtargetInfo &STI, raw_ostream &O);
233 template
234 void printComplexRotationOp(const MCInst *MI, unsigned OpNum,
235 const MCSubtargetInfo &STI, raw_ostream &O);
233236 };
234237
235238 } // end namespace llvm
342342 NVExtFrm = 39 << FormShift,
343343 NVMulSLFrm = 40 << FormShift,
344344 NVTBLFrm = 41 << FormShift,
345 N3RegCplxFrm = 43 << FormShift,
345346
346347 //===------------------------------------------------------------------===//
347348 // Misc flags.
0 // RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,+neon,+fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=THUMB --check-prefix=FP16-THUMB
1 // RUN: FileCheck --check-prefix=STDERR <%t %s
2 // RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,+neon,+fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=ARM --check-prefix=FP16-ARM
3 // RUN: FileCheck --check-prefix=STDERR <%t %s
4
5 // RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,+neon,-fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=THUMB
6 // RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-FP16-STDERR <%t %s
7 // RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,+neon,-fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=ARM
8 // RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-FP16-STDERR <%t %s
9
10 // RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,-neon,+fullfp16 -show-encoding < %s 2>%t
11 // RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-NEON-STDERR <%t %s
12 // RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,-neon,+fullfp16 -show-encoding < %s 2>%t
13 // RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-NEON-STDERR <%t %s
14
15 // RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.2a,+neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=V82A
16 // RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=V82A
17
18 /* ==== VCMLA vector ==== */
19
20 // Valid types
21 vcmla.f16 d0, d1, d2, #0
22 // FP16-ARM: vcmla.f16 d0, d1, d2, #0 @ encoding: [0x02,0x08,0x21,0xfc]
23 // FP16-THUMB: vcmla.f16 d0, d1, d2, #0 @ encoding: [0x21,0xfc,0x02,0x08]
24 // NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
25 // V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
26 // NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
27 vcmla.f16 q0, q1, q2, #0
28 // FP16-ARM: vcmla.f16 q0, q1, q2, #0 @ encoding: [0x44,0x08,0x22,0xfc]
29 // FP16-THUMB: vcmla.f16 q0, q1, q2, #0 @ encoding: [0x22,0xfc,0x44,0x08]
30 // NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
31 // V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
32 // NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
33 vcmla.f32 d0, d1, d2, #0
34 // ARM: vcmla.f32 d0, d1, d2, #0 @ encoding: [0x02,0x08,0x31,0xfc]
35 // THUMB: vcmla.f32 d0, d1, d2, #0 @ encoding: [0x31,0xfc,0x02,0x08]
36 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
37 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
38 vcmla.f32 q0, q1, q2, #0
39 // ARM: vcmla.f32 q0, q1, q2, #0 @ encoding: [0x44,0x08,0x32,0xfc]
40 // THUMB: vcmla.f32 q0, q1, q2, #0 @ encoding: [0x32,0xfc,0x44,0x08]
41 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
42 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
43
44 // Valid rotations
45 vcmla.f32 d0, d1, d2, #90
46 // ARM: vcmla.f32 d0, d1, d2, #90 @ encoding: [0x02,0x08,0xb1,0xfc]
47 // THUMB: vcmla.f32 d0, d1, d2, #90 @ encoding: [0xb1,0xfc,0x02,0x08]
48 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
49 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
50 vcmla.f32 d0, d1, d2, #180
51 // ARM: vcmla.f32 d0, d1, d2, #180 @ encoding: [0x02,0x08,0x31,0xfd]
52 // THUMB: vcmla.f32 d0, d1, d2, #180 @ encoding: [0x31,0xfd,0x02,0x08]
53 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
54 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
55 vcmla.f32 d0, d1, d2, #270
56 // ARM: vcmla.f32 d0, d1, d2, #270 @ encoding: [0x02,0x08,0xb1,0xfd]
57 // THUMB: vcmla.f32 d0, d1, d2, #270 @ encoding: [0xb1,0xfd,0x02,0x08]
58 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
59 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
60
61 // Invalid rotations
62 vcmla.f32 d0, d1, d2, #-90
63 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
64 vcmla.f32 d0, d1, d2, #1
65 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
66 vcmla.f32 d0, d1, d2, #360
67 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
68
69 /* ==== VCADD vector ==== */
70
71 // Valid types
72 vcadd.f16 d0, d1, d2, #90
73 // FP16-ARM: vcadd.f16 d0, d1, d2, #90 @ encoding: [0x02,0x08,0x81,0xfc]
74 // FP16-THUMB: vcadd.f16 d0, d1, d2, #90 @ encoding: [0x81,0xfc,0x02,0x08]
75 // NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
76 // V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
77 // NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
78 vcadd.f16 q0, q1, q2, #90
79 // FP16-ARM: vcadd.f16 q0, q1, q2, #90 @ encoding: [0x44,0x08,0x82,0xfc]
80 // FP16-THUMB: vcadd.f16 q0, q1, q2, #90 @ encoding: [0x82,0xfc,0x44,0x08]
81 // NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
82 // V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
83 // NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
84 vcadd.f32 d0, d1, d2, #90
85 // ARM: vcadd.f32 d0, d1, d2, #90 @ encoding: [0x02,0x08,0x91,0xfc]
86 // THUMB: vcadd.f32 d0, d1, d2, #90 @ encoding: [0x91,0xfc,0x02,0x08]
87 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
88 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
89 vcadd.f32 q0, q1, q2, #90
90 // ARM: vcadd.f32 q0, q1, q2, #90 @ encoding: [0x44,0x08,0x92,0xfc]
91 // THUMB: vcadd.f32 q0, q1, q2, #90 @ encoding: [0x92,0xfc,0x44,0x08]
92 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
93 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
94
95 // Valid rotations
96 vcadd.f32 d0, d1, d2, #270
97 // ARM: vcadd.f32 d0, d1, d2, #270 @ encoding: [0x02,0x08,0x91,0xfd]
98 // THUMB: vcadd.f32 d0, d1, d2, #270 @ encoding: [0x91,0xfd,0x02,0x08]
99 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
100 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
101
102 // Invalid rotations
103 vcadd.f32 d0, d1, d2, #0
104 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
105 vcadd.f32 d0, d1, d2, #180
106 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
107 vcadd.f32 d0, d1, d2, #-90
108 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
109 vcadd.f32 d0, d1, d2, #1
110 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
111 vcadd.f32 d0, d1, d2, #360
112 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
113
114
115 /* ==== VCMLA indexed ==== */
116
117 // Valid types
118 vcmla.f16 d0, d1, d2[0], #0
119 // FP16-ARM: vcmla.f16 d0, d1, d2[0], #0 @ encoding: [0x02,0x08,0x01,0xfe]
120 // FP16-THUMB: vcmla.f16 d0, d1, d2[0], #0 @ encoding: [0x01,0xfe,0x02,0x08]
121 // NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
122 // V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
123 // NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
124 vcmla.f16 q0, q1, d2[0], #0
125 // FP16-ARM: vcmla.f16 q0, q1, d2[0], #0 @ encoding: [0x42,0x08,0x02,0xfe]
126 // FP16-THUMB: vcmla.f16 q0, q1, d2[0], #0 @ encoding: [0x02,0xfe,0x42,0x08]
127 // NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
128 // V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
129 // NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
130 vcmla.f32 d0, d1, d2[0], #0
131 // ARM: vcmla.f32 d0, d1, d2[0], #0 @ encoding: [0x02,0x08,0x81,0xfe]
132 // THUMB: vcmla.f32 d0, d1, d2[0], #0 @ encoding: [0x81,0xfe,0x02,0x08]
133 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
134 // NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
135 vcmla.f32 q0, q1, d2[0], #0
136 // ARM: vcmla.f32 q0, q1, d2[0], #0 @ encoding: [0x42,0x08,0x82,0xfe]
137 // THUMB: vcmla.f32 q0, q1, d2[0], #0 @ encoding: [0x82,0xfe,0x42,0x08]
138 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
139 // NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
140
141 // Valid rotations
142 vcmla.f32 d0, d1, d2[0], #90
143 // ARM: vcmla.f32 d0, d1, d2[0], #90 @ encoding: [0x02,0x08,0x91,0xfe]
144 // THUMB: vcmla.f32 d0, d1, d2[0], #90 @ encoding: [0x91,0xfe,0x02,0x08]
145 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
146 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
147 vcmla.f32 d0, d1, d2[0], #180
148 // ARM: vcmla.f32 d0, d1, d2[0], #180 @ encoding: [0x02,0x08,0xa1,0xfe]
149 // THUMB: vcmla.f32 d0, d1, d2[0], #180 @ encoding: [0xa1,0xfe,0x02,0x08]
150 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
151 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
152 vcmla.f32 d0, d1, d2[0], #270
153 // ARM: vcmla.f32 d0, d1, d2[0], #270 @ encoding: [0x02,0x08,0xb1,0xfe]
154 // THUMB: vcmla.f32 d0, d1, d2[0], #270 @ encoding: [0xb1,0xfe,0x02,0x08]
155 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
156 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
157
158 // Invalid rotations
159 vcmla.f32 d0, d1, d2[0], #-90
160 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
161 vcmla.f32 d0, d1, d2[0], #1
162 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
163 vcmla.f32 d0, d1, d2[0], #360
164 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
165
166 // Valid indices
167 vcmla.f16 d0, d1, d2[1], #0
168 // FP16-ARM: vcmla.f16 d0, d1, d2[1], #0 @ encoding: [0x22,0x08,0x01,0xfe]
169 // FP16-THUMB: vcmla.f16 d0, d1, d2[1], #0 @ encoding: [0x01,0xfe,0x22,0x08]
170 // V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
171 // NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
172
173 // Invalid indices
174 // FIXME: These error messages are emitted because the index operand is not
175 // valid as a rotation, so they are a bit unintuitive. Can we do better?
176 vcmla.f16 d0, d1, d2[2], #0
177 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
178 vcmla.f32 d0, d1, d2[1], #0
179 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
0 # RUN: llvm-mc -triple armv8a -mattr=+v8.3a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
1 # RUN: not llvm-mc -triple armv8a -mattr=+v8.2a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16
2 # RUN: not llvm-mc -triple armv8a -mattr=+v8.3a,-neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16
3 # RUN: not llvm-mc -triple armv8a -mattr=+v8.3a,+neon,-fullfp16 -disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
4 # RUN: FileCheck %s < %t --check-prefix=MISSING-FP16
5
6 [0x02,0x08,0x21,0xfc]
7 # CHECK-FP16: vcmla.f16 d0, d1, d2, #0
8 # MISSING-FP16: warning: invalid instruction encoding
9 [0x44,0x08,0x22,0xfc]
10 # CHECK-FP16: vcmla.f16 q0, q1, q2, #0
11 # MISSING-FP16: warning: invalid instruction encoding
12 [0x02,0x08,0x31,0xfc]
13 # CHECK: vcmla.f32 d0, d1, d2, #0
14 # MISSING: warning: invalid instruction encoding
15 [0x44,0x08,0x32,0xfc]
16 # CHECK: vcmla.f32 q0, q1, q2, #0
17 # MISSING: warning: invalid instruction encoding
18 [0x02,0x08,0xb1,0xfc]
19 # CHECK: vcmla.f32 d0, d1, d2, #90
20 # MISSING: warning: invalid instruction encoding
21 [0x02,0x08,0x31,0xfd]
22 # CHECK: vcmla.f32 d0, d1, d2, #180
23 # MISSING: warning: invalid instruction encoding
24 [0x02,0x08,0xb1,0xfd]
25 # CHECK: vcmla.f32 d0, d1, d2, #270
26 # MISSING: warning: invalid instruction encoding
27 [0x02,0x08,0x81,0xfc]
28 # CHECK-FP16: vcadd.f16 d0, d1, d2, #90
29 # MISSING-FP16: warning: invalid instruction encoding
30 [0x44,0x08,0x82,0xfc]
31 # CHECK-FP16: vcadd.f16 q0, q1, q2, #90
32 # MISSING-FP16: warning: invalid instruction encoding
33 [0x02,0x08,0x91,0xfc]
34 # CHECK: vcadd.f32 d0, d1, d2, #90
35 # MISSING: warning: invalid instruction encoding
36 [0x44,0x08,0x92,0xfc]
37 # CHECK: vcadd.f32 q0, q1, q2, #90
38 # MISSING: warning: invalid instruction encoding
39 [0x02,0x08,0x91,0xfd]
40 # CHECK: vcadd.f32 d0, d1, d2, #270
41 # MISSING: warning: invalid instruction encoding
42 [0x02,0x08,0x01,0xfe]
43 # CHECK-FP16: vcmla.f16 d0, d1, d2[0], #0
44 # MISSING-FP16: warning: invalid instruction encoding
45 [0x42,0x08,0x02,0xfe]
46 # CHECK-FP16: vcmla.f16 q0, q1, d2[0], #0
47 # MISSING-FP16: warning: invalid instruction encoding
48 [0x02,0x08,0x81,0xfe]
49 # CHECK: vcmla.f32 d0, d1, d2[0], #0
50 # MISSING: warning: invalid instruction encoding
51 [0x42,0x08,0x82,0xfe]
52 # CHECK: vcmla.f32 q0, q1, d2[0], #0
53 # MISSING: warning: invalid instruction encoding
54 [0x02,0x08,0x91,0xfe]
55 # CHECK: vcmla.f32 d0, d1, d2[0], #90
56 # MISSING: warning: invalid instruction encoding
57 [0x02,0x08,0xa1,0xfe]
58 # CHECK: vcmla.f32 d0, d1, d2[0], #180
59 # MISSING: warning: invalid instruction encoding
60 [0x02,0x08,0xb1,0xfe]
61 # CHECK: vcmla.f32 d0, d1, d2[0], #270
62 # MISSING: warning: invalid instruction encoding
63 [0x22,0x08,0x01,0xfe]
64 # CHECK-FP16: vcmla.f16 d0, d1, d2[1], #0
65 # MISSING-FP16: warning: invalid instruction encoding
0 # RUN: llvm-mc -triple thumbv8a -mattr=+v8.3a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
1 # RUN: not llvm-mc -triple thumbv8a -mattr=+v8.2a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16
2 # RUN: not llvm-mc -triple thumbv8a -mattr=+v8.3a,-neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16
3 # RUN: not llvm-mc -triple thumbv8a -mattr=+v8.3a,+neon,-fullfp16 -disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
4 # RUN: FileCheck %s < %t --check-prefix=MISSING-FP16
5
6 [0x21,0xfc,0x02,0x08]
7 # CHECK-FP16: vcmla.f16 d0, d1, d2, #0
8 # MISSING-FP16: warning: invalid instruction encoding
9 [0x22,0xfc,0x44,0x08]
10 # CHECK-FP16: vcmla.f16 q0, q1, q2, #0
11 # MISSING-FP16: warning: invalid instruction encoding
12 [0x31,0xfc,0x02,0x08]
13 # CHECK: vcmla.f32 d0, d1, d2, #0
14 # MISSING: warning: invalid instruction encoding
15 [0x32,0xfc,0x44,0x08]
16 # CHECK: vcmla.f32 q0, q1, q2, #0
17 # MISSING: warning: invalid instruction encoding
18 [0xb1,0xfc,0x02,0x08]
19 # CHECK: vcmla.f32 d0, d1, d2, #90
20 # MISSING: warning: invalid instruction encoding
21 [0x31,0xfd,0x02,0x08]
22 # CHECK: vcmla.f32 d0, d1, d2, #180
23 # MISSING: warning: invalid instruction encoding
24 [0xb1,0xfd,0x02,0x08]
25 # CHECK: vcmla.f32 d0, d1, d2, #270
26 # MISSING: warning: invalid instruction encoding
27 [0x81,0xfc,0x02,0x08]
28 # CHECK-FP16: vcadd.f16 d0, d1, d2, #90
29 # MISSING-FP16: warning: invalid instruction encoding
30 [0x82,0xfc,0x44,0x08]
31 # CHECK-FP16: vcadd.f16 q0, q1, q2, #90
32 # MISSING-FP16: warning: invalid instruction encoding
33 [0x91,0xfc,0x02,0x08]
34 # CHECK: vcadd.f32 d0, d1, d2, #90
35 # MISSING: warning: invalid instruction encoding
36 [0x92,0xfc,0x44,0x08]
37 # CHECK: vcadd.f32 q0, q1, q2, #90
38 # MISSING: warning: invalid instruction encoding
39 [0x91,0xfd,0x02,0x08]
40 # CHECK: vcadd.f32 d0, d1, d2, #270
41 # MISSING: warning: invalid instruction encoding
42 [0x01,0xfe,0x02,0x08]
43 # CHECK-FP16: vcmla.f16 d0, d1, d2[0], #0
44 # MISSING-FP16: warning: invalid instruction encoding
45 [0x02,0xfe,0x42,0x08]
46 # CHECK-FP16: vcmla.f16 q0, q1, d2[0], #0
47 # MISSING-FP16: warning: invalid instruction encoding
48 [0x81,0xfe,0x02,0x08]
49 # CHECK: vcmla.f32 d0, d1, d2[0], #0
50 # MISSING: warning: invalid instruction encoding
51 [0x82,0xfe,0x42,0x08]
52 # CHECK: vcmla.f32 q0, q1, d2[0], #0
53 # MISSING: warning: invalid instruction encoding
54 [0x91,0xfe,0x02,0x08]
55 # CHECK: vcmla.f32 d0, d1, d2[0], #90
56 # MISSING: warning: invalid instruction encoding
57 [0xa1,0xfe,0x02,0x08]
58 # CHECK: vcmla.f32 d0, d1, d2[0], #180
59 # MISSING: warning: invalid instruction encoding
60 [0xb1,0xfe,0x02,0x08]
61 # CHECK: vcmla.f32 d0, d1, d2[0], #270
62 # MISSING: warning: invalid instruction encoding
63 [0x01,0xfe,0x22,0x08]
64 # CHECK-FP16: vcmla.f16 d0, d1, d2[1], #0
65 # MISSING-FP16: warning: invalid instruction encoding