llvm.org GIT mirror llvm / 5ab47f4
[ARM] Assembler support for the ARMv8.2a dot product instructions Commit r310480 added the AArch64 ARMv8.2a dot product instructions; this adds the AArch32 instructions. Differential Revision: https://reviews.llvm.org/D36575 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310701 91177308-0d34-0410-b5e6-96231b3b80d8 Sjoerd Meijer 2 years ago
13 changed file(s) with 236 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
113113 def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
114114 "Enable support for CRC instructions">;
115115
116 def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true",
117 "Enable support for dot product instructions",
118 [FeatureNEON]>;
116119
117120 // Not to be confused with FeatureHasRetAddrStack (return address stack)
118121 def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
258258 AssemblerPredicate<"FeatureNEON", "NEON">;
259259 def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
260260 AssemblerPredicate<"FeatureCrypto", "crypto">;
261 def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
262 AssemblerPredicate<"FeatureDotProd", "dotprod">;
261263 def HasCRC : Predicate<"Subtarget->hasCRC()">,
262264 AssemblerPredicate<"FeatureCRC", "crc">;
263265 def HasRAS : Predicate<"Subtarget->hasRAS()">,
50365038 let Inst{15-12} = CRd;
50375039 let Inst{19-16} = CRn;
50385040 let Inst{23-20} = opc1;
5041
5042 let DecoderNamespace = "CoProc";
50395043 }
50405044
50415045 def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
50595063 let Inst{15-12} = CRd;
50605064 let Inst{19-16} = CRn;
50615065 let Inst{23-20} = opc1;
5066
5067 let DecoderNamespace = "CoProc";
50625068 }
50635069
50645070 class ACI
50745080 let Inst{31-28} = 0b1111;
50755081 let Inst{27-25} = 0b110;
50765082 }
5083
5084 let DecoderNamespace = "CoProc" in {
50775085 multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> {
50785086 def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
50795087 asm, "\t$cop, $CRd, $addr", pattern> {
52275235 defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
52285236 defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
52295237
5238 } // DecoderNamespace = "CoProc"
5239
52305240 //===----------------------------------------------------------------------===//
52315241 // Move between coprocessor and ARM core register.
52325242 //
52515261 let Inst{7-5} = opc2;
52525262 let Inst{3-0} = CRm;
52535263 let Inst{19-16} = CRn;
5264
5265 let DecoderNamespace = "CoProc";
52545266 }
52555267
52565268 def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
52955307 let Inst{7-5} = opc2;
52965308 let Inst{3-0} = CRm;
52975309 let Inst{19-16} = CRn;
5310
5311 let DecoderNamespace = "CoProc";
52985312 }
52995313
53005314 def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
46704670 def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
46714671 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
46724672 Requires<[HasVFP4]>;
4673
4674 // ARMv8.2a dot product instructions.
4675 // We put them in the VFPV8 decoder namespace because the ARM and Thumb
4676 // encodings are the same and thus no further bit twiddling is necessary
4677 // in the disassembler.
4678 let Predicates = [HasDotProd], DecoderNamespace = "VFPV8" in {
4679
4680 def VUDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b1,
4681 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
4682 N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
4683 def VSDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b0,
4684 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
4685 N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
4686 def VUDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b1,
4687 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
4688 N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
4689 def VSDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b0,
4690 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
4691 N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
4692
4693 // Indexed dot product instructions:
4694 class DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty> :
4695 N3Vnp<0b11100, 0b10, 0b1101, Q, U,
4696 (outs Ty:$Vd), (ins Ty:$Vn, DPR:$Vm, VectorIndex32:$lane),
4697 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
4698 bit lane;
4699 let Inst{5} = lane;
4700 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4701 }
4702
4703 def VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR>;
4704 def VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR>;
4705 def VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR>;
4706 def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>;
4707
4708 } // HasDotProd
46734709
46744710 // Vector Subtract Operations.
46754711
39633963 }
39643964 }
39653965
3966 let DecoderNamespace = "Thumb2CoProc" in {
39663967 defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
39673968 defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
39683969 defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
39723973 defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
39733974 defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
39743975 defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
3976 }
39753977
39763978
39773979 //===----------------------------------------------------------------------===//
41244126 let Inst{7-5} = opc2;
41254127 let Inst{3-0} = CRm;
41264128 let Inst{19-16} = CRn;
4129
4130 let DecoderNamespace = "Thumb2CoProc";
41274131 }
41284132
41294133 class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
41444148 let Inst{11-8} = cop;
41454149 let Inst{7-4} = opc1;
41464150 let Inst{3-0} = CRm;
4151
4152 let DecoderNamespace = "Thumb2CoProc";
41474153 }
41484154
41494155 /* from ARM core register to coprocessor */
42424248 let Inst{23-20} = opc1;
42434249
42444250 let Predicates = [IsThumb2, PreV8];
4251 let DecoderNamespace = "Thumb2CoProc";
42454252 }
42464253
42474254 def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
42674274 let Inst{23-20} = opc1;
42684275
42694276 let Predicates = [IsThumb2, PreV8];
4277 let DecoderNamespace = "Thumb2CoProc";
42704278 }
42714279
42724280
413413 def IIC_VTBX2 : InstrItinClass;
414414 def IIC_VTBX3 : InstrItinClass;
415415 def IIC_VTBX4 : InstrItinClass;
416 def IIC_VDOTPROD : InstrItinClass;
416417
417418 //===----------------------------------------------------------------------===//
418419 // Processor instruction itineraries.
155155 bool HasFPARMv8 = false;
156156 bool HasNEON = false;
157157
158 /// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
159 bool HasDotProd = false;
160
158161 /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
159162 /// specified. Use the method useNEONForSinglePrecisionFP() to
160163 /// determine if NEON should actually be used.
520523 bool hasFPARMv8() const { return HasFPARMv8; }
521524 bool hasNEON() const { return HasNEON; }
522525 bool hasCrypto() const { return HasCrypto; }
526 bool hasDotProd() const { return HasDotProd; }
523527 bool hasCRC() const { return HasCRC; }
524528 bool hasRAS() const { return HasRAS; }
525529 bool hasVirtualization() const { return HasVirtualization; }
53475347 Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
53485348 Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
53495349 Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
5350 Mnemonic == "bxns" || Mnemonic == "blxns")
5350 Mnemonic == "bxns" || Mnemonic == "blxns" ||
5351 Mnemonic == "vudot" || Mnemonic == "vsdot")
53515352 return Mnemonic;
53525353
53535354 // First, split out any predication code. Ignore mnemonics we know aren't
54535454 Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
54545455 Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
54555456 (FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
5456 Mnemonic == "vmovx" || Mnemonic == "vins") {
5457 Mnemonic == "vmovx" || Mnemonic == "vins" ||
5458 Mnemonic == "vudot" || Mnemonic == "vsdot") {
54575459 // These mnemonics are never predicable
54585460 CanAcceptPredicationCode = false;
54595461 } else if (!isThumb()) {
485485 }
486486 }
487487
488 Result =
489 decodeInstruction(DecoderTableCoProc32, MI, Insn, Address, this, STI);
490 if (Result != MCDisassembler::Fail) {
491 Size = 4;
492 return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
493 }
494
488495 Size = 4;
489496 return MCDisassembler::Fail;
490497 }
818825 Size = 4;
819826 return Result;
820827 }
828 }
829
830 Result =
831 decodeInstruction(DecoderTableThumb2CoProc32, MI, Insn32, Address, this, STI);
832 if (Result != MCDisassembler::Fail) {
833 Size = 4;
834 Check(Result, AddThumbPredicate(MI));
835 return Result;
821836 }
822837
823838 Size = 0;
0 // RUN: llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s | FileCheck %s --check-prefix=CHECK
1
2 // RUN: not llvm-mc -triple arm -mattr=-dotprod -show-encoding < %s 2> %t
3 // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
4 // RUN: not llvm-mc -triple arm -show-encoding < %s 2> %t
5 // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
6 // RUN: not llvm-mc -triple arm -mattr=+v8.1a -show-encoding < %s 2> %t
7 // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
8 // RUN: not llvm-mc -triple arm -mattr=+v8.2a -show-encoding < %s 2> %t
9 // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
10
11 vudot.u8 d0, d1, d2
12 vsdot.s8 d0, d1, d2
13 vudot.u8 q0, q1, q4
14 vsdot.s8 q0, q1, q4
15 vudot.u8 d0, d1, d2[0]
16 vsdot.s8 d0, d1, d2[1]
17 vudot.u8 q0, q1, d4[0]
18 vsdot.s8 q0, q1, d4[1]
19
20 // CHECK: vudot.u8 d0, d1, d2 @ encoding: [0x12,0x0d,0x21,0xfc]
21 // CHECK: vsdot.s8 d0, d1, d2 @ encoding: [0x02,0x0d,0x21,0xfc]
22 // CHECK: vudot.u8 q0, q1, q4 @ encoding: [0x58,0x0d,0x22,0xfc]
23 // CHECK: vsdot.s8 q0, q1, q4 @ encoding: [0x48,0x0d,0x22,0xfc]
24 // CHECK: vudot.u8 d0, d1, d2[0] @ encoding: [0x12,0x0d,0x21,0xfe]
25 // CHECK: vsdot.s8 d0, d1, d2[1] @ encoding: [0x22,0x0d,0x21,0xfe]
26 // CHECK: vudot.u8 q0, q1, d4[0] @ encoding: [0x54,0x0d,0x22,0xfe]
27 // CHECK: vsdot.s8 q0, q1, d4[1] @ encoding: [0x64,0x0d,0x22,0xfe]
28
29 // CHECK-NO-DOTPROD: error: instruction requires: dotprod
30 // CHECK-NO-DOTPROD: error: instruction requires: dotprod
31 // CHECK-NO-DOTPROD: error: instruction requires: dotprod
32 // CHECK-NO-DOTPROD: error: instruction requires: dotprod
33 // CHECK-NO-DOTPROD: error: instruction requires: dotprod
34 // CHECK-NO-DOTPROD: error: instruction requires: dotprod
35 // CHECK-NO-DOTPROD: error: instruction requires: dotprod
36 // CHECK-NO-DOTPROD: error: instruction requires: dotprod
0 // RUN: not llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s 2> %t
1 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
2 // RUN: not llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s 2> %t
3 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
4
5 vudot.u8 d0, d1, d2[2]
6 vsdot.s8 d0, d1, d2[2]
7 vudot.u8 q0, q1, d4[2]
8 vsdot.s8 q0, q1, d4[2]
9
10 // CHECK-ERROR: error: invalid operand for instruction
11 // CHECK-ERROR: error: invalid operand for instruction
12 // CHECK-ERROR: error: invalid operand for instruction
13 // CHECK-ERROR: error: invalid operand for instruction
0 // RUN: llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s | FileCheck %s --check-prefix=CHECK
1
2 // RUN: not llvm-mc -triple thumb -mattr=-dotprod -show-encoding < %s 2> %t
3 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
4 // RUN: not llvm-mc -triple thumb -show-encoding < %s 2> %t
5 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
6 // RUN: not llvm-mc -triple thumb -mattr=+v8.1a -show-encoding < %s 2> %t
7 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
8 // RUN: not llvm-mc -triple thumb -mattr=+v8.2a -show-encoding < %s 2> %t
9 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
10
11 vudot.u8 d0, d1, d2
12 vsdot.s8 d0, d1, d2
13 vudot.u8 q0, q1, q4
14 vsdot.s8 q0, q1, q4
15 vudot.u8 d0, d1, d2[0]
16 vsdot.s8 d0, d1, d2[1]
17 vudot.u8 q0, q1, d4[0]
18 vsdot.s8 q0, q1, d4[1]
19
20 //CHECK: vudot.u8 d0, d1, d2 @ encoding: [0x21,0xfc,0x12,0x0d]
21 //CHECK: vsdot.s8 d0, d1, d2 @ encoding: [0x21,0xfc,0x02,0x0d]
22 //CHECK: vudot.u8 q0, q1, q4 @ encoding: [0x22,0xfc,0x58,0x0d]
23 //CHECK: vsdot.s8 q0, q1, q4 @ encoding: [0x22,0xfc,0x48,0x0d]
24 //CHECK: vudot.u8 d0, d1, d2[0] @ encoding: [0x21,0xfe,0x12,0x0d]
25 //CHECK: vsdot.s8 d0, d1, d2[1] @ encoding: [0x21,0xfe,0x22,0x0d]
26 //CHECK: vudot.u8 q0, q1, d4[0] @ encoding: [0x22,0xfe,0x54,0x0d]
27 //CHECK: vsdot.s8 q0, q1, d4[1] @ encoding: [0x22,0xfe,0x64,0x0d]
28
29 //CHECK-ERROR: error: instruction requires: dotprod
30 //CHECK-ERROR: error: instruction requires: dotprod
31 //CHECK-ERROR: error: instruction requires: dotprod
32 //CHECK-ERROR: error: instruction requires: dotprod
33 //CHECK-ERROR: error: instruction requires: dotprod
34 //CHECK-ERROR: error: instruction requires: dotprod
35 //CHECK-ERROR: error: instruction requires: dotprod
36 //CHECK-ERROR: error: instruction requires: dotprod
37
0 # RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+dotprod --disassemble < %s | FileCheck %s
1 # RUN: llvm-mc -triple arm-none-linux-gnu -mattr=-dotprod --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
2
3 0x12,0x0d,0x21,0xfc
4 0x02,0x0d,0x21,0xfc
5 0x58,0x0d,0x22,0xfc
6 0x48,0x0d,0x22,0xfc
7 0x12,0x0d,0x21,0xfe
8 0x22,0x0d,0x21,0xfe
9 0x54,0x0d,0x22,0xfe
10 0x64,0x0d,0x22,0xfe
11
12 #CHECK: vudot.u8 d0, d1, d2
13 #CHECK: vsdot.s8 d0, d1, d2
14 #CHECK: vudot.u8 q0, q1, q4
15 #CHECK: vsdot.s8 q0, q1, q4
16 #CHECK: vudot.u8 d0, d1, d2[0]
17 #CHECK: vsdot.s8 d0, d1, d2[1]
18 #CHECK: vudot.u8 q0, q1, d4[0]
19 #CHECK: vsdot.s8 q0, q1, d4[1]
20
21 # without dot product enabled, the instructions get disassembled to these
22 # coprocessor instructions:
23
24 #CHECK-ERROR: stc2 p13, c0, [r1], #-72
25 #CHECK-ERROR: stc2 p13, c0, [r1], #-8
26 #CHECK-ERROR: stc2 p13, c0, [r2], #-352
27 #CHECK-ERROR: stc2 p13, c0, [r2], #-288
28 #CHECK-ERROR: mcr2 p13, #1, r0, c1, c2, #0
29 #CHECK-ERROR: cdp2 p13, #2, c0, c1, c2, #1
30 #CHECK-ERROR: mcr2 p13, #1, r0, c2, c4, #2
31 #CHECK-ERROR: cdp2 p13, #2, c0, c2, c4, #3
32
0 # RUN: llvm-mc -triple thumbv7a -mattr=+dotprod --disassemble < %s | FileCheck %s
1 # RUN: llvm-mc -triple thumbv7a -mattr=-dotprod --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
2
3 [0x21,0xfc,0x12,0x0d]
4 [0x21,0xfc,0x02,0x0d]
5 [0x22,0xfc,0x58,0x0d]
6 [0x22,0xfc,0x48,0x0d]
7 [0x21,0xfe,0x12,0x0d]
8 [0x21,0xfe,0x22,0x0d]
9 [0x22,0xfe,0x54,0x0d]
10 [0x22,0xfe,0x64,0x0d]
11
12 #CHECK: vudot.u8 d0, d1, d2
13 #CHECK: vsdot.s8 d0, d1, d2
14 #CHECK: vudot.u8 q0, q1, q4
15 #CHECK: vsdot.s8 q0, q1, q4
16 #CHECK: vudot.u8 d0, d1, d2[0]
17 #CHECK: vsdot.s8 d0, d1, d2[1]
18 #CHECK: vudot.u8 q0, q1, d4[0]
19 #CHECK: vsdot.s8 q0, q1, d4[1]
20
21 #CHECK-ERROR: stc2 p13, c0, [r1], #-72
22 #CHECK-ERROR: stc2 p13, c0, [r1], #-8
23 #CHECK-ERROR: stc2 p13, c0, [r2], #-352
24 #CHECK-ERROR: stc2 p13, c0, [r2], #-288
25 #CHECK-ERROR: mcr2 p13, #1, r0, c1, c2, #0
26 #CHECK-ERROR: cdp2 p13, #2, c0, c1, c2, #1
27 #CHECK-ERROR: mcr2 p13, #1, r0, c2, c4, #2
28 #CHECK-ERROR: cdp2 p13, #2, c0, c2, c4, #3