llvm.org GIT mirror llvm / 4a88808
AMDGPU/SI: Update amd_kernel_code_t definition and add assembler support Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D10772 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240839 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 4 years ago
11 changed file(s) with 756 addition(s) and 206 deletion(s). Raw diff Collapse all Expand all
117117
118118 ISA version, *vendor*, and *arch* will all be stored in a single entry of the
119119 .note section.
120
121 .amd_kernel_code_t
122 ^^^^^^^^^^^^^^^^^^
123
124 This directive marks the beginning of a list of key / value pairs that are used
125 to specify the amd_kernel_code_t object that will be emitted by the assembler.
126 The list must be terminated by the *.end_amd_kernel_code_t* directive. For
127 any amd_kernel_code_t values that are unspecified, a default value will be
128 used. The default value for all keys is 0, with the following exceptions:
129
130 - *kernel_code_version_major* defaults to 1.
131 - *machine_kind* defaults to 1.
132 - *machine_version_major*, *machine_version_minor*, and
133 *machine_version_stepping* are derived from the value of the -mcpu option
134 that is passed to the assembler.
135 - *kernel_code_entry_byte_offset* defaults to 256.
136 - *wavefront_size* defaults to 6 (expressed as a power of two, i.e. 2^6 = 64).
137 - *kernarg_segment_alignment*, *group_segment_alignment*, and
138 *private_segment_alignment* default to 4. Note that alignments are specified
139 as a power of two, so a value of **n** means an alignment of 2^ **n**.
140
141 The *.amd_kernel_code_t* directive must be placed immediately after the
142 function label and before any instructions.
143
144 For a full list of amd_kernel_code_t keys, see the examples in
145 test/CodeGen/AMDGPU/hsa.s. For an explanation of the meanings of the different
146 keys, see the comments in lib/Target/AMDGPU/AMDKernelCodeT.h.
147
148 Here is an example of a minimal amd_kernel_code_t specification:
149
150 .. code-block:: nasm
151
152 .hsa_code_object_version 1,0
153 .hsa_code_object_isa
154
155 .text
156
157 hello_world:
158
159 .amd_kernel_code_t
160 enable_sgpr_kernarg_segment_ptr = 1
161 is_ptr64 = 1
162 compute_pgm_rsrc1_vgprs = 0
163 compute_pgm_rsrc1_sgprs = 0
164 compute_pgm_rsrc2_user_sgpr = 2
165 kernarg_segment_byte_size = 8
166 wavefront_sgpr_count = 2
167 workitem_vgpr_count = 3
168 .end_amd_kernel_code_t
169
170 s_load_dwordx2 s[0:1], s[0:1] 0x0
171 v_mov_b32 v0, 3.14159
172 s_waitcnt lgkmcnt(0)
173 v_mov_b32 v1, s0
174 v_mov_b32 v2, s1
175 flat_store_dword v0, v[1:2]
176 s_endpgm
474474 }
475475
476476 void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
477 const SIProgramInfo &KernelInfo) const {
477 const SIProgramInfo &KernelInfo) const {
478478 const SIMachineFunctionInfo *MFI = MF.getInfo();
479479 const AMDGPUSubtarget &STM = MF.getSubtarget();
480480 amd_kernel_code_t header;
481481
482 memset(&header, 0, sizeof(header));
483
484 header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
485 header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
486
487 header.struct_byte_size = sizeof(amd_kernel_code_t);
488
489 header.target_chip = STM.getAmdKernelCodeChipID();
490
491 header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
482 AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
492483
493484 header.compute_pgm_resource_registers =
494485 KernelInfo.ComputePGMRSrc1 |
495486 (KernelInfo.ComputePGMRSrc2 << 32);
496
497 // Code Properties:
498 header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
499 AMD_CODE_PROPERTY_IS_PTR64;
500
501 if (KernelInfo.FlatUsed)
502 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
503
504 if (KernelInfo.ScratchBlocks)
505 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
506
507 header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
508 header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
509
510 // MFI->ABIArgOffset is the number of bytes for the kernel arguments
511 // plus 36. 36 is the number of bytes reserved at the beginning of the
512 // input buffer to store work-group size information.
513 // FIXME: We should be adding the size of the implicit arguments
514 // to this value.
487 header.code_properties =
488 AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
489 AMD_CODE_PROPERTY_IS_PTR64;
490
515491 header.kernarg_segment_byte_size = MFI->ABIArgOffset;
516
517492 header.wavefront_sgpr_count = KernelInfo.NumSGPR;
518493 header.workitem_vgpr_count = KernelInfo.NumVGPR;
519494
520 // FIXME: What values do I put for these alignments
521 header.kernarg_segment_alignment = 0;
522 header.group_segment_alignment = 0;
523 header.private_segment_alignment = 0;
524
525 header.code_type = 1; // HSA_EXT_CODE_KERNEL
526
527 header.wavefront_size = STM.getWavefrontSize();
528
529 MCSectionELF *VersionSection =
530 OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
531 OutStreamer->SwitchSection(VersionSection);
532 OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
533 Twine(header.hsail_version_major) + "." +
534 Twine(header.hsail_version_minor) + ":" +
535 "AMD:" +
536 Twine(header.amd_code_version_major) + "." +
537 Twine(header.amd_code_version_minor) + ":" +
538 "GFX8.1:0").str());
539
540 OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
541
542 if (isVerbose()) {
543 OutStreamer->emitRawComment("amd_code_version_major = " +
544 Twine(header.amd_code_version_major), false);
545 OutStreamer->emitRawComment("amd_code_version_minor = " +
546 Twine(header.amd_code_version_minor), false);
547 OutStreamer->emitRawComment("struct_byte_size = " +
548 Twine(header.struct_byte_size), false);
549 OutStreamer->emitRawComment("target_chip = " +
550 Twine(header.target_chip), false);
551 OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
552 Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
553 false);
554 OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
555 Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
556 false);
557 OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
558 Twine((bool)(header.code_properties &
559 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
560 OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
561 Twine((bool)(header.code_properties &
562 AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
563 OutStreamer->emitRawComment("private_element_size = 2 ", false);
564 OutStreamer->emitRawComment("is_ptr64 = " +
565 Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
566 OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
567 Twine(header.workitem_private_segment_byte_size),
568 false);
569 OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
570 Twine(header.workgroup_group_segment_byte_size),
571 false);
572 OutStreamer->emitRawComment("gds_segment_byte_size = " +
573 Twine(header.gds_segment_byte_size), false);
574 OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
575 Twine(header.kernarg_segment_byte_size), false);
576 OutStreamer->emitRawComment("wavefront_sgpr_count = " +
577 Twine(header.wavefront_sgpr_count), false);
578 OutStreamer->emitRawComment("workitem_vgpr_count = " +
579 Twine(header.workitem_vgpr_count), false);
580 OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
581 OutStreamer->emitRawComment("wavefront_size = " +
582 Twine((int)header.wavefront_size), false);
583 OutStreamer->emitRawComment("optimization_level = " +
584 Twine(header.optimization_level), false);
585 OutStreamer->emitRawComment("hsail_profile = " +
586 Twine(header.hsail_profile), false);
587 OutStreamer->emitRawComment("hsail_machine_model = " +
588 Twine(header.hsail_machine_model), false);
589 OutStreamer->emitRawComment("hsail_version_major = " +
590 Twine(header.hsail_version_major), false);
591 OutStreamer->emitRawComment("hsail_version_minor = " +
592 Twine(header.hsail_version_minor), false);
593 }
594
595 OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
495
496 AMDGPUTargetStreamer *TS =
497 static_cast(OutStreamer->getTargetStreamer());
498 TS->EmitAMDKernelCodeT(header);
596499 }
597500
598501 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1111 #ifndef AMDKERNELCODET_H
1212 #define AMDKERNELCODET_H
1313
14 #include "llvm/MC/SubtargetFeature.h"
15
1416 #include
1517 #include
1618
19 #include "llvm/Support/Debug.h"
1720 //---------------------------------------------------------------------------//
1821 // AMD Kernel Code, and its dependencies //
1922 //---------------------------------------------------------------------------//
141144 /// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
142145 /// is generally DWORD.
143146 ///
144 /// Use values from the amd_element_byte_size_t enum.
147 /// Use values from the amd_element_byte_size_t enum.
145148 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
146149 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
147150 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
170173 /// Indicate if code generated has support for debugging.
171174 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
172175 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
173 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
176 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
177
178 AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 15,
179 AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
180 AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT
174181 };
175182
176183 /// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
368375 /// Scratch Wave Offset must be added by the kernel code and moved to
369376 /// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
370377 ///
371 /// The second SGPR is 32 bit byte size of a single work-items scratch
378 /// The second SGPR is 32 bit byte size of a single work-item's scratch
372379 /// memory usage. This is directly loaded from the dispatch packet Private
373380 /// Segment Byte Size and rounded up to a multiple of DWORD.
374381 ///
384391 ///
385392 /// Private Segment Size (enable_sgpr_private_segment_size):
386393 /// Number of User SGPR registers: 1. The 32 bit byte size of a single
387 /// work-items scratch memory allocation. This is the value from the dispatch
394 /// work-item's scratch memory allocation. This is the value from the dispatch
388395 /// packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD.
389396 ///
390397 /// \todo [Does CP need to round this to >4 byte alignment?]
432439 /// present
433440 ///
434441 /// Work-Group Info (enable_sgpr_workgroup_info):
435 /// Number of System SGPR registers: 1. {first_wave, 14b0000,
442 /// Number of System SGPR registers: 1. {first_wave, 14'b0000,
436443 /// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
437444 ///
438445 /// Private Segment Wave Byte Offset
498505 /// Alternatively scalar loads can be used if the kernarg offset is uniform, as
499506 /// the kernarg segment is constant for the duration of the kernel execution.
500507 ///
508
501509 typedef struct amd_kernel_code_s {
502 /// The AMD major version of the Code Object. Must be the value
503 /// AMD_CODE_VERSION_MAJOR.
504 amd_code_version32_t amd_code_version_major;
505
506 /// The AMD minor version of the Code Object. Minor versions must be
507 /// backward compatible. Must be the value
508 /// AMD_CODE_VERSION_MINOR.
509 amd_code_version32_t amd_code_version_minor;
510
511 /// The byte size of this struct. Must be set to
512 /// sizeof(amd_kernel_code_t). Used for backward
513 /// compatibility.
514 uint32_t struct_byte_size;
515
516 /// The target chip instruction set for which code has been
517 /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
518 /// in sc/Interface/SCCommon.h.
519 uint32_t target_chip;
510 uint32_t amd_kernel_code_version_major;
511 uint32_t amd_kernel_code_version_minor;
512 uint16_t amd_machine_kind;
513 uint16_t amd_machine_version_major;
514 uint16_t amd_machine_version_minor;
515 uint16_t amd_machine_version_stepping;
520516
521517 /// Byte offset (possibly negative) from start of amd_kernel_code_t
522518 /// object to kernel's entry point instruction. The actual code for
534530 /// and size. The offset is from the start (possibly negative) of
535531 /// amd_kernel_code_t object. Set both to 0 if no prefetch
536532 /// information is available.
537 ///
538 /// \todo ttye 11/15/2013 Is the prefetch definition we want? Did
539 /// not make the size a uint64_t as prefetching more than 4GiB seems
540 /// excessive.
541533 int64_t kernel_code_prefetch_byte_offset;
542534 uint64_t kernel_code_prefetch_byte_size;
543535
552544
553545 /// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
554546 /// COMPUTE_PGM_RSRC2 registers.
555 amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
547 uint64_t compute_pgm_resource_registers;
556548
557549 /// Code properties. See amd_code_property_mask_t for a full list of
558550 /// properties.
559 amd_code_property32_t code_properties;
551 uint32_t code_properties;
560552
561553 /// The amount of memory required for the combined private, spill
562554 /// and arg segments for a work-item in bytes. If
628620 /// The maximum byte alignment of variables used by the kernel in
629621 /// the specified memory segment. Expressed as a power of two. Must
630622 /// be at least HSA_POWERTWO_16.
631 hsa_powertwo8_t kernarg_segment_alignment;
632 hsa_powertwo8_t group_segment_alignment;
633 hsa_powertwo8_t private_segment_alignment;
634
635 uint8_t reserved3;
636
637 /// Type of code object.
638 hsa_ext_code_kind32_t code_type;
639
640 /// Reserved for code properties if any are defined in the future.
641 /// There are currently no code properties so this field must be 0.
642 uint32_t reserved4;
623 uint8_t kernarg_segment_alignment;
624 uint8_t group_segment_alignment;
625 uint8_t private_segment_alignment;
643626
644627 /// Wavefront size expressed as a power of two. Must be a power of 2
645628 /// in range 1..64 inclusive. Used to support runtime query that
646629 /// obtains wavefront size, which may be used by application to
647630 /// allocated dynamic group memory and set the dispatch work-group
648631 /// size.
649 hsa_powertwo8_t wavefront_size;
650
651 /// The optimization level specified when the kernel was
652 /// finalized.
653 uint8_t optimization_level;
654
655 /// The HSAIL profile defines which features are used. This
656 /// information is from the HSAIL version directive. If this
657 /// amd_kernel_code_t is not generated from an HSAIL compilation
658 /// unit then must be 0.
659 hsa_ext_brig_profile8_t hsail_profile;
660
661 /// The HSAIL machine model gives the address sizes used by the
662 /// code. This information is from the HSAIL version directive. If
663 /// not generated from an HSAIL compilation unit then must still
664 /// indicate for what machine mode the code is generated.
665 hsa_ext_brig_machine_model8_t hsail_machine_model;
666
667 /// The HSAIL major version. This information is from the HSAIL
668 /// version directive. If this amd_kernel_code_t is not
669 /// generated from an HSAIL compilation unit then must be 0.
670 uint32_t hsail_version_major;
671
672 /// The HSAIL minor version. This information is from the HSAIL
673 /// version directive. If this amd_kernel_code_t is not
674 /// generated from an HSAIL compilation unit then must be 0.
675 uint32_t hsail_version_minor;
676
677 /// Reserved for HSAIL target options if any are defined in the
678 /// future. There are currently no target options so this field
679 /// must be 0.
680 uint16_t reserved5;
681
682 /// Reserved. Must be 0.
683 uint16_t reserved6;
684
685 /// The values should be the actual values used by the finalizer
686 /// in generating the code. This may be the union of values
687 /// specified as finalizer arguments and explicit HSAIL control
688 /// directives. If the finalizer chooses to ignore a control
689 /// directive, and not generate constrained code, then the control
690 /// directive should not be marked as enabled even though it was
691 /// present in the HSAIL or finalizer argument. The values are
692 /// intended to reflect the constraints that the code actually
693 /// requires to correctly execute, not the values that were
694 /// actually specified at finalize time.
695 hsa_ext_control_directives_t control_directive;
696
697 /// The code can immediately follow the amd_kernel_code_t, or can
698 /// come after subsequent amd_kernel_code_t structs when there are
699 /// multiple kernels in the compilation unit.
700
632 uint8_t wavefront_size;
633
634 int32_t call_convention;
635 uint8_t reserved3[12];
636 uint64_t runtime_loader_kernel_symbol;
637 uint64_t control_directives[16];
701638 } amd_kernel_code_t;
702639
703640 #endif // AMDKERNELCODET_H
99 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1010 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
1111 #include "Utils/AMDGPUBaseInfo.h"
12 #include "AMDKernelCodeT.h"
1213 #include "SIDefines.h"
1314 #include "llvm/ADT/APFloat.h"
1415 #include "llvm/ADT/SmallString.h"
319320 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
320321 bool ParseDirectiveHSACodeObjectVersion();
321322 bool ParseDirectiveHSACodeObjectISA();
323 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
324 bool ParseDirectiveAMDKernelCodeT();
322325
323326 public:
324327 AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
682685 return false;
683686 }
684687
688 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
689 amd_kernel_code_t &Header) {
690
691 if (getLexer().isNot(AsmToken::Equal))
692 return TokError("expected '='");
693 Lex();
694
695 if (getLexer().isNot(AsmToken::Integer))
696 return TokError("amd_kernel_code_t values must be integers");
697
698 uint64_t Value = getLexer().getTok().getIntVal();
699 Lex();
700
701 if (ID == "kernel_code_version_major")
702 Header.amd_kernel_code_version_major = Value;
703 else if (ID == "kernel_code_version_minor")
704 Header.amd_kernel_code_version_minor = Value;
705 else if (ID == "machine_kind")
706 Header.amd_machine_kind = Value;
707 else if (ID == "machine_version_major")
708 Header.amd_machine_version_major = Value;
709 else if (ID == "machine_version_minor")
710 Header.amd_machine_version_minor = Value;
711 else if (ID == "machine_version_stepping")
712 Header.amd_machine_version_stepping = Value;
713 else if (ID == "kernel_code_entry_byte_offset")
714 Header.kernel_code_entry_byte_offset = Value;
715 else if (ID == "kernel_code_prefetch_byte_size")
716 Header.kernel_code_prefetch_byte_size = Value;
717 else if (ID == "max_scratch_backing_memory_byte_size")
718 Header.max_scratch_backing_memory_byte_size = Value;
719 else if (ID == "compute_pgm_rsrc1_vgprs")
720 Header.compute_pgm_resource_registers |= S_00B848_VGPRS(Value);
721 else if (ID == "compute_pgm_rsrc1_sgprs")
722 Header.compute_pgm_resource_registers |= S_00B848_SGPRS(Value);
723 else if (ID == "compute_pgm_rsrc1_priority")
724 Header.compute_pgm_resource_registers |= S_00B848_PRIORITY(Value);
725 else if (ID == "compute_pgm_rsrc1_float_mode")
726 Header.compute_pgm_resource_registers |= S_00B848_FLOAT_MODE(Value);
727 else if (ID == "compute_pgm_rsrc1_priv")
728 Header.compute_pgm_resource_registers |= S_00B848_PRIV(Value);
729 else if (ID == "compute_pgm_rsrc1_dx10_clamp")
730 Header.compute_pgm_resource_registers |= S_00B848_DX10_CLAMP(Value);
731 else if (ID == "compute_pgm_rsrc1_debug_mode")
732 Header.compute_pgm_resource_registers |= S_00B848_DEBUG_MODE(Value);
733 else if (ID == "compute_pgm_rsrc1_ieee_mode")
734 Header.compute_pgm_resource_registers |= S_00B848_IEEE_MODE(Value);
735 else if (ID == "compute_pgm_rsrc2_scratch_en")
736 Header.compute_pgm_resource_registers |= (S_00B84C_SCRATCH_EN(Value) << 32);
737 else if (ID == "compute_pgm_rsrc2_user_sgpr")
738 Header.compute_pgm_resource_registers |= (S_00B84C_USER_SGPR(Value) << 32);
739 else if (ID == "compute_pgm_rsrc2_tgid_x_en")
740 Header.compute_pgm_resource_registers |= (S_00B84C_TGID_X_EN(Value) << 32);
741 else if (ID == "compute_pgm_rsrc2_tgid_y_en")
742 Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Y_EN(Value) << 32);
743 else if (ID == "compute_pgm_rsrc2_tgid_z_en")
744 Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Z_EN(Value) << 32);
745 else if (ID == "compute_pgm_rsrc2_tg_size_en")
746 Header.compute_pgm_resource_registers |= (S_00B84C_TG_SIZE_EN(Value) << 32);
747 else if (ID == "compute_pgm_rsrc2_tidig_comp_cnt")
748 Header.compute_pgm_resource_registers |=
749 (S_00B84C_TIDIG_COMP_CNT(Value) << 32);
750 else if (ID == "compute_pgm_rsrc2_excp_en_msb")
751 Header.compute_pgm_resource_registers |=
752 (S_00B84C_EXCP_EN_MSB(Value) << 32);
753 else if (ID == "compute_pgm_rsrc2_lds_size")
754 Header.compute_pgm_resource_registers |= (S_00B84C_LDS_SIZE(Value) << 32);
755 else if (ID == "compute_pgm_rsrc2_excp_en")
756 Header.compute_pgm_resource_registers |= (S_00B84C_EXCP_EN(Value) << 32);
757 else if (ID == "compute_pgm_resource_registers")
758 Header.compute_pgm_resource_registers = Value;
759 else if (ID == "enable_sgpr_private_segment_buffer")
760 Header.code_properties |=
761 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT);
762 else if (ID == "enable_sgpr_dispatch_ptr")
763 Header.code_properties |=
764 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT);
765 else if (ID == "enable_sgpr_queue_ptr")
766 Header.code_properties |=
767 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT);
768 else if (ID == "enable_sgpr_kernarg_segment_ptr")
769 Header.code_properties |=
770 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT);
771 else if (ID == "enable_sgpr_dispatch_id")
772 Header.code_properties |=
773 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT);
774 else if (ID == "enable_sgpr_flat_scratch_init")
775 Header.code_properties |=
776 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT);
777 else if (ID == "enable_sgpr_private_segment_size")
778 Header.code_properties |=
779 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT);
780 else if (ID == "enable_sgpr_grid_workgroup_count_x")
781 Header.code_properties |=
782 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT);
783 else if (ID == "enable_sgpr_grid_workgroup_count_y")
784 Header.code_properties |=
785 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT);
786 else if (ID == "enable_sgpr_grid_workgroup_count_z")
787 Header.code_properties |=
788 (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT);
789 else if (ID == "enable_ordered_append_gds")
790 Header.code_properties |=
791 (Value << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT);
792 else if (ID == "private_element_size")
793 Header.code_properties |=
794 (Value << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT);
795 else if (ID == "is_ptr64")
796 Header.code_properties |=
797 (Value << AMD_CODE_PROPERTY_IS_PTR64_SHIFT);
798 else if (ID == "is_dynamic_callstack")
799 Header.code_properties |=
800 (Value << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT);
801 else if (ID == "is_debug_enabled")
802 Header.code_properties |=
803 (Value << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT);
804 else if (ID == "is_xnack_enabled")
805 Header.code_properties |=
806 (Value << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT);
807 else if (ID == "workitem_private_segment_byte_size")
808 Header.workitem_private_segment_byte_size = Value;
809 else if (ID == "workgroup_group_segment_byte_size")
810 Header.workgroup_group_segment_byte_size = Value;
811 else if (ID == "gds_segment_byte_size")
812 Header.gds_segment_byte_size = Value;
813 else if (ID == "kernarg_segment_byte_size")
814 Header.kernarg_segment_byte_size = Value;
815 else if (ID == "workgroup_fbarrier_count")
816 Header.workgroup_fbarrier_count = Value;
817 else if (ID == "wavefront_sgpr_count")
818 Header.wavefront_sgpr_count = Value;
819 else if (ID == "workitem_vgpr_count")
820 Header.workitem_vgpr_count = Value;
821 else if (ID == "reserved_vgpr_first")
822 Header.reserved_vgpr_first = Value;
823 else if (ID == "reserved_vgpr_count")
824 Header.reserved_vgpr_count = Value;
825 else if (ID == "reserved_sgpr_first")
826 Header.reserved_sgpr_first = Value;
827 else if (ID == "reserved_sgpr_count")
828 Header.reserved_sgpr_count = Value;
829 else if (ID == "debug_wavefront_private_segment_offset_sgpr")
830 Header.debug_wavefront_private_segment_offset_sgpr = Value;
831 else if (ID == "debug_private_segment_buffer_sgpr")
832 Header.debug_private_segment_buffer_sgpr = Value;
833 else if (ID == "kernarg_segment_alignment")
834 Header.kernarg_segment_alignment = Value;
835 else if (ID == "group_segment_alignment")
836 Header.group_segment_alignment = Value;
837 else if (ID == "private_segment_alignment")
838 Header.private_segment_alignment = Value;
839 else if (ID == "wavefront_size")
840 Header.wavefront_size = Value;
841 else if (ID == "call_convention")
842 Header.call_convention = Value;
843 else if (ID == "runtime_loader_kernel_symbol")
844 Header.runtime_loader_kernel_symbol = Value;
845 else
846 return TokError("amd_kernel_code_t value not recognized.");
847
848 return false;
849 }
850
851 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
852
853 amd_kernel_code_t Header;
854 AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
855
856 while (true) {
857
858 if (getLexer().isNot(AsmToken::EndOfStatement))
859 return TokError("amd_kernel_code_t values must begin on a new line");
860
861 // Lex EndOfStatement. This is in a while loop, because lexing a comment
862 // will set the current token to EndOfStatement.
863 while(getLexer().is(AsmToken::EndOfStatement))
864 Lex();
865
866 if (getLexer().isNot(AsmToken::Identifier))
867 return TokError("expected value identifier or .end_amd_kernel_code_t");
868
869 StringRef ID = getLexer().getTok().getIdentifier();
870 Lex();
871
872 if (ID == ".end_amd_kernel_code_t")
873 break;
874
875 if (ParseAMDKernelCodeTValue(ID, Header))
876 return true;
877 }
878
879 getTargetStreamer().EmitAMDKernelCodeT(Header);
880
881 return false;
882 }
883
685884 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
686885 StringRef IDVal = DirectiveID.getString();
687886
690889
691890 if (IDVal == ".hsa_code_object_isa")
692891 return ParseDirectiveHSACodeObjectISA();
892
893 if (IDVal == ".amd_kernel_code_t")
894 return ParseDirectiveAMDKernelCodeT();
693895
694896 return true;
695897 }
1111 //===----------------------------------------------------------------------===//
1212
1313 #include "AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
1415 #include "llvm/ADT/Twine.h"
1516 #include "llvm/MC/MCContext.h"
1617 #include "llvm/MC/MCELFStreamer.h"
18 #include "llvm/MC/MCObjectFileInfo.h"
1719 #include "llvm/MC/MCSectionELF.h"
1820 #include "llvm/Support/ELF.h"
1921 #include "llvm/Support/FormattedStream.h"
4749 OS << "\t.hsa_code_object_isa " <<
4850 Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
4951 ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
52
53 }
54
55 void
56 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
57 uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
58 bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
59 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
60 bool EnableSGPRDispatchPtr = (Header.code_properties &
61 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
62 bool EnableSGPRQueuePtr = (Header.code_properties &
63 AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
64 bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
65 AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
66 bool EnableSGPRDispatchID = (Header.code_properties &
67 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
68 bool EnableSGPRFlatScratchInit = (Header.code_properties &
69 AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
70 bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
71 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
72 bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
73 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
74 bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
75 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
76 bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
77 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
78 bool EnableOrderedAppendGDS = (Header.code_properties &
79 AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
80 uint32_t PrivateElementSize = (Header.code_properties &
81 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
82 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
83 bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
84 bool IsDynamicCallstack = (Header.code_properties &
85 AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
86 bool IsDebugEnabled = (Header.code_properties &
87 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
88 bool IsXNackEnabled = (Header.code_properties &
89 AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
90
91 OS << "\t.amd_kernel_code_t\n" <<
92 "\t\tkernel_code_version_major = " <<
93 Header.amd_kernel_code_version_major << '\n' <<
94 "\t\tkernel_code_version_minor = " <<
95 Header.amd_kernel_code_version_minor << '\n' <<
96 "\t\tmachine_kind = " <<
97 Header.amd_machine_kind << '\n' <<
98 "\t\tmachine_version_major = " <<
99 Header.amd_machine_version_major << '\n' <<
100 "\t\tmachine_version_minor = " <<
101 Header.amd_machine_version_minor << '\n' <<
102 "\t\tmachine_version_stepping = " <<
103 Header.amd_machine_version_stepping << '\n' <<
104 "\t\tkernel_code_entry_byte_offset = " <<
105 Header.kernel_code_entry_byte_offset << '\n' <<
106 "\t\tkernel_code_prefetch_byte_size = " <<
107 Header.kernel_code_prefetch_byte_size << '\n' <<
108 "\t\tmax_scratch_backing_memory_byte_size = " <<
109 Header.max_scratch_backing_memory_byte_size << '\n' <<
110 "\t\tcompute_pgm_rsrc1_vgprs = " <<
111 G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
112 "\t\tcompute_pgm_rsrc1_sgprs = " <<
113 G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
114 "\t\tcompute_pgm_rsrc1_priority = " <<
115 G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
116 "\t\tcompute_pgm_rsrc1_float_mode = " <<
117 G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
118 "\t\tcompute_pgm_rsrc1_priv = " <<
119 G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
120 "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
121 G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
122 "\t\tcompute_pgm_rsrc1_debug_mode = " <<
123 G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
124 "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
125 G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
126 "\t\tcompute_pgm_rsrc2_scratch_en = " <<
127 G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
128 "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
129 G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
130 "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
131 G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
132 "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
133 G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
134 "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
135 G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
136 "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
137 G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
138 "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
139 G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
140 "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
141 G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
142 "\t\tcompute_pgm_rsrc2_lds_size = " <<
143 G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
144 "\t\tcompute_pgm_rsrc2_excp_en = " <<
145 G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
146
147 "\t\tenable_sgpr_private_segment_buffer = " <<
148 EnableSGPRPrivateSegmentBuffer << '\n' <<
149 "\t\tenable_sgpr_dispatch_ptr = " <<
150 EnableSGPRDispatchPtr << '\n' <<
151 "\t\tenable_sgpr_queue_ptr = " <<
152 EnableSGPRQueuePtr << '\n' <<
153 "\t\tenable_sgpr_kernarg_segment_ptr = " <<
154 EnableSGPRKernargSegmentPtr << '\n' <<
155 "\t\tenable_sgpr_dispatch_id = " <<
156 EnableSGPRDispatchID << '\n' <<
157 "\t\tenable_sgpr_flat_scratch_init = " <<
158 EnableSGPRFlatScratchInit << '\n' <<
159 "\t\tenable_sgpr_private_segment_size = " <<
160 EnableSGPRPrivateSegmentSize << '\n' <<
161 "\t\tenable_sgpr_grid_workgroup_count_x = " <<
162 EnableSGPRGridWorkgroupCountX << '\n' <<
163 "\t\tenable_sgpr_grid_workgroup_count_y = " <<
164 EnableSGPRGridWorkgroupCountY << '\n' <<
165 "\t\tenable_sgpr_grid_workgroup_count_z = " <<
166 EnableSGPRGridWorkgroupCountZ << '\n' <<
167 "\t\tenable_ordered_append_gds = " <<
168 EnableOrderedAppendGDS << '\n' <<
169 "\t\tprivate_element_size = " <<
170 PrivateElementSize << '\n' <<
171 "\t\tis_ptr64 = " <<
172 IsPtr64 << '\n' <<
173 "\t\tis_dynamic_callstack = " <<
174 IsDynamicCallstack << '\n' <<
175 "\t\tis_debug_enabled = " <<
176 IsDebugEnabled << '\n' <<
177 "\t\tis_xnack_enabled = " <<
178 IsXNackEnabled << '\n' <<
179 "\t\tworkitem_private_segment_byte_size = " <<
180 Header.workitem_private_segment_byte_size << '\n' <<
181 "\t\tworkgroup_group_segment_byte_size = " <<
182 Header.workgroup_group_segment_byte_size << '\n' <<
183 "\t\tgds_segment_byte_size = " <<
184 Header.gds_segment_byte_size << '\n' <<
185 "\t\tkernarg_segment_byte_size = " <<
186 Header.kernarg_segment_byte_size << '\n' <<
187 "\t\tworkgroup_fbarrier_count = " <<
188 Header.workgroup_fbarrier_count << '\n' <<
189 "\t\twavefront_sgpr_count = " <<
190 Header.wavefront_sgpr_count << '\n' <<
191 "\t\tworkitem_vgpr_count = " <<
192 Header.workitem_vgpr_count << '\n' <<
193 "\t\treserved_vgpr_first = " <<
194 Header.reserved_vgpr_first << '\n' <<
195 "\t\treserved_vgpr_count = " <<
196 Header.reserved_vgpr_count << '\n' <<
197 "\t\treserved_sgpr_first = " <<
198 Header.reserved_sgpr_first << '\n' <<
199 "\t\treserved_sgpr_count = " <<
200 Header.reserved_sgpr_count << '\n' <<
201 "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
202 Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
203 "\t\tdebug_private_segment_buffer_sgpr = " <<
204 Header.debug_private_segment_buffer_sgpr << '\n' <<
205 "\t\tkernarg_segment_alignment = " <<
206 (uint32_t)Header.kernarg_segment_alignment << '\n' <<
207 "\t\tgroup_segment_alignment = " <<
208 (uint32_t)Header.group_segment_alignment << '\n' <<
209 "\t\tprivate_segment_alignment = " <<
210 (uint32_t)Header.private_segment_alignment << '\n' <<
211 "\t\twavefront_size = " <<
212 (uint32_t)Header.wavefront_size << '\n' <<
213 "\t\tcall_convention = " <<
214 Header.call_convention << '\n' <<
215 "\t\truntime_loader_kernel_symbol = " <<
216 Header.runtime_loader_kernel_symbol << '\n' <<
217 // TODO: control_directives
218 "\t.end_amd_kernel_code_t\n";
50219
51220 }
52221
115284 OS.EmitValueToAlignment(4);
116285 OS.PopSection();
117286 }
287
288 void
289 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
290
291 MCStreamer &OS = getStreamer();
292 OS.PushSection();
293 OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
294 OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
295 OS.PopSection();
296 }
66 //
77 //===----------------------------------------------------------------------===//
88
9 #include "AMDKernelCodeT.h"
910 #include "llvm/MC/MCStreamer.h"
10
1111 #include "llvm/MC/MCSymbol.h"
1212 #include "llvm/Support/Debug.h"
1313 namespace llvm {
2424 uint32_t Stepping,
2525 StringRef VendorName,
2626 StringRef ArchName) = 0;
27
28 virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
2729 };
2830
2931 class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
3638 void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
3739 uint32_t Stepping, StringRef VendorName,
3840 StringRef ArchName) override;
41
42 void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
3943 };
4044
4145 class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
6468 void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
6569 uint32_t Stepping, StringRef VendorName,
6670 StringRef ArchName) override;
71
72 void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
73
6774 };
6875
6976 }
9999 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
100100 #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
101101 #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
102
// COMPUTE_PGM_RSRC2 register (offset 0x00B84C) bit-field helpers.
//
// Three macros are provided for every field FOO:
//   S_00B84C_FOO(x)  masks x to the field width and shifts it into position.
//   G_00B84C_FOO(x)  extracts the field from a full register value x.
//   C_00B84C_FOO     AND-mask that clears the field and keeps all other bits.
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
// SCRATCH_EN: bit 0.
#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
#define G_00B84C_SCRATCH_EN(x) (((x) >> 0) & 0x1)
#define C_00B84C_SCRATCH_EN 0xFFFFFFFE
// USER_SGPR: bits 1-5.
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
// TGID_X_EN: bit 7.
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
// TGID_Y_EN: bit 8.
#define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
#define G_00B84C_TGID_Y_EN(x) (((x) >> 8) & 0x1)
#define C_00B84C_TGID_Y_EN 0xFFFFFEFF
// TGID_Z_EN: bit 9.
#define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
#define G_00B84C_TGID_Z_EN(x) (((x) >> 9) & 0x1)
#define C_00B84C_TGID_Z_EN 0xFFFFFDFF
// TG_SIZE_EN: bit 10.
#define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
#define G_00B84C_TG_SIZE_EN(x) (((x) >> 10) & 0x1)
#define C_00B84C_TG_SIZE_EN 0xFFFFFBFF
// TIDIG_COMP_CNT: bits 11-12.
#define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
#define G_00B84C_TIDIG_COMP_CNT(x) (((x) >> 11) & 0x03)
#define C_00B84C_TIDIG_COMP_CNT 0xFFFFE7FF
/* CIK */
// EXCP_EN_MSB: bits 13-14.
#define S_00B84C_EXCP_EN_MSB(x) (((x) & 0x03) << 13)
#define G_00B84C_EXCP_EN_MSB(x) (((x) >> 13) & 0x03)
#define C_00B84C_EXCP_EN_MSB 0xFFFF9FFF
/* end of the field marked CIK */
// LDS_SIZE: bits 15-23.
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
#define G_00B84C_LDS_SIZE(x) (((x) >> 15) & 0x1FF)
#define C_00B84C_LDS_SIZE 0xFF007FFF
// EXCP_EN: bits 24-30.
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
// Clear mask is ~(0x7F << 24). It was previously defined with no value, which
// made the macro unusable in any expression.
#define C_00B84C_EXCP_EN 0x80FFFFFF
110136
111 #define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
112137 #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
113138
114139
3232 return {0, 0, 0};
3333 }
3434
35 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
36 const FeatureBitset &Features) {
37
38 IsaVersion ISA = getIsaVersion(Features);
39
40 memset(&Header, 0, sizeof(Header));
41
42 Header.amd_kernel_code_version_major = 1;
43 Header.amd_kernel_code_version_minor = 0;
44 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
45 Header.amd_machine_version_major = ISA.Major;
46 Header.amd_machine_version_minor = ISA.Minor;
47 Header.amd_machine_version_stepping = ISA.Stepping;
48 Header.kernel_code_entry_byte_offset = sizeof(Header);
49 // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
50 Header.wavefront_size = 6;
51 // These alignment values are specified in powers of two, so alignment =
52 // 2^n. The minimum alignment is 2^4 = 16.
53 Header.kernarg_segment_alignment = 4;
54 Header.group_segment_alignment = 4;
55 Header.private_segment_alignment = 4;
56 }
57
3558 } // End namespace AMDGPU
3659 } // End namespace llvm
2424 };
2525
2626 IsaVersion getIsaVersion(const FeatureBitset &Features);
27 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
28 const FeatureBitset &Features);
2729
2830 } // end namespace AMDGPU
2931 } // end namespace llvm
1515 ; HSA: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
1616
1717 ; HSA: {{^}}simple:
18 ; HSA: .section .hsa.version
19 ; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
20 ; Test that the amd_kernel_code_t object is emitted
21 ; HSA: .asciz
18 ; HSA: .amd_kernel_code_t
19 ; HSA: .end_amd_kernel_code_t
2220 ; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0
2321 ; Make sure we are setting the ATC bit:
2422 ; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
1313 .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
1414 // ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
1515
16 .text
17 amd_kernel_code_t_test_all:
18 ; Test all amd_kernel_code_t members with non-default values.
19 .amd_kernel_code_t
20 kernel_code_version_major = 100
21 kernel_code_version_minor = 100
22 machine_kind = 0
23 machine_version_major = 5
24 machine_version_minor = 5
25 machine_version_stepping = 5
26 kernel_code_entry_byte_offset = 512
27 kernel_code_prefetch_byte_size = 1
28 max_scratch_backing_memory_byte_size = 1
29 compute_pgm_rsrc1_vgprs = 1
30 compute_pgm_rsrc1_sgprs = 1
31 compute_pgm_rsrc1_priority = 1
32 compute_pgm_rsrc1_float_mode = 1
33 compute_pgm_rsrc1_priv = 1
34 compute_pgm_rsrc1_dx10_clamp = 1
35 compute_pgm_rsrc1_debug_mode = 1
36 compute_pgm_rsrc1_ieee_mode = 1
37 compute_pgm_rsrc2_scratch_en = 1
38 compute_pgm_rsrc2_user_sgpr = 1
39 compute_pgm_rsrc2_tgid_x_en = 1
40 compute_pgm_rsrc2_tgid_y_en = 1
41 compute_pgm_rsrc2_tgid_z_en = 1
42 compute_pgm_rsrc2_tg_size_en = 1
43 compute_pgm_rsrc2_tidig_comp_cnt = 1
44 compute_pgm_rsrc2_excp_en_msb = 1
45 compute_pgm_rsrc2_lds_size = 1
46 compute_pgm_rsrc2_excp_en = 1
47 enable_sgpr_private_segment_buffer = 1
48 enable_sgpr_dispatch_ptr = 1
49 enable_sgpr_queue_ptr = 1
50 enable_sgpr_kernarg_segment_ptr = 1
51 enable_sgpr_dispatch_id = 1
52 enable_sgpr_flat_scratch_init = 1
53 enable_sgpr_private_segment_size = 1
54 enable_sgpr_grid_workgroup_count_x = 1
55 enable_sgpr_grid_workgroup_count_y = 1
56 enable_sgpr_grid_workgroup_count_z = 1
57 enable_ordered_append_gds = 1
58 private_element_size = 1
59 is_ptr64 = 1
60 is_dynamic_callstack = 1
61 is_debug_enabled = 1
62 is_xnack_enabled = 1
63 workitem_private_segment_byte_size = 1
64 workgroup_group_segment_byte_size = 1
65 gds_segment_byte_size = 1
66 kernarg_segment_byte_size = 1
67 workgroup_fbarrier_count = 1
68 wavefront_sgpr_count = 1
69 workitem_vgpr_count = 1
70 reserved_vgpr_first = 1
71 reserved_vgpr_count = 1
72 reserved_sgpr_first = 1
73 reserved_sgpr_count = 1
74 debug_wavefront_private_segment_offset_sgpr = 1
75 debug_private_segment_buffer_sgpr = 1
76 kernarg_segment_alignment = 5
77 group_segment_alignment = 5
78 private_segment_alignment = 5
79 wavefront_size = 5
80 call_convention = 1
81 runtime_loader_kernel_symbol = 1
82 .end_amd_kernel_code_t
83
84 // ASM-LABEL: {{^}}amd_kernel_code_t_test_all:
85 // ASM: .amd_kernel_code_t
86 // ASM: kernel_code_version_major = 100
87 // ASM: kernel_code_version_minor = 100
88 // ASM: machine_kind = 0
89 // ASM: machine_version_major = 5
90 // ASM: machine_version_minor = 5
91 // ASM: machine_version_stepping = 5
92 // ASM: kernel_code_entry_byte_offset = 512
93 // ASM: kernel_code_prefetch_byte_size = 1
94 // ASM: max_scratch_backing_memory_byte_size = 1
95 // ASM: compute_pgm_rsrc1_vgprs = 1
96 // ASM: compute_pgm_rsrc1_sgprs = 1
97 // ASM: compute_pgm_rsrc1_priority = 1
98 // ASM: compute_pgm_rsrc1_float_mode = 1
99 // ASM: compute_pgm_rsrc1_priv = 1
100 // ASM: compute_pgm_rsrc1_dx10_clamp = 1
101 // ASM: compute_pgm_rsrc1_debug_mode = 1
102 // ASM: compute_pgm_rsrc1_ieee_mode = 1
103 // ASM: compute_pgm_rsrc2_scratch_en = 1
104 // ASM: compute_pgm_rsrc2_user_sgpr = 1
105 // ASM: compute_pgm_rsrc2_tgid_x_en = 1
106 // ASM: compute_pgm_rsrc2_tgid_y_en = 1
107 // ASM: compute_pgm_rsrc2_tgid_z_en = 1
108 // ASM: compute_pgm_rsrc2_tg_size_en = 1
109 // ASM: compute_pgm_rsrc2_tidig_comp_cnt = 1
110 // ASM: compute_pgm_rsrc2_excp_en_msb = 1
111 // ASM: compute_pgm_rsrc2_lds_size = 1
112 // ASM: compute_pgm_rsrc2_excp_en = 1
113 // ASM: enable_sgpr_private_segment_buffer = 1
114 // ASM: enable_sgpr_dispatch_ptr = 1
115 // ASM: enable_sgpr_queue_ptr = 1
116 // ASM: enable_sgpr_kernarg_segment_ptr = 1
117 // ASM: enable_sgpr_dispatch_id = 1
118 // ASM: enable_sgpr_flat_scratch_init = 1
119 // ASM: enable_sgpr_private_segment_size = 1
120 // ASM: enable_sgpr_grid_workgroup_count_x = 1
121 // ASM: enable_sgpr_grid_workgroup_count_y = 1
122 // ASM: enable_sgpr_grid_workgroup_count_z = 1
123 // ASM: enable_ordered_append_gds = 1
124 // ASM: private_element_size = 1
125 // ASM: is_ptr64 = 1
126 // ASM: is_dynamic_callstack = 1
127 // ASM: is_debug_enabled = 1
128 // ASM: is_xnack_enabled = 1
129 // ASM: workitem_private_segment_byte_size = 1
130 // ASM: workgroup_group_segment_byte_size = 1
131 // ASM: gds_segment_byte_size = 1
132 // ASM: kernarg_segment_byte_size = 1
133 // ASM: workgroup_fbarrier_count = 1
134 // ASM: wavefront_sgpr_count = 1
135 // ASM: workitem_vgpr_count = 1
136 // ASM: reserved_vgpr_first = 1
137 // ASM: reserved_vgpr_count = 1
138 // ASM: reserved_sgpr_first = 1
139 // ASM: reserved_sgpr_count = 1
140 // ASM: debug_wavefront_private_segment_offset_sgpr = 1
141 // ASM: debug_private_segment_buffer_sgpr = 1
142 // ASM: kernarg_segment_alignment = 5
143 // ASM: group_segment_alignment = 5
144 // ASM: private_segment_alignment = 5
145 // ASM: wavefront_size = 5
146 // ASM: call_convention = 1
147 // ASM: runtime_loader_kernel_symbol = 1
148 // ASM: .end_amd_kernel_code_t
149
150 amd_kernel_code_t_minimal:
151 .amd_kernel_code_t
152 enable_sgpr_kernarg_segment_ptr = 1
153 is_ptr64 = 1
154 compute_pgm_rsrc1_vgprs = 1
155 compute_pgm_rsrc1_sgprs = 1
156 compute_pgm_rsrc2_user_sgpr = 2
157 kernarg_segment_byte_size = 16
158 wavefront_sgpr_count = 8
159 // wavefront_sgpr_count = 7
160 ; wavefront_sgpr_count = 7
161 // Make sure a blank line won't break anything:
162
163 // Make sure a line with whitespace won't break anything:
164
165 workitem_vgpr_count = 16
166 .end_amd_kernel_code_t
167
168 // ASM-LABEL: {{^}}amd_kernel_code_t_minimal:
169 // ASM: .amd_kernel_code_t
170 // ASM: kernel_code_version_major = 1
171 // ASM: kernel_code_version_minor = 0
172 // ASM: machine_kind = 1
173 // ASM: machine_version_major = 7
174 // ASM: machine_version_minor = 0
175 // ASM: machine_version_stepping = 0
176 // ASM: kernel_code_entry_byte_offset = 256
177 // ASM: kernel_code_prefetch_byte_size = 0
178 // ASM: max_scratch_backing_memory_byte_size = 0
179 // ASM: compute_pgm_rsrc1_vgprs = 1
180 // ASM: compute_pgm_rsrc1_sgprs = 1
181 // ASM: compute_pgm_rsrc1_priority = 0
182 // ASM: compute_pgm_rsrc1_float_mode = 0
183 // ASM: compute_pgm_rsrc1_priv = 0
184 // ASM: compute_pgm_rsrc1_dx10_clamp = 0
185 // ASM: compute_pgm_rsrc1_debug_mode = 0
186 // ASM: compute_pgm_rsrc1_ieee_mode = 0
187 // ASM: compute_pgm_rsrc2_scratch_en = 0
188 // ASM: compute_pgm_rsrc2_user_sgpr = 2
189 // ASM: compute_pgm_rsrc2_tgid_x_en = 0
190 // ASM: compute_pgm_rsrc2_tgid_y_en = 0
191 // ASM: compute_pgm_rsrc2_tgid_z_en = 0
192 // ASM: compute_pgm_rsrc2_tg_size_en = 0
193 // ASM: compute_pgm_rsrc2_tidig_comp_cnt = 0
194 // ASM: compute_pgm_rsrc2_excp_en_msb = 0
195 // ASM: compute_pgm_rsrc2_lds_size = 0
196 // ASM: compute_pgm_rsrc2_excp_en = 0
197 // ASM: enable_sgpr_private_segment_buffer = 0
198 // ASM: enable_sgpr_dispatch_ptr = 0
199 // ASM: enable_sgpr_queue_ptr = 0
200 // ASM: enable_sgpr_kernarg_segment_ptr = 1
201 // ASM: enable_sgpr_dispatch_id = 0
202 // ASM: enable_sgpr_flat_scratch_init = 0
203 // ASM: enable_sgpr_private_segment_size = 0
204 // ASM: enable_sgpr_grid_workgroup_count_x = 0
205 // ASM: enable_sgpr_grid_workgroup_count_y = 0
206 // ASM: enable_sgpr_grid_workgroup_count_z = 0
207 // ASM: enable_ordered_append_gds = 0
208 // ASM: private_element_size = 0
209 // ASM: is_ptr64 = 1
210 // ASM: is_dynamic_callstack = 0
211 // ASM: is_debug_enabled = 0
212 // ASM: is_xnack_enabled = 0
213 // ASM: workitem_private_segment_byte_size = 0
214 // ASM: workgroup_group_segment_byte_size = 0
215 // ASM: gds_segment_byte_size = 0
216 // ASM: kernarg_segment_byte_size = 16
217 // ASM: workgroup_fbarrier_count = 0
218 // ASM: wavefront_sgpr_count = 8
219 // ASM: workitem_vgpr_count = 16
220 // ASM: reserved_vgpr_first = 0
221 // ASM: reserved_vgpr_count = 0
222 // ASM: reserved_sgpr_first = 0
223 // ASM: reserved_sgpr_count = 0
224 // ASM: debug_wavefront_private_segment_offset_sgpr = 0
225 // ASM: debug_private_segment_buffer_sgpr = 0
226 // ASM: kernarg_segment_alignment = 4
227 // ASM: group_segment_alignment = 4
228 // ASM: private_segment_alignment = 4
229 // ASM: wavefront_size = 6
230 // ASM: call_convention = 0
231 // ASM: runtime_loader_kernel_symbol = 0
232 // ASM: .end_amd_kernel_code_t