llvm.org GIT mirror llvm / 755155f
AMDGPU: Add AMDGPU HSA Kernel Descriptor - Update docs to match llvm coding style - Add missing FP16_OVFL bit for gfx9 - Fix the size of the kernel descriptor in the docs Differential Revision: https://reviews.llvm.org/D38902 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315822 91177308-0d34-0410-b5e6-96231b3b80d8 Konstantin Zhuravlyov 1 year, 10 months ago
2 changed file(s) with 252 addition(s) and 91 deletion(s). Raw diff Collapse all Expand all
14261426 ======= ======= =============================== ===========================
14271427 Bits Size Field Name Description
14281428 ======= ======= =============================== ===========================
1429 31:0 4 bytes group_segment_fixed_size The amount of fixed local
1429 31:0 4 bytes GroupSegmentFixedSize The amount of fixed local
14301430 address space memory
14311431 required for a work-group
14321432 in bytes. This does not
14351435 space memory that may be
14361436 added when the kernel is
14371437 dispatched.
1438 63:32 4 bytes private_segment_fixed_size The amount of fixed
1438 63:32 4 bytes PrivateSegmentFixedSize The amount of fixed
14391439 private address space
14401440 memory required for a
14411441 work-item in bytes. If
14431443 then additional space must
14441444 be added to this value for
14451445 the call stack.
1446 95:64 4 bytes max_flat_workgroup_size Maximum flat work-group
1446 95:64 4 bytes MaxFlatWorkgroupSize Maximum flat work-group
14471447 size supported by the
14481448 kernel in work-items.
1449 96 1 bit is_dynamic_call_stack Indicates if the generated
1449 96 1 bit IsDynamicCallStack Indicates if the generated
14501450 machine code is using a
14511451 dynamically sized call
14521452 stack.
1453 97 1 bit is_xnack_enabled Indicates if the generated
1453 97 1 bit IsXNACKEnabled Indicates if the generated
14541454 machine code is capable of
14551455 suppoting XNACK.
14561456 127:98 30 bits Reserved. Must be 0.
1457 191:128 8 bytes kernel_code_entry_byte_offset Byte offset (possibly
1457 191:128 8 bytes KernelCodeEntryByteOffset Byte offset (possibly
14581458 negative) from base
14591459 address of kernel
14601460 descriptor to kernel's
14631463 aligned.
14641464 383:192 24 Reserved. Must be 0.
14651465 bytes
1466 415:384 4 bytes compute_pgm_rsrc1 Compute Shader (CS)
1466 415:384 4 bytes ComputePgmRsrc1 Compute Shader (CS)
14671467 program settings used by
14681468 CP to set up
14691469 ``COMPUTE_PGM_RSRC1``
14701470 configuration
14711471 register. See
14721472 :ref:`amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table`.
1473 447:416 4 bytes compute_pgm_rsrc2 Compute Shader (CS)
1473 447:416 4 bytes ComputePgmRsrc2 Compute Shader (CS)
14741474 program settings used by
14751475 CP to set up
14761476 ``COMPUTE_PGM_RSRC2``
14771477 configuration
14781478 register. See
14791479 :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table`.
1480 448 1 bit enable_sgpr_private_segment Enable the setup of the
1481 _buffer SGPR user data registers
1480 448 1 bit EnableSGPRPrivateSegmentBuffer Enable the setup of the
1481 SGPR user data registers
14821482 (see
14831483 :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
14841484
14891489 ``compute_pgm_rsrc2.user_sgpr.user_sgpr_count``.
14901490 Any requests beyond 16
14911491 will be ignored.
1492 449 1 bit enable_sgpr_dispatch_ptr *see above*
1493 450 1 bit enable_sgpr_queue_ptr *see above*
1494 451 1 bit enable_sgpr_kernarg_segment_ptr *see above*
1495 452 1 bit enable_sgpr_dispatch_id *see above*
1496 453 1 bit enable_sgpr_flat_scratch_init *see above*
1497 454 1 bit enable_sgpr_private_segment *see above*
1498 _size
1499 455 1 bit enable_sgpr_grid_workgroup Not implemented in CP and
1500 _count_X should always be 0.
1501 456 1 bit enable_sgpr_grid_workgroup Not implemented in CP and
1502 _count_Y should always be 0.
1503 457 1 bit enable_sgpr_grid_workgroup Not implemented in CP and
1504 _count_Z should always be 0.
1492 449 1 bit EnableSGPRDispatchPtr *see above*
1493 450 1 bit EnableSGPRQueuePtr *see above*
1494 451 1 bit EnableSGPRKernargSegmentPtr *see above*
1495 452 1 bit EnableSGPRDispatchID *see above*
1496 453 1 bit EnableSGPRFlatScratchInit *see above*
1497 454 1 bit EnableSGPRPrivateSegmentSize *see above*
1498 455 1 bit EnableSGPRGridWorkgroupCountX Not implemented in CP and
1499 should always be 0.
1500 456 1 bit EnableSGPRGridWorkgroupCountY Not implemented in CP and
1501 should always be 0.
1502 457 1 bit EnableSGPRGridWorkgroupCountZ Not implemented in CP and
1503 should always be 0.
15051504 463:458 6 bits Reserved. Must be 0.
1506 511:464 4 Reserved. Must be 0.
1505 511:464 6 Reserved. Must be 0.
15071506 bytes
15081507 512 **Total size 64 bytes.**
15091508 ======= ===================================================================
15161515 ======= ======= =============================== ===========================================================================
15171516 Bits Size Field Name Description
15181517 ======= ======= =============================== ===========================================================================
1519 5:0 6 bits granulated_workitem_vgpr_count Number of vector registers
1518 5:0 6 bits GRANULATED_WORKITEM_VGPR_COUNT Number of vector registers
15201519 used by each work-item,
15211520 granularity is device
15221521 specific:
15271526
15281527 Used by CP to set up
15291528 ``COMPUTE_PGM_RSRC1.VGPRS``.
1530 9:6 4 bits granulated_wavefront_sgpr_count Number of scalar registers
1529 9:6 4 bits GRANULATED_WAVEFRONT_SGPR_COUNT Number of scalar registers
15311530 used by a wavefront,
15321531 granularity is device
15331532 specific:
15491548
15501549 Used by CP to set up
15511550 ``COMPUTE_PGM_RSRC1.SGPRS``.
1552 11:10 2 bits priority Must be 0.
1551 11:10 2 bits PRIORITY Must be 0.
15531552
15541553 Start executing wavefront
15551554 at the specified priority.
15571556 CP is responsible for
15581557 filling in
15591558 ``COMPUTE_PGM_RSRC1.PRIORITY``.
1560 13:12 2 bits float_mode_round_32 Wavefront starts execution
1559 13:12 2 bits FLOAT_ROUND_MODE_32 Wavefront starts execution
15611560 with specified rounding
15621561 mode for single (32
15631562 bit) floating point
15701569
15711570 Used by CP to set up
15721571 ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
1573 15:14 2 bits float_mode_round_16_64 Wavefront starts execution
1572 15:14 2 bits FLOAT_ROUND_MODE_16_64 Wavefront starts execution
15741573 with specified rounding
15751574 denorm mode for half/double (16
15761575 and 64 bit) floating point
15831582
15841583 Used by CP to set up
15851584 ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
1586 17:16 2 bits float_mode_denorm_32 Wavefront starts execution
1585 17:16 2 bits FLOAT_DENORM_MODE_32 Wavefront starts execution
15871586 with specified denorm mode
15881587 for single (32
15891588 bit) floating point
15961595
15971596 Used by CP to set up
15981597 ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
1599 19:18 2 bits float_mode_denorm_16_64 Wavefront starts execution
1598 19:18 2 bits FLOAT_DENORM_MODE_16_64 Wavefront starts execution
16001599 with specified denorm mode
16011600 for half/double (16
16021601 and 64 bit) floating point
16091608
16101609 Used by CP to set up
16111610 ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
1612 20 1 bit priv Must be 0.
1611 20 1 bit PRIV Must be 0.
16131612
16141613 Start executing wavefront
16151614 in privilege trap handler
16181617 CP is responsible for
16191618 filling in
16201619 ``COMPUTE_PGM_RSRC1.PRIV``.
1621 21 1 bit enable_dx10_clamp Wavefront starts execution
1620 21 1 bit ENABLE_DX10_CLAMP Wavefront starts execution
16221621 with DX10 clamp mode
16231622 enabled. Used by the vector
16241623 ALU to force DX-10 style
16291628
16301629 Used by CP to set up
16311630 ``COMPUTE_PGM_RSRC1.DX10_CLAMP``.
1632 22 1 bit debug_mode Must be 0.
1631 22 1 bit DEBUG_MODE Must be 0.
16331632
16341633 Start executing wavefront
16351634 in single step mode.
16371636 CP is responsible for
16381637 filling in
16391638 ``COMPUTE_PGM_RSRC1.DEBUG_MODE``.
1640 23 1 bit enable_ieee_mode Wavefront starts execution
1639 23 1 bit ENABLE_IEEE_MODE Wavefront starts execution
16411640 with IEEE mode
16421641 enabled. Floating point
16431642 opcodes that support
16521651
16531652 Used by CP to set up
16541653 ``COMPUTE_PGM_RSRC1.IEEE_MODE``.
1655 24 1 bit bulky Must be 0.
1654 24 1 bit BULKY Must be 0.
16561655
16571656 Only one work-group allowed
16581657 to execute on a compute
16611660 CP is responsible for
16621661 filling in
16631662 ``COMPUTE_PGM_RSRC1.BULKY``.
1664 25 1 bit cdbg_user Must be 0.
1663 25 1 bit CDBG_USER Must be 0.
16651664
16661665 Flag that can be used to
16671666 control debugging code.
16691668 CP is responsible for
16701669 filling in
16711670 ``COMPUTE_PGM_RSRC1.CDBG_USER``.
1672 31:26 6 bits Reserved. Must be 0.
1671 26 1 bit FP16_OVFL GFX6-8:
1672 Reserved. Must be 0.
1673 GFX9:
1674 Wavefront starts
1675 execution with specified
1676 fp16 overflow mode.
1677
1678 - If 0, then fp16
1679 overflow generates
1680 +/-INF values.
1681 - If 1, then fp16
1682 overflow that is the
1683 result of an +/-INF
1684 input value or divide
1685 by 0 generates a
1686 +/-INF, otherwise
1687 clamps computed
1688 overflow to +/-MAX_FP16
1689 as appropriate.
1690
1691 Used by CP to set up
1692 ``COMPUTE_PGM_RSRC1.FP16_OVFL``.
1693 31:27 5 bits Reserved. Must be 0.
16731694 32 **Total size 4 bytes**
16741695 ======= ===================================================================================================================
16751696
16811702 ======= ======= =============================== ===========================================================================
16821703 Bits Size Field Name Description
16831704 ======= ======= =============================== ===========================================================================
1684 0 1 bit enable_sgpr_private_segment Enable the setup of the
1685 _wave_offset SGPR wave scratch offset
1705 0 1 bit ENABLE_SGPR_PRIVATE_SEGMENT Enable the setup of the
1706 _WAVE_OFFSET SGPR wave scratch offset
16861707 system register (see
16871708 :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
16881709
16891710 Used by CP to set up
16901711 ``COMPUTE_PGM_RSRC2.SCRATCH_EN``.
1691 5:1 5 bits user_sgpr_count The total number of SGPR
1712 5:1 5 bits USER_SGPR_COUNT The total number of SGPR
16921713 user data registers
16931714 requested. This number must
16941715 match the number of user
16961717
16971718 Used by CP to set up
16981719 ``COMPUTE_PGM_RSRC2.USER_SGPR``.
1699 6 1 bit enable_trap_handler Set to 1 if code contains a
1720 6 1 bit ENABLE_TRAP_HANDLER Set to 1 if code contains a
17001721 TRAP instruction which
17011722 requires a trap handler to
17021723 be enabled.
17071728 installed a trap handler
17081729 regardless of the setting
17091730 of this field.
1710 7 1 bit enable_sgpr_workgroup_id_x Enable the setup of the
1731 7 1 bit ENABLE_SGPR_WORKGROUP_ID_X Enable the setup of the
17111732 system SGPR register for
17121733 the work-group id in the X
17131734 dimension (see
17151736
17161737 Used by CP to set up
17171738 ``COMPUTE_PGM_RSRC2.TGID_X_EN``.
1718 8 1 bit enable_sgpr_workgroup_id_y Enable the setup of the
1739 8 1 bit ENABLE_SGPR_WORKGROUP_ID_Y Enable the setup of the
17191740 system SGPR register for
17201741 the work-group id in the Y
17211742 dimension (see
17231744
17241745 Used by CP to set up
17251746 ``COMPUTE_PGM_RSRC2.TGID_Y_EN``.
1726 9 1 bit enable_sgpr_workgroup_id_z Enable the setup of the
1747 9 1 bit ENABLE_SGPR_WORKGROUP_ID_Z Enable the setup of the
17271748 system SGPR register for
17281749 the work-group id in the Z
17291750 dimension (see
17311752
17321753 Used by CP to set up
17331754 ``COMPUTE_PGM_RSRC2.TGID_Z_EN``.
1734 10 1 bit enable_sgpr_workgroup_info Enable the setup of the
1755 10 1 bit ENABLE_SGPR_WORKGROUP_INFO Enable the setup of the
17351756 system SGPR register for
17361757 work-group information (see
17371758 :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
17381759
17391760 Used by CP to set up
17401761 ``COMPUTE_PGM_RSRC2.TGID_SIZE_EN``.
1741 12:11 2 bits enable_vgpr_workitem_id Enable the setup of the
1762 12:11 2 bits ENABLE_VGPR_WORKITEM_ID Enable the setup of the
17421763 VGPR system registers used
17431764 for the work-item ID.
17441765 :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table`
17461767
17471768 Used by CP to set up
17481769 ``COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT``.
1749 13 1 bit enable_exception_address_watch Must be 0.
1770 13 1 bit ENABLE_EXCEPTION_ADDRESS_WATCH Must be 0.
17501771
17511772 Wavefront starts execution
17521773 with address watch
17621783 ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB``
17631784 according to what the
17641785 runtime requests.
1765 14 1 bit enable_exception_memory Must be 0.
1786 14 1 bit ENABLE_EXCEPTION_MEMORY Must be 0.
17661787
17671788 Wavefront starts execution
17681789 with memory violation
17811802 ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB``
17821803 according to what the
17831804 runtime requests.
1784 23:15 9 bits granulated_lds_size Must be 0.
1805 23:15 9 bits GRANULATED_LDS_SIZE Must be 0.
17851806
17861807 CP uses the rounded value
17871808 from the dispatch packet,
18021823 GFX7-GFX9:
18031824 roundup(lds-size / (128 * 4))
18041825
1805 24 1 bit enable_exception_ieee_754_fp Wavefront starts execution
1806 _invalid_operation with specified exceptions
1826 24 1 bit ENABLE_EXCEPTION_IEEE_754_FP Wavefront starts execution
1827 _INVALID_OPERATION with specified exceptions
18071828 enabled.
18081829
18091830 Used by CP to set up
18121833
18131834 IEEE 754 FP Invalid
18141835 Operation
1815 25 1 bit enable_exception_fp_denormal FP Denormal one or more
1816 _source input operands is a
1836 25 1 bit ENABLE_EXCEPTION_FP_DENORMAL FP Denormal one or more
1837 _SOURCE input operands is a
18171838 denormal number
1818 26 1 bit enable_exception_ieee_754_fp IEEE 754 FP Division by
1819 _division_by_zero Zero
1820 27 1 bit enable_exception_ieee_754_fp IEEE 754 FP FP Overflow
1821 _overflow
1822 28 1 bit enable_exception_ieee_754_fp IEEE 754 FP Underflow
1823 _underflow
1824 29 1 bit enable_exception_ieee_754_fp IEEE 754 FP Inexact
1825 _inexact
1826 30 1 bit enable_exception_int_divide_by Integer Division by Zero
1827 _zero (rcp_iflag_f32 instruction
1839 26 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Division by
1840 _DIVISION_BY_ZERO Zero
1841 27 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP FP Overflow
1842 _OVERFLOW
1843 28 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Underflow
1844 _UNDERFLOW
1845 29 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Inexact
1846 _INEXACT
1847 30 1 bit ENABLE_EXCEPTION_INT_DIVIDE_BY Integer Division by Zero
1848 _ZERO (rcp_iflag_f32 instruction
18281849 only)
18291850 31 1 bit Reserved. Must be 0.
18301851 32 **Total size 4 bytes.**
18351856 .. table:: Floating Point Rounding Mode Enumeration Values
18361857 :name: amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table
18371858
1838 ===================================== ===== ===============================
1839 Enumeration Name Value Description
1840 ===================================== ===== ===============================
1841 AMD_FLOAT_ROUND_MODE_NEAR_EVEN 0 Round Ties To Even
1842 AMD_FLOAT_ROUND_MODE_PLUS_INFINITY 1 Round Toward +infinity
1843 AMD_FLOAT_ROUND_MODE_MINUS_INFINITY 2 Round Toward -infinity
1844 AMD_FLOAT_ROUND_MODE_ZERO 3 Round Toward 0
1845 ===================================== ===== ===============================
1859 ====================================== ===== ==============================
1860 Enumeration Name Value Description
1861 ====================================== ===== ==============================
1862 AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN 0 Round Ties To Even
1863 AMDGPU_FLOAT_ROUND_MODE_PLUS_INFINITY 1 Round Toward +infinity
1864 AMDGPU_FLOAT_ROUND_MODE_MINUS_INFINITY 2 Round Toward -infinity
1865 AMDGPU_FLOAT_ROUND_MODE_ZERO 3 Round Toward 0
1866 ====================================== ===== ==============================
18461867
18471868 ..
18481869
18491870 .. table:: Floating Point Denorm Mode Enumeration Values
18501871 :name: amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table
18511872
1852 ===================================== ===== ===============================
1853 Enumeration Name Value Description
1854 ===================================== ===== ===============================
1855 AMD_FLOAT_DENORM_MODE_FLUSH_SRC_DST 0 Flush Source and Destination
1856 Denorms
1857 AMD_FLOAT_DENORM_MODE_FLUSH_DST 1 Flush Output Denorms
1858 AMD_FLOAT_DENORM_MODE_FLUSH_SRC 2 Flush Source Denorms
1859 AMD_FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush
1860 ===================================== ===== ===============================
1873 ====================================== ===== ==============================
1874 Enumeration Name Value Description
1875 ====================================== ===== ==============================
1876 AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC_DST 0 Flush Source and Destination
1877 Denorms
1878 AMDGPU_FLOAT_DENORM_MODE_FLUSH_DST 1 Flush Output Denorms
1879 AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC 2 Flush Source Denorms
1880 AMDGPU_FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush
1881 ====================================== ===== ==============================
18611882
18621883 ..
18631884
18641885 .. table:: System VGPR Work-Item ID Enumeration Values
18651886 :name: amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table
18661887
1867 ===================================== ===== ===============================
1868 Enumeration Name Value Description
1869 ===================================== ===== ===============================
1870 AMD_SYSTEM_VGPR_WORKITEM_ID_X 0 Set work-item X dimension ID.
1871 AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y 1 Set work-item X and Y
1872 dimensions ID.
1873 AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z 2 Set work-item X, Y and Z
1874 dimensions ID.
1875 AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3 Undefined.
1876 ===================================== ===== ===============================
1888 ======================================== ===== ============================
1889 Enumeration Name Value Description
1890 ======================================== ===== ============================
1891 AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X 0 Set work-item X dimension
1892 ID.
1893 AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y 1 Set work-item X and Y
1894 dimensions ID.
1895 AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z 2 Set work-item X, Y and Z
1896 dimensions ID.
1897 AMDGPU_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3 Undefined.
1898 ======================================== ===== ============================
18771899
18781900 .. _amdgpu-amdhsa-initial-kernel-execution-state:
18791901
0 //===--- AMDGPUKernelDescriptor.h -------------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief AMDGPU kernel descriptor definitions. For more information, visit
11 /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor-for-gfx6-gfx9
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H
16 #define LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H
17
18 #include
19
20 // Creates enumeration entries used for packing bits into integers. Enumeration
21 // entries include bit shift amount, bit width, and bit mask.
22 #define AMDGPU_BITS_ENUM_ENTRY(name, shift, width) \
23 name ## _SHIFT = (shift), \
24 name ## _WIDTH = (width), \
25 name = (((1 << (width)) - 1) << (shift)) \
26
27 // Gets bits for specified bit mask from specified source.
28 #define AMDGPU_BITS_GET(src, mask) \
29 ((src & mask) >> mask ## _SHIFT) \
30
31 // Sets bits for specified bit mask in specified destination.
32 #define AMDGPU_BITS_SET(dst, mask, val) \
33 dst &= (~(1 << mask ## _SHIFT) & ~mask); \
34 dst |= (((val) << mask ## _SHIFT) & mask) \
35
36 namespace llvm {
37 namespace AMDGPU {
38 namespace HSAKD {
39
40 /// \brief Floating point rounding modes.
41 enum : uint8_t {
42 AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN = 0,
43 AMDGPU_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
44 AMDGPU_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
45 AMDGPU_FLOAT_ROUND_MODE_ZERO = 3,
46 };
47
48 /// \brief Floating point denorm modes.
49 enum : uint8_t {
50 AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
51 AMDGPU_FLOAT_DENORM_MODE_FLUSH_DST = 1,
52 AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC = 2,
53 AMDGPU_FLOAT_DENORM_MODE_FLUSH_NONE = 3,
54 };
55
56 /// \brief System VGPR workitem IDs.
57 enum : uint8_t {
58 AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X = 0,
59 AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
60 AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
61 AMDGPU_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
62 };
63
64 /// \brief Compute program resource register one layout.
65 enum ComputePgmRsrc1 {
66 AMDGPU_BITS_ENUM_ENTRY(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
67 AMDGPU_BITS_ENUM_ENTRY(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
68 AMDGPU_BITS_ENUM_ENTRY(PRIORITY, 10, 2),
69 AMDGPU_BITS_ENUM_ENTRY(FLOAT_ROUND_MODE_32, 12, 2),
70 AMDGPU_BITS_ENUM_ENTRY(FLOAT_ROUND_MODE_16_64, 14, 2),
71 AMDGPU_BITS_ENUM_ENTRY(FLOAT_DENORM_MODE_32, 16, 2),
72 AMDGPU_BITS_ENUM_ENTRY(FLOAT_DENORM_MODE_16_64, 18, 2),
73 AMDGPU_BITS_ENUM_ENTRY(PRIV, 20, 1),
74 AMDGPU_BITS_ENUM_ENTRY(ENABLE_DX10_CLAMP, 21, 1),
75 AMDGPU_BITS_ENUM_ENTRY(DEBUG_MODE, 22, 1),
76 AMDGPU_BITS_ENUM_ENTRY(ENABLE_IEEE_MODE, 23, 1),
77 AMDGPU_BITS_ENUM_ENTRY(BULKY, 24, 1),
78 AMDGPU_BITS_ENUM_ENTRY(CDBG_USER, 25, 1),
79 AMDGPU_BITS_ENUM_ENTRY(FP16_OVFL, 26, 1),
80 AMDGPU_BITS_ENUM_ENTRY(RESERVED0, 27, 5),
81 };
82
83 /// \brief Compute program resource register two layout.
84 enum ComputePgmRsrc2 {
85 AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_OFFSET, 0, 1),
86 AMDGPU_BITS_ENUM_ENTRY(USER_SGPR_COUNT, 1, 5),
87 AMDGPU_BITS_ENUM_ENTRY(ENABLE_TRAP_HANDLER, 6, 1),
88 AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
89 AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
90 AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
91 AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
92 AMDGPU_BITS_ENUM_ENTRY(ENABLE_VGPR_WORKITEM_ID, 11, 2),
93 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
94 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_MEMORY, 14, 1),
95 AMDGPU_BITS_ENUM_ENTRY(GRANULATED_LDS_SIZE, 15, 9),
96 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
97 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
98 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
99 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
100 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
101 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
102 AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
103 AMDGPU_BITS_ENUM_ENTRY(RESERVED1, 31, 1),
104 };
105
106 /// \brief Kernel descriptor layout. This layout should be kept backwards
107 /// compatible as it is consumed by the command processor.
108 struct KernelDescriptor final {
109 uint32_t GroupSegmentFixedSize;
110 uint32_t PrivateSegmentFixedSize;
111 uint32_t MaxFlatWorkgroupSize;
112 uint64_t IsDynamicCallStack : 1;
113 uint64_t IsXNACKEnabled : 1;
114 uint64_t Reserved0 : 30;
115 int64_t KernelCodeEntryByteOffset;
116 uint64_t Reserved1[3];
117 uint32_t ComputePgmRsrc1;
118 uint32_t ComputePgmRsrc2;
119 uint64_t EnableSGPRPrivateSegmentBuffer : 1;
120 uint64_t EnableSGPRDispatchPtr : 1;
121 uint64_t EnableSGPRQueuePtr : 1;
122 uint64_t EnableSGPRKernargSegmentPtr : 1;
123 uint64_t EnableSGPRDispatchID : 1;
124 uint64_t EnableSGPRFlatScratchInit : 1;
125 uint64_t EnableSGPRPrivateSegmentSize : 1;
126 uint64_t EnableSGPRGridWorkgroupCountX : 1;
127 uint64_t EnableSGPRGridWorkgroupCountY : 1;
128 uint64_t EnableSGPRGridWorkgroupCountZ : 1;
129 uint64_t Reserved2 : 54;
130
131 KernelDescriptor() = default;
132 };
133
134 } // end namespace HSAKD
135 } // end namespace AMDGPU
136 } // end namespace llvm
137
138 #endif // LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H