llvm.org GIT mirror llvm / 1d928ef
AMD family 17h (znver1) enablement. Summary: This patch enables the following: 1. The AMD family 17h architecture, selected with the "znver1" tune flag (-march, -mcpu). 2. The ISAs that are enabled for the "znver1" architecture. 3. A check of the ADX ISA bit from CPUID to identify "znver1" when -march=native is used. 4. The FMA4 and XOP ISAs are disabled, as they were dropped from amdfam17. 5. For the time being, it uses the btver2 scheduler model. 6. Test files are updated to check this flag. This item is linked to the clang review item https://reviews.llvm.org/D28018. Patch by Ganesh Gopalasubramanian. Reviewers: RKSimon, craig.topper. Subscribers: vprasad, RKSimon, ashutosh.nema, llvm-commits. Differential Revision: https://reviews.llvm.org/D28017 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291543 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 3 years ago
6 changed file(s) with 60 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
110110 AMDATHLON,
111111 AMDFAM14H,
112112 AMDFAM16H,
113 AMDFAM17H,
113114 CPU_TYPE_MAX
114115 };
115116
148149 AMD_BTVER2,
149150 AMDFAM15H_BDVER3,
150151 AMDFAM15H_BDVER4,
152 AMDFAM17H_ZNVER1,
151153 CPU_SUBTYPE_MAX
152154 };
153155
741743 }
742744 *Subtype = AMD_BTVER2;
743745 break; // "btver2"
746 case 23:
747 *Type = AMDFAM17H;
748 if (Features & (1 << FEATURE_ADX)) {
749 *Subtype = AMDFAM17H_ZNVER1;
750 break; // "znver1"
751 }
752 *Subtype = AMD_BTVER1;
753 break;
744754 default:
745755 break; // "generic"
746756 }
949959 default:
950960 return "amdfam16";
951961 }
962 case AMDFAM17H:
963 switch (Subtype) {
964 case AMD_BTVER1:
965 return "btver1";
966 case AMDFAM17H_ZNVER1:
967 return "znver1";
968 default:
969 return "amdfam17";
970 }
952971 default:
953972 return "generic";
954973 }
759759 FeatureMWAITX
760760 ]>;
761761
762 // TODO: The scheduler model currently falls back to the BTVER2 model.
763 // A dedicated znver1 scheduler model still has to be put in place.
764 // Zen
765 def: ProcessorModel<"znver1", BtVer2Model, [
766 FeatureADX,
767 FeatureAES,
768 FeatureAVX2,
769 FeatureBMI,
770 FeatureBMI2,
771 FeatureCLFLUSHOPT,
772 FeatureCMPXCHG16B,
773 FeatureF16C,
774 FeatureFMA,
775 FeatureFSGSBase,
776 FeatureFXSR,
777 FeatureFastLZCNT,
778 FeatureLAHFSAHF,
779 FeatureLZCNT,
780 FeatureMMX,
781 FeatureMOVBE,
782 FeatureMWAITX,
783 FeaturePCLMUL,
784 FeaturePOPCNT,
785 FeaturePRFCHW,
786 FeatureRDRAND,
787 FeatureRDSEED,
788 FeatureSHA,
789 FeatureSMAP,
790 FeatureSSE4A,
791 FeatureSlowSHLD,
792 FeatureX87,
793 FeatureXSAVE,
794 FeatureXSAVEC,
795 FeatureXSAVEOPT,
796 FeatureXSAVES]>;
797
762798 def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
763799
764800 def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
3232 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
3333 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
3434 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
35 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
22 ; Eg: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
33 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck %s
44 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s
5 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck %s
6 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s
57
68 ; Test one 32-bit input, output is 32-bit, no transformations expected.
79 define i32 @test_zext_cmp0(i32 %a) {
4545 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST
4646 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
4747 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
48 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST
4849
4950 ; Other chips with slow unaligned memory accesses
5051
1616 ; RUN: llc < %s -march=x86-64 -mcpu=bdver2 | FileCheck %s
1717 ; RUN: llc < %s -march=x86-64 -mcpu=bdver3 | FileCheck %s
1818 ; RUN: llc < %s -march=x86-64 -mcpu=bdver4 | FileCheck %s
19 ; RUN: llc < %s -march=x86-64 -mcpu=znver1 | FileCheck %s
1920
2021 ; Verify that for the X86_64 processors that are known to have poor latency
2122 ; double precision shift instructions we do not generate 'shld' or 'shrd'