llvm.org GIT mirror llvm / 2c3cc29
[X86] Change PMULLD to 10 cycles on Skylake per Agner's tables and llvm-exegesis. Also restrict it to ports 0 and 1 for SkylakeClient. It looks like the scheduler models don't account for the client not having a full vector ALU on port 5 as the server does. Fixes PR36808. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328061 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 2 years ago
5 changed file(s) with 63 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
24152415 let NumMicroOps = 2;
24162416 let ResourceCycles = [2];
24172417 }
2418 def: InstRW<[SKLWriteResGroup105], (instregex "PMULLDrr")>;
24192418 def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPDr")>;
24202419 def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPSr")>;
24212420 def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDSDr")>;
24222421 def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDSSr")>;
2423 def: InstRW<[SKLWriteResGroup105], (instregex "VPMULLDYrr")>;
2424 def: InstRW<[SKLWriteResGroup105], (instregex "VPMULLDrr")>;
24252422 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDPDr")>;
24262423 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDPSr")>;
24272424 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDSDr")>;
24282425 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDSSr")>;
24292426 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDYPDr")>;
24302427 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDYPSr")>;
2428
2429 def SKLWriteResGroup105_2 : SchedWriteRes<[SKLPort01]> {
2430 let Latency = 10;
2431 let NumMicroOps = 2;
2432 let ResourceCycles = [2];
2433 }
2434 def: InstRW<[SKLWriteResGroup105_2], (instregex "PMULLDrr")>;
2435 def: InstRW<[SKLWriteResGroup105_2], (instregex "VPMULLDYrr")>;
2436 def: InstRW<[SKLWriteResGroup105_2], (instregex "VPMULLDrr")>;
24312437
24322438 def SKLWriteResGroup106 : SchedWriteRes<[SKLPort0,SKLPort23]> {
24332439 let Latency = 8;
32773283 let NumMicroOps = 3;
32783284 let ResourceCycles = [1,2];
32793285 }
3280 def: InstRW<[SKLWriteResGroup168], (instregex "PMULLDrm")>;
32813286 def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDPDm")>;
32823287 def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDPSm")>;
32833288 def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDSDm")>;
32843289 def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDSSm")>;
3285 def: InstRW<[SKLWriteResGroup168], (instregex "VPMULLDrm")>;
32863290 def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDPDm")>;
32873291 def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDPSm")>;
32883292 def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDSDm")>;
32893293 def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDSSm")>;
32903294
3295 def SKLWriteResGroup168_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
3296 let Latency = 16;
3297 let NumMicroOps = 3;
3298 let ResourceCycles = [1,2];
3299 }
3300 def: InstRW<[SKLWriteResGroup168_2], (instregex "PMULLDrm")>;
3301 def: InstRW<[SKLWriteResGroup168_2], (instregex "VPMULLDrm")>;
3302
32913303 def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
32923304 let Latency = 14;
32933305 let NumMicroOps = 3;
33173329 let NumMicroOps = 3;
33183330 let ResourceCycles = [1,2];
33193331 }
3320 def: InstRW<[SKLWriteResGroup172], (instregex "VPMULLDYrm")>;
33213332 def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPDm")>;
33223333 def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPSm")>;
3334
3335 def SKLWriteResGroup172_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
3336 let Latency = 17;
3337 let NumMicroOps = 3;
3338 let ResourceCycles = [1,2];
3339 }
3340 def: InstRW<[SKLWriteResGroup172_2], (instregex "VPMULLDYrm")>;
33233341
33243342 def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
33253343 let Latency = 15;
38683868 let NumMicroOps = 2;
38693869 let ResourceCycles = [2];
38703870 }
3871 def: InstRW<[SKXWriteResGroup116], (instregex "PMULLDrr")>;
38723871 def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDPDr")>;
38733872 def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDPSr")>;
38743873 def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDSDr")>;
38753874 def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDSSr")>;
3876 def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDYrr")>;
3877 def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDZ128rr(b?)(k?)(z?)")>;
3878 def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDZ256rr(b?)(k?)(z?)")>;
3879 def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDZrr(b?)(k?)(z?)")>;
3880 def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDrr")>;
38813875 def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ128rri(b?)(k?)(z?)")>;
38823876 def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ256rri(b?)(k?)(z?)")>;
38833877 def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZrri(b?)(k?)(z?)")>;
38923886 def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDSSr")>;
38933887 def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDYPDr")>;
38943888 def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDYPSr")>;
3889
3890 def SKXWriteResGroup116_2 : SchedWriteRes<[SKXPort015]> {
3891 let Latency = 10;
3892 let NumMicroOps = 2;
3893 let ResourceCycles = [2];
3894 }
3895 def: InstRW<[SKXWriteResGroup116_2], (instregex "PMULLDrr")>;
3896 def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDYrr")>;
3897 def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDZ128rr(b?)(k?)(z?)")>;
3898 def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDZ256rr(b?)(k?)(z?)")>;
3899 def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDZrr(b?)(k?)(z?)")>;
3900 def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDrr")>;
38953901
38963902 def SKXWriteResGroup117 : SchedWriteRes<[SKXPort0,SKXPort23]> {
38973903 let Latency = 8;
55405546 let NumMicroOps = 3;
55415547 let ResourceCycles = [1,2];
55425548 }
5543 def: InstRW<[SKXWriteResGroup186], (instregex "PMULLDrm")>;
55445549 def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDPDm")>;
55455550 def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDPSm")>;
55465551 def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDSDm")>;
55475552 def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDSSm")>;
5548 def: InstRW<[SKXWriteResGroup186], (instregex "VPMULLDZ128rm(b?)(k?)(z?)")>;
5549 def: InstRW<[SKXWriteResGroup186], (instregex "VPMULLDrm")>;
55505553 def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPDZ128rm(b?)i(k?)(z?)")>;
55515554 def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPSZ128rm(b?)i(k?)(z?)")>;
55525555 def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALESDm(b?)(k?)(z?)")>;
55565559 def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDSDm")>;
55575560 def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDSSm")>;
55585561
5562 def SKXWriteResGroup186_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
5563 let Latency = 16;
5564 let NumMicroOps = 3;
5565 let ResourceCycles = [1,2];
5566 }
5567 def: InstRW<[SKXWriteResGroup186_2], (instregex "PMULLDrm")>;
5568 def: InstRW<[SKXWriteResGroup186_2], (instregex "VPMULLDZ128rm(b?)(k?)(z?)")>;
5569 def: InstRW<[SKXWriteResGroup186_2], (instregex "VPMULLDrm")>;
5570
55595571 def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
55605572 let Latency = 14;
55615573 let NumMicroOps = 3;
56085620 let NumMicroOps = 3;
56095621 let ResourceCycles = [1,2];
56105622 }
5611 def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDYrm")>;
5612 def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDZ256rm(b?)(k?)(z?)")>;
5613 def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDZrm(b?)(k?)(z?)")>;
56145623 def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i(k?)(z?)")>;
56155624 def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZrm(b?)i(k?)(z?)")>;
56165625 def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPSZ256rm(b?)i(k?)(z?)")>;
56175626 def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPSZrm(b?)i(k?)(z?)")>;
56185627 def: InstRW<[SKXWriteResGroup192], (instregex "VROUNDYPDm")>;
56195628 def: InstRW<[SKXWriteResGroup192], (instregex "VROUNDYPSm")>;
5629
5630 def SKXWriteResGroup192_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
5631 let Latency = 17;
5632 let NumMicroOps = 3;
5633 let ResourceCycles = [1,2];
5634 }
5635 def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDYrm")>;
5636 def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDZ256rm(b?)(k?)(z?)")>;
5637 def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDZrm(b?)(k?)(z?)")>;
56205638
56215639 def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
56225640 let Latency = 15;
49234923 ;
49244924 ; SKYLAKE-LABEL: test_pmulld:
49254925 ; SKYLAKE: # %bb.0:
4926 ; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
4927 ; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
4926 ; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
4927 ; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00]
49284928 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
49294929 ;
49304930 ; SKX-LABEL: test_pmulld:
49314931 ; SKX: # %bb.0:
4932 ; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
4933 ; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
4932 ; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:0.67]
4933 ; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:0.67]
49344934 ; SKX-NEXT: retq # sched: [7:1.00]
49354935 ;
49364936 ; ZNVER1-LABEL: test_pmulld:
542542 ;
543543 ; SKX-LABEL: vpmulld_test:
544544 ; SKX: # %bb.0:
545 ; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [8:0.67]
545 ; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:0.67]
546546 ; SKX-NEXT: retq # sched: [7:1.00]
547547 %x = mul <16 x i32> %i, %j
548548 ret <16 x i32> %x
28522852 ;
28532853 ; SKYLAKE-LABEL: test_pmulld:
28542854 ; SKYLAKE: # %bb.0:
2855 ; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67]
2856 ; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
2855 ; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
2856 ; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
28572857 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
28582858 ;
28592859 ; SKX-LABEL: test_pmulld:
28602860 ; SKX: # %bb.0:
2861 ; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67]
2862 ; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
2861 ; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67]
2862 ; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67]
28632863 ; SKX-NEXT: retq # sched: [7:1.00]
28642864 ;
28652865 ; BTVER2-LABEL: test_pmulld: