llvm.org GIT mirror llvm / c3427dc
[X86] Remove unnecessary WriteFVarBlend/WriteVarBlend InstRW overrides. This also fixes some of the ReadAfterLd issues due to InstRW. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330544 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 1 year, 6 months ago
8 changed file(s) with 48 addition(s) and 146 deletion(s). Raw diff Collapse all Expand all
167167 defm : BWWriteResPair; // Floating point vector shuffles.
168168 defm : BWWriteResPair; // Floating point vector variable shuffles.
169169 defm : BWWriteResPair; // Floating point vector blends.
170 defm : BWWriteResPair>; // Fp vector variable blends.
170 defm : BWWriteResPair, 2, 5>; // Fp vector variable blends.
171171
172172 // FMA Scheduling helper class.
173173 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
185185 defm : BWWriteResPair; // Vector shuffles.
186186 defm : BWWriteResPair; // Vector variable shuffles.
187187 defm : BWWriteResPair; // Vector blends.
188 defm : BWWriteResPair>; // Vector variable blends.
188 defm : BWWriteResPair, 2, 5>; // Vector variable blends.
189189 defm : BWWriteResPair; // Vector MPSAD.
190190 defm : BWWriteResPair; // Vector PSADBW.
191191
469469 let NumMicroOps = 2;
470470 let ResourceCycles = [2];
471471 }
472 def: InstRW<[BWWriteResGroup11], (instregex "BLENDVPDrr0",
473 "BLENDVPSrr0",
474 "MMX_PINSRWrr",
475 "PBLENDVBrr0",
476 "VBLENDVPD(Y?)rr",
477 "VBLENDVPS(Y?)rr",
478 "VPBLENDVB(Y?)rr",
472 def: InstRW<[BWWriteResGroup11], (instregex "MMX_PINSRWrr",
479473 "(V?)PINSRBrr",
480474 "(V?)PINSRDrr",
481475 "(V?)PINSRQrr",
13391333 let NumMicroOps = 3;
13401334 let ResourceCycles = [2,1];
13411335 }
1342 def: InstRW<[BWWriteResGroup79], (instregex "BLENDVPDrm0",
1343 "BLENDVPSrm0",
1344 "MMX_PACKSSDWirm",
1336 def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm",
13451337 "MMX_PACKSSWBirm",
13461338 "MMX_PACKUSWBirm",
1347 "PBLENDVBrm0",
1348 "VBLENDVPDrm",
1349 "VBLENDVPSrm",
13501339 "VMASKMOVPDrm",
13511340 "VMASKMOVPSrm",
1352 "VPBLENDVBrm",
13531341 "VPMASKMOVDrm",
13541342 "VPMASKMOVQrm")>;
13551343
166166 defm : HWWriteResPair;
167167 defm : HWWriteResPair;
168168 defm : HWWriteResPair;
169 defm : HWWriteResPair>;
169 defm : HWWriteResPair, 2, 6>;
170170
171171 // Vector integer operations.
172172 def : WriteRes;
183183 defm : HWWriteResPair;
184184 defm : HWWriteResPair;
185185 defm : HWWriteResPair;
186 defm : HWWriteResPair>;
186 defm : HWWriteResPair, 2, 6>;
187187 defm : HWWriteResPair;
188188 defm : HWWriteResPair;
189189 defm : HWWriteResPair;
12251225 let NumMicroOps = 2;
12261226 let ResourceCycles = [2];
12271227 }
1228 def: InstRW<[HWWriteResGroup27], (instregex "BLENDVPDrr0",
1229 "BLENDVPSrr0",
1230 "MMX_PINSRWrr",
1231 "PBLENDVBrr0",
1232 "VBLENDVPD(Y?)rr",
1233 "VBLENDVPS(Y?)rr",
1234 "VPBLENDVB(Y?)rr",
1228 def: InstRW<[HWWriteResGroup27], (instregex "MMX_PINSRWrr",
12351229 "(V?)PINSRBrr",
12361230 "(V?)PINSRDrr",
12371231 "(V?)PINSRQrr",
13361330 let NumMicroOps = 3;
13371331 let ResourceCycles = [2,1];
13381332 }
1339 def: InstRW<[HWWriteResGroup36], (instregex "BLENDVPDrm0",
1340 "BLENDVPSrm0",
1341 "PBLENDVBrm0",
1342 "VBLENDVPDrm",
1343 "VBLENDVPSrm",
1344 "VMASKMOVPDrm",
1333 def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPDrm",
13451334 "VMASKMOVPSrm",
1346 "VPBLENDVBrm",
13471335 "VPMASKMOVDrm",
13481336 "VPMASKMOVQrm")>;
13491337
153153 defm : SBWriteResPair;
154154 defm : SBWriteResPair;
155155 defm : SBWriteResPair;
156 defm : SBWriteResPair [SBPort0, SBPort5], 2>;
156 defm : SBWriteResPair[SBPort05], 2, [2], 2, 6>;
157157
158158 // Vector integer operations.
159159 def : WriteRes;
168168 defm : SBWriteResPair;
169169 defm : SBWriteResPair;
170170 defm : SBWriteResPair;
171 defm : SBWriteResPair, SBPort5], 2>;
171 defm : SBWriteResPair5], 2, [2], 2, 6>;
172172 defm : SBWriteResPair;
173173 defm : SBWriteResPair;
174174
471471 let NumMicroOps = 2;
472472 let ResourceCycles = [2];
473473 }
474 def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0",
475 "BLENDVPSrr0",
476 "ROL(8|16|32|64)r1",
474 def: InstRW<[SBWriteResGroup9], (instregex "ROL(8|16|32|64)r1",
477475 "ROL(8|16|32|64)ri",
478476 "ROR(8|16|32|64)r1",
479477 "ROR(8|16|32|64)ri",
480 "SET(A|BE)r",
481 "VBLENDVPD(Y?)rr",
482 "VBLENDVPS(Y?)rr")>;
483
484 def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> {
485 let Latency = 2;
486 let NumMicroOps = 2;
487 let ResourceCycles = [2];
488 }
489 def: InstRW<[SBWriteResGroup10], (instregex "PBLENDVBrr0",
490 "VPBLENDVBrr")>;
478 "SET(A|BE)r")>;
491479
492480 def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
493481 let Latency = 2;
12281216 let NumMicroOps = 3;
12291217 let ResourceCycles = [1,2];
12301218 }
1231 def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPDrm0",
1232 "BLENDVPSrm0",
1233 "VBLENDVPDrm",
1234 "VBLENDVPSrm",
1235 "VMASKMOVPDrm",
1219 def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm",
12361220 "VMASKMOVPSrm")>;
1237
1238 def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> {
1239 let Latency = 8;
1240 let NumMicroOps = 3;
1241 let ResourceCycles = [1,2];
1242 }
1243 def: InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrm0",
1244 "VPBLENDVBrm")>;
12451221
12461222 def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
12471223 let Latency = 8;
164164 defm : SKLWriteResPair; // Floating point vector shuffles.
165165 defm : SKLWriteResPair; // Floating point vector shuffles.
166166 defm : SKLWriteResPair; // Floating point vector blends.
167 defm : SKLWriteResPair [SKLPort5], 2, [2]>; // Fp vector variable blends.
167 defm : SKLWriteResPair[SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
168168
169169 // FMA Scheduling helper class.
170170 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
182182 defm : SKLWriteResPair; // Vector shuffles.
183183 defm : SKLWriteResPair; // Vector shuffles.
184184 defm : SKLWriteResPair; // Vector blends.
185 defm : SKLWriteResPair5], 2, [2]>; // Vector variable blends.
185 defm : SKLWriteResPair015], 2, [2], 2, 6>; // Vector variable blends.
186186 defm : SKLWriteResPair; // Vector MPSAD.
187187 defm : SKLWriteResPair; // Vector PSADBW.
188188
606606 "ROR(8|16|32|64)ri",
607607 "SET(A|BE)r")>;
608608
609 def SKLWriteResGroup16 : SchedWriteRes<[SKLPort015]> {
610 let Latency = 2;
611 let NumMicroOps = 2;
612 let ResourceCycles = [2];
613 }
614 def: InstRW<[SKLWriteResGroup16], (instregex "BLENDVPDrr0",
615 "BLENDVPSrr0",
616 "PBLENDVBrr0",
617 "VBLENDVPD(Y?)rr",
618 "VBLENDVPS(Y?)rr",
619 "VPBLENDVB(Y?)rr")>;
620
621609 def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> {
622610 let Latency = 2;
623611 let NumMicroOps = 2;
17251713 "VXORPDYrm",
17261714 "VXORPSYrm")>;
17271715
1728 def SKLWriteResGroup111 : SchedWriteRes<[SKLPort23,SKLPort015]> {
1729 let Latency = 8;
1730 let NumMicroOps = 3;
1731 let ResourceCycles = [1,2];
1732 }
1733 def: InstRW<[SKLWriteResGroup111], (instregex "BLENDVPDrm0",
1734 "BLENDVPSrm0",
1735 "PBLENDVBrm0",
1736 "VBLENDVPDrm",
1737 "VBLENDVPSrm",
1738 "VPBLENDVB(Y?)rm")>;
1739
17401716 def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
17411717 let Latency = 8;
17421718 let NumMicroOps = 4;
164164 defm : SKXWriteResPair; // Floating point vector shuffles.
165165 defm : SKXWriteResPair; // Floating point vector variable shuffles.
166166 defm : SKXWriteResPair; // Floating point vector blends.
167 defm : SKXWriteResPair [SKXPort5], 2, [2]>; // Fp vector variable blends.
167 defm : SKXWriteResPair[SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
168168
169169 // FMA Scheduling helper class.
170170 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
182182 defm : SKXWriteResPair; // Vector shuffles.
183183 defm : SKXWriteResPair; // Vector variable shuffles.
184184 defm : SKXWriteResPair; // Vector blends.
185 defm : SKXWriteResPair [SKXPort5], 2, [2]>; // Vector variable blends.
185 defm : SKXWriteResPair[SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
186186 defm : SKXWriteResPair; // Vector MPSAD.
187187 defm : SKXWriteResPair; // Vector PSADBW.
188188
10801080 "ROR(8|16|32|64)ri",
10811081 "SET(A|BE)r")>;
10821082
1083 def SKXWriteResGroup16 : SchedWriteRes<[SKXPort015]> {
1084 let Latency = 2;
1085 let NumMicroOps = 2;
1086 let ResourceCycles = [2];
1087 }
1088 def: InstRW<[SKXWriteResGroup16], (instregex "BLENDVPDrr0",
1089 "BLENDVPSrr0",
1090 "PBLENDVBrr0",
1091 "VBLENDVPDYrr",
1092 "VBLENDVPDrr",
1093 "VBLENDVPSYrr",
1094 "VBLENDVPSrr",
1095 "VPBLENDVBYrr",
1096 "VPBLENDVBrr")>;
1097
10981083 def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
10991084 let Latency = 2;
11001085 let NumMicroOps = 2;
35673552 "VXORPSZ256rm(b?)",
35683553 "VXORPSZrm(b?)")>;
35693554
3570 def SKXWriteResGroup122 : SchedWriteRes<[SKXPort23,SKXPort015]> {
3571 let Latency = 8;
3572 let NumMicroOps = 3;
3573 let ResourceCycles = [1,2];
3574 }
3575 def: InstRW<[SKXWriteResGroup122], (instregex "BLENDVPDrm0",
3576 "BLENDVPSrm0",
3577 "PBLENDVBrm0",
3578 "VBLENDVPDrm",
3579 "VBLENDVPSrm",
3580 "VPBLENDVBYrm",
3581 "VPBLENDVBrm")>;
3582
35833555 def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
35843556 let Latency = 8;
35853557 let NumMicroOps = 4;
16691669 ; GENERIC-LABEL: test_pblendvb:
16701670 ; GENERIC: # %bb.0:
16711671 ; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
1672 ; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1672 ; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
16731673 ; GENERIC-NEXT: retq # sched: [1:1.00]
16741674 ;
16751675 ; HASWELL-LABEL: test_pblendvb:
0 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
1 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=SANDY
12
23 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=IVY
1516 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
1617
1718
18 # SANDY: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
19 # SANDY-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
19 # SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
20 # SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
2021
21 # IVY: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
22 # IVY-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
22 # IVY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
23 # IVY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
2324
24 # HASWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
25 # HASWELL-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
25 # HASWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
26 # HASWELL-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
2627
27 # BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
28 # BDWELL-NEXT: [0,1] D===eeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
28 # BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
29 # BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
2930
30 # SKYLAKE: [0,0] DeeeeER . . vaddps %xmm0, %xmm0, %xmm1
31 # SKYLAKE-NEXT: [0,1] D====eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
31 # SKYLAKE: [0,0] DeeeeER . vaddps %xmm0, %xmm0, %xmm1
32 # SKYLAKE-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
3233
33 # BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
34 # BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
34 # BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
35 # BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
3536
36 # ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
37 # ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
37 # ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
38 # ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
0 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
1 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=SANDY
12
23 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=IVY
1516 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
1617
1718
18 # SANDY: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2
19 # SANDY-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
19 # SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
20 # SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
2021
21 # IVY: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2
22 # IVY-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
22 # IVY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
23 # IVY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
2324
24 # HASWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2
25 # HASWELL-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
25 # HASWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
26 # HASWELL-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
2627
27 # BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2
28 # BDWELL-NEXT: [0,1] D===eeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
28 # BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
29 # BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
2930
30 # SKYLAKE: [0,0] DeeeeER . . vaddps %xmm0, %xmm0, %xmm2
31 # SKYLAKE-NEXT: [0,1] D====eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
31 # SKYLAKE: [0,0] DeeeeER . vaddps %xmm0, %xmm0, %xmm2
32 # SKYLAKE-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
3233
33 # BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
34 # BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
34 # BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
35 # BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
3536
36 # ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
37 # ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
37 # ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
38 # ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3