[X86] SkylakeClientModel - conversion instructions don't use Port015
Fixes a lot of throughput mismatches - the more complicated conversion instructions use SKLPort5+SKLPort01, not SKLPort5+SKLPort015 (SKLPort015 is mainly used for basic Logic + blend ops) Fixing this should allow us to remove a lot of unnecessary scheduler overrides from SkylakeClientModel Confirmed by both Agner + uops.info
This commit is contained in:
parent
956489700e
commit
30498cf7c4
|
@ -469,14 +469,14 @@ defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort5,SKLPort01], 5, [1,1], 2, 6>;
|
|||
defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2, 6>;
|
||||
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
|
||||
|
||||
defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort01], 5, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
|
||||
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
|
||||
defm : X86WriteRes<WriteCvtPH2PSLd, [SKLPort23,SKLPort01], 9, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
|
||||
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
|
||||
|
||||
defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort015], 5, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort01], 5, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPS2PHY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
|
||||
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
|
||||
defm : X86WriteRes<WriteCvtPS2PHSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>;
|
||||
|
@ -928,7 +928,7 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> {
|
|||
}
|
||||
def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
|
||||
|
||||
def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> {
|
||||
def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
|
|
|
@ -1736,7 +1736,7 @@ vzeroupper
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
# CHECK-NEXT: - 126.00 333.42 202.42 173.17 173.17 34.00 324.92 5.25 12.67
|
||||
# CHECK-NEXT: - 126.00 334.58 203.58 173.17 173.17 34.00 322.58 5.25 12.67
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
|
@ -1832,7 +1832,7 @@ vzeroupper
|
|||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
|
||||
|
@ -1844,7 +1844,7 @@ vzeroupper
|
|||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %ymm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %ymm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2
|
||||
|
@ -1854,21 +1854,21 @@ vzeroupper
|
|||
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %ecx, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %rcx, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdq (%rax), %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2ss %ecx, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2ss %ecx, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssl (%rax), %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtss2sd (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtss2si %xmm0, %ecx
|
||||
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvtss2si %xmm0, %rcx
|
||||
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %ecx
|
||||
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %rcx
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
|
||||
|
|
|
@ -45,15 +45,15 @@ vcvtps2ph $0, %ymm0, (%rax)
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
# CHECK-NEXT: - - 3.67 3.67 1.67 1.67 2.00 6.67 - 0.67
|
||||
# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %ymm0, (%rax)
|
||||
|
|
|
@ -333,7 +333,7 @@ xorps (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
# CHECK-NEXT: - 24.00 71.33 24.33 32.00 32.00 8.00 32.83 0.50 3.00
|
||||
# CHECK-NEXT: - 24.00 71.83 24.83 32.00 32.00 8.00 31.83 0.50 3.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
|
@ -353,9 +353,9 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1
|
||||
# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2
|
||||
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pi %xmm0, %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pi %xmm0, %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pi (%rax), %mm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2ss %ecx, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2ss %ecx, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - cvtsi2ss %rcx, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssq (%rax), %xmm2
|
||||
|
@ -363,7 +363,7 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx
|
||||
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx
|
||||
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %rcx
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttps2pi %xmm0, %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2pi (%rax), %mm2
|
||||
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx
|
||||
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx
|
||||
|
|
|
@ -689,7 +689,7 @@ xorpd (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
# CHECK-NEXT: - 40.00 111.25 79.25 63.50 63.50 14.00 96.25 2.25 5.00
|
||||
# CHECK-NEXT: - 40.00 112.58 80.58 63.50 63.50 14.00 93.58 2.25 5.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
|
@ -712,9 +712,9 @@ xorpd (%rax), %xmm2
|
|||
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2pi %xmm0, %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2pi %xmm0, %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2pi (%rax), %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2
|
||||
|
@ -722,7 +722,7 @@ xorpd (%rax), %xmm2
|
|||
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %ecx
|
||||
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %rcx
|
||||
|
@ -730,15 +730,15 @@ xorpd (%rax), %xmm2
|
|||
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtsd2si (%rax), %rcx
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsd2ss %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %ecx, %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %rcx, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %ecx, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %rcx, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdl (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdq (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtss2sd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtss2sd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtss2sd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvttpd2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2pi %xmm0, %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2pi %xmm0, %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvttpd2pi (%rax), %mm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvttps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2dq (%rax), %xmm2
|
||||
|
|
Loading…
Reference in New Issue