[X86] Add scheduler classes for zmm vector reg-reg move instructions

Basic zmm reg-reg moves (with predication) are more port-limited than xmm/ymm moves, so they need a separate scheduler class.

We still appear to be missing move-elimination patterns for most of the Intel models, which looks to be one of the main differences in basic codegen analysis between llvm-mca and uops.info.

Loads/stores are a bit messier and might be better handled as overrides; for now the zmm load/store variants keep using the ymm load/store classes.
This commit is contained in:
Simon Pilgrim 2021-12-27 12:13:17 +00:00
parent 3e65861131
commit 29475e0286
18 changed files with 158 additions and 126 deletions

View File

@ -255,6 +255,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
@ -418,6 +419,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15],
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [BWPort0], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [BWPort5], 1, [1], 1>;

View File

@ -257,6 +257,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [HWPort5], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
@ -416,6 +417,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15],
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [HWPort015], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [HWPort0], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [HWPort5], 1, [1], 1>;

View File

@ -252,6 +252,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [ICXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [ICXPort05,ICXPort0156], 10, [9,1], 10>;
defm : ICXWriteResPair<WriteFAdd, [ICXPort01], 4, [1], 1, 5>; // Floating point add/sub.
@ -367,6 +368,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [ICXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [ICXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [ICXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [ICXPort5], 1, [1], 1>;

View File

@ -223,6 +223,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1]
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
@ -380,6 +381,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SBPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>;

View File

@ -244,6 +244,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKLPort015], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub.
@ -359,6 +360,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKLPort015], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [SKLPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKLPort5], 1, [1], 1>;

View File

@ -244,6 +244,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub.
@ -359,6 +360,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;

View File

@ -239,6 +239,7 @@ def WriteFMaskedStore64Y : SchedWrite;
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
def WriteFMoveZ : SchedWrite;
defm WriteFAdd : X86SchedWritePair<ReadAfterVecLd>; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
@ -354,6 +355,7 @@ def WriteVecMaskedStore64Y : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
def WriteVecMoveZ : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
@ -516,9 +518,11 @@ def WriteFMoveLSX
: X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
def WriteFMoveLSY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
def WriteFMoveLSZ
: X86SchedWriteMoveLS<WriteFMoveZ, WriteFLoadY, WriteFStoreY>;
def SchedWriteFMoveLS
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
WriteFMoveLSY, WriteFMoveLSZ>;
def WriteFMoveLSNT
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
@ -536,9 +540,11 @@ def WriteVecMoveLSX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
def WriteVecMoveLSY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
def WriteVecMoveLSZ
: X86SchedWriteMoveLS<WriteVecMoveZ, WriteVecLoadY, WriteVecStoreY>;
def SchedWriteVecMoveLS
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
WriteVecMoveLSY, WriteVecMoveLSZ>;
def WriteVecMoveLSNT
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;

View File

@ -229,6 +229,7 @@ defm : X86WriteResUnsupported<WriteFMaskedStore64Y>;
def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteFMoveY>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
@ -382,6 +383,7 @@ defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteVecMoveY>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [AtomPort0], 3, [3], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;

View File

@ -772,6 +772,7 @@ defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2
defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;
@ -1107,6 +1108,7 @@ defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}

View File

@ -525,6 +525,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU
defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>;
@ -682,6 +683,7 @@ defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveY, [JFPU01, JVALU], 1, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>;

View File

@ -200,6 +200,7 @@ def : WriteRes<WriteFMaskedStore64Y, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
@ -345,6 +346,7 @@ def : WriteRes<WriteVecMaskedStore64Y, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
def : WriteRes<WriteVecMoveToGpr, [SLM_IEC_RSV01]>;
def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>;

View File

@ -286,6 +286,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
@ -404,6 +405,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [ZnFPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;

View File

@ -274,6 +274,7 @@ defm : X86WriteRes<WriteFStoreNTY, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteFMove, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [Zn2FPU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : Zn2WriteResFpuPair<WriteFAdd, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAddX, [Zn2FPU0], 3>;
@ -388,6 +389,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [Zn2FPU], 2, [1], 2>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [Zn2FPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [Zn2FPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;

View File

@ -1446,10 +1446,12 @@ defm : Zn3WriteResInt<WriteXCHG, [Zn3ALU0123], 0, [8], 2>; // Compare+Exc
defm : Zn3WriteResXMM<WriteFMove, [Zn3FPVMisc0123], 1, [1], 1>; // Empty sched class
defm : Zn3WriteResXMM<WriteFMoveX, [], 0, [], 1>;
defm : Zn3WriteResYMM<WriteFMoveY, [], 0, [], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : Zn3WriteResXMM<WriteVecMove, [Zn3FPFMisc0123], 1, [1], 1>; // MMX
defm : Zn3WriteResXMM<WriteVecMoveX, [], 0, [], 1>;
defm : Zn3WriteResYMM<WriteVecMoveY, [], 0, [], 1>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;
def : IsOptimizableRegisterMove<[
InstructionEquivalenceClass<[

View File

@ -1068,21 +1068,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 4 0.50 vminps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 11 0.50 * vminps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 11 0.50 * vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19
@ -1090,37 +1090,37 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovshdup (%rax), %zmm19
@ -1134,21 +1134,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 1 1.00 vmovsldup %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 4 0.50 vmulpd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %zmm17, %zmm19
@ -1671,7 +1671,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: - 612.00 236.67 57.67 278.83 278.83 16.00 555.67 2.00 5.33 - -
# CHECK-NEXT: - 612.00 240.67 49.67 278.83 278.83 16.00 559.67 2.00 5.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@ -1859,21 +1859,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vminps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovapd %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovapd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovapd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovaps %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovaps %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovaps %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19
@ -1881,37 +1881,37 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa32 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa64 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu32 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu64 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovshdup %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovshdup (%rax), %zmm19
@ -1925,21 +1925,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovsldup (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovsldup %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovsldup (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovupd %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovupd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovupd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovups %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovups %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovups %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmulpd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vmulpd (%rax), %zmm17, %zmm19

View File

@ -298,21 +298,21 @@ vpmovw2m %zmm0, %k0
# CHECK-NEXT: 1 4 1.00 kshiftrq $2, %k1, %k2
# CHECK-NEXT: 1 4 1.00 kunpckdq %k0, %k1, %k2
# CHECK-NEXT: 1 4 1.00 kunpckwd %k0, %k1, %k2
# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19
# CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpabsb (%rax), %zmm19
@ -525,7 +525,7 @@ vpmovw2m %zmm0, %k0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: - - 36.50 13.50 52.50 52.50 6.00 181.50 0.50 2.00 - -
# CHECK-NEXT: - - 37.50 11.50 52.50 52.50 6.00 182.50 0.50 2.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@ -547,21 +547,21 @@ vpmovw2m %zmm0, %k0
# CHECK-NEXT: - - - - - - - 1.00 - - - - kshiftrq $2, %k1, %k2
# CHECK-NEXT: - - - - - - - 1.00 - - - - kunpckdq %k0, %k1, %k2
# CHECK-NEXT: - - - - - - - 1.00 - - - - kunpckwd %k0, %k1, %k2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu8 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19
# CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 - - vmovdqu8 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu8 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 - - vmovdqu8 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu8 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu16 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu16 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu16 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.00 - - - - - - - - - vpabsb %zmm16, %zmm19
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vpabsb (%rax), %zmm19

View File

@ -1068,21 +1068,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 4 0.50 vminps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 11 0.50 * vminps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 11 0.50 * vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19
@ -1090,37 +1090,37 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovshdup (%rax), %zmm19
@ -1134,21 +1134,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 1 1.00 vmovsldup %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 4 0.50 vmulpd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %zmm17, %zmm19
@ -1669,7 +1669,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - 612.00 236.67 57.67 278.83 278.83 16.00 555.67 2.00 5.33
# CHECK-NEXT: - 612.00 240.67 49.67 278.83 278.83 16.00 559.67 2.00 5.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@ -1857,21 +1857,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vminps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovapd %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovapd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovapd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovaps %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovaps %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovaps %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19
@ -1879,37 +1879,37 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa32 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa64 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu32 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu32 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu32 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu64 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu64 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu64 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - 1.00 - - vmovshdup %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovshdup (%rax), %zmm19
@ -1923,21 +1923,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovsldup (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vmovsldup %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovupd %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovupd %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovupd %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovups %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovups %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovups %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmulpd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vmulpd (%rax), %zmm17, %zmm19

View File

@ -298,21 +298,21 @@ vpmovw2m %zmm0, %k0
# CHECK-NEXT: 1 4 1.00 kshiftrq $2, %k1, %k2
# CHECK-NEXT: 1 4 1.00 kunpckdq %k0, %k1, %k2
# CHECK-NEXT: 1 4 1.00 kunpckwd %k0, %k1, %k2
# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19
# CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19
# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19
# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19
# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1}
# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) {%k1}
# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpabsb (%rax), %zmm19
@ -523,7 +523,7 @@ vpmovw2m %zmm0, %k0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 36.50 13.50 52.50 52.50 6.00 181.50 0.50 2.00
# CHECK-NEXT: - - 37.50 11.50 52.50 52.50 6.00 182.50 0.50 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@ -545,21 +545,21 @@ vpmovw2m %zmm0, %k0
# CHECK-NEXT: - - - - - - - 1.00 - - kshiftrq $2, %k1, %k2
# CHECK-NEXT: - - - - - - - 1.00 - - kunpckdq %k0, %k1, %k2
# CHECK-NEXT: - - - - - - - 1.00 - - kunpckwd %k0, %k1, %k2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu8 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19
# CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 vmovdqu8 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu8 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 vmovdqu8 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu8 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu16 %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 %zmm16, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu16 %zmm16, %zmm19 {%k1}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1}
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 %zmm16, (%rax) {%k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu16 %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.00 - - - - - - - vpabsb %zmm16, %zmm19
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpabsb (%rax), %zmm19