[SLP] Fix PR59098: check if the vector type is scalarized for extractelements.

If the resulting vector type is going to be scalarized, there is no need to
adjust the cost for the removed extractelement instructions or for the
insert/extract subvector operations. Without this check, the compiler can
crash because of wrong type sizes.
This commit is contained in:
Alexey Bataev 2022-11-21 09:59:56 -08:00
parent 70180ee0ae
commit ac93b61165
2 changed files with 25 additions and 3 deletions

View File

@ -6275,6 +6275,10 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// FIXME: it tries to fix a problem with MSVC buildbots.
TargetTransformInfo *TTI = this->TTI;
auto AdjustExtractsCost = [=](InstructionCost &Cost) {
// If the resulting type is scalarized, do not adjust the cost.
unsigned VecNumParts = TTI->getNumberOfParts(VecTy);
if (VecNumParts == VecTy->getNumElements())
return;
DenseMap<Value *, int> ExtractVectorsTys;
SmallPtrSet<Value *, 4> CheckedExtracts;
for (auto *V : VL) {
@ -6296,8 +6300,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (!EEIdx)
continue;
unsigned Idx = *EEIdx;
if (TTI->getNumberOfParts(VecTy) !=
TTI->getNumberOfParts(EE->getVectorOperandType())) {
if (VecNumParts != TTI->getNumberOfParts(EE->getVectorOperandType())) {
auto It =
ExtractVectorsTys.try_emplace(EE->getVectorOperand(), Idx).first;
It->getSecond() = std::min<int>(It->second, Idx);
@ -6328,7 +6331,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
unsigned NumElts = VecTy->getNumElements();
if (Data.second % NumElts == 0)
continue;
if (TTI->getNumberOfParts(EEVTy) > TTI->getNumberOfParts(VecTy)) {
if (TTI->getNumberOfParts(EEVTy) > VecNumParts) {
unsigned Idx = (Data.second / NumElts) * NumElts;
unsigned EENumElts = EEVTy->getNumElements();
if (Idx + NumElts <= EENumElts) {

View File

@ -0,0 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s
; Regression test for PR59098. Per the accompanying commit message, the SLP
; vectorizer's extractelement cost adjustment could crash on wrong type sizes
; when the resulting vector type is scalarized. The CHECK lines below expect
; the function body to come out of the pass unchanged (still scalar).
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x half> zeroinitializer, i64 1
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une half [[TMP0]], 0xH0000
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x half> zeroinitializer, i64 1
; CHECK-NEXT: [[TOBOOL3:%.*]] = fcmp une half [[TMP1]], 0xH0000
; CHECK-NEXT: ret void
;
; Two identical extractelements from the same <8 x half> source at index 1,
; each feeding an fcmp against zero.
entry:
%0 = extractelement <8 x half> zeroinitializer, i64 1
%tobool = fcmp une half %0, 0xH0000
%1 = extractelement <8 x half> zeroinitializer, i64 1
%tobool3 = fcmp une half %1, 0xH0000
ret void
}