[SLP]Fix PR51320: Try to vectorize single store operands.
Currently, we try to vectorize values feeding into stores only if the slp-vectorize-hor-store option is provided. We can safely enable vectorization of the value operand of a single store in a basic block if that operand is used only by the store. This should enable extra vectorization without increasing compile time significantly. Fixes https://github.com/llvm/llvm-project/issues/51320 Differential Revision: https://reviews.llvm.org/D131894
This commit is contained in:
parent
b812db1464
commit
65c7cecb13
|
@ -12238,7 +12238,20 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
|||
(it->getType()->isVoidTy() || isa<CallInst, InvokeInst>(it))) {
|
||||
KeyNodes.insert(&*it);
|
||||
bool OpsChanged = false;
|
||||
if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(it)) {
|
||||
auto *SI = dyn_cast<StoreInst>(it);
|
||||
bool TryToVectorizeRoot = ShouldStartVectorizeHorAtStore || !SI;
|
||||
if (SI) {
|
||||
auto I = Stores.find(getUnderlyingObject(SI->getPointerOperand()));
|
||||
// Try to vectorize chain in store, if this is the only store to the
|
||||
// address in the block.
|
||||
// TODO: This is just a temporary solution to save compile time. Need
|
||||
// to investigate if we can safely turn on slp-vectorize-hor-store
|
||||
// instead to allow lookup for reduction chains in all non-vectorized
|
||||
// stores (need to check side effects and compile time).
|
||||
TryToVectorizeRoot = (I == Stores.end() || I->second.size() == 1) &&
|
||||
SI->getValueOperand()->hasOneUse();
|
||||
}
|
||||
if (TryToVectorizeRoot) {
|
||||
for (auto *V : it->operand_values()) {
|
||||
// Try to match and vectorize a horizontal reduction.
|
||||
OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI);
|
||||
|
|
|
@ -37,13 +37,13 @@ define float @f(<2 x float> %x) {
|
|||
|
||||
define float @f_used_out_of_tree(<2 x float> %x) {
|
||||
; CHECK-LABEL: @f_used_out_of_tree(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]]
|
||||
; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
|
||||
; CHECK-NEXT: store float [[ADD]], float* @a, align 4
|
||||
; CHECK-NEXT: ret float [[X0]]
|
||||
; CHECK-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
; THRESH1-LABEL: @f_used_out_of_tree(
|
||||
; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue