From a9bd3d37bdec19fc8b4d9aad13f36902c20c7441 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Sat, 23 Jan 2021 00:54:04 +0100 Subject: [PATCH] [NewPM] Add ExtraVectorizerPasses support As it looks like NewPM generally is using SimpleLoopUnswitch instead of LoopUnswitch, this patch also uses SimpleLoopUnswitch in the ExtraVectorizerPasses sequence (compared with LegacyPM which uses the LoopUnswitch pass). Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D95457 --- llvm/lib/Passes/PassBuilder.cpp | 29 ++++++++++++++++++- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- llvm/test/Other/opt-pipeline-vector-passes.ll | 26 +++++++++++++++-- 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 3530b1a35b24..d4c4c6e01ef5 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -287,6 +287,7 @@ PipelineTuningOptions::PipelineTuningOptions() { MergeFunctions = false; UniqueLinkageNames = false; } +extern cl::opt ExtraVectorizerPasses; extern cl::opt EnableConstraintElimination; extern cl::opt EnableGVNHoist; @@ -1255,6 +1256,28 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Cleanup after the loop optimization passes. OptimizePM.addPass(InstCombinePass()); + if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { + // At higher optimization levels, try to clean up any runtime overlap and + // alignment checks inserted by the vectorizer. We want to track correlated + // runtime checks for two inner loops in the same outer loop, fold any + // common computations, hoist loop-invariant aspects out of any outer loop, + // and unswitch the runtime checks if possible. Once hoisted, we may have + // dead (or speculatable) control flows or more combining opportunities.
+ OptimizePM.addPass(EarlyCSEPass()); + OptimizePM.addPass(CorrelatedValuePropagationPass()); + OptimizePM.addPass(InstCombinePass()); + LoopPassManager LPM(DebugLogging); + LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM.addPass( + SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); + OptimizePM.addPass(RequireAnalysisPass()); + OptimizePM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true, + DebugLogging)); + OptimizePM.addPass(SimplifyCFGPass()); + OptimizePM.addPass(InstCombinePass()); + } + // Now that we've formed fast to execute loop structures, we do further // optimizations. These are run afterward as they might block doing complex // analyses and transforms such as what are needed for loop vectorization. @@ -1274,8 +1297,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, .sinkCommonInsts(true))); // Optimize parallel scalar instruction chains into SIMD instructions. - if (PTO.SLPVectorization) + if (PTO.SLPVectorization) { OptimizePM.addPass(SLPVectorizerPass()); + if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { + OptimizePM.addPass(EarlyCSEPass()); + } + } // Enhance/cleanup vector code. 
OptimizePM.addPass(VectorCombinePass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 85acf2ff8bb1..068328391dff 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -60,7 +60,7 @@ UseGVNAfterVectorization("use-gvn-after-vectorization", cl::init(false), cl::Hidden, cl::desc("Run GVN instead of Early CSE after vectorization passes")); -static cl::opt ExtraVectorizerPasses( +cl::opt ExtraVectorizerPasses( "extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization.")); diff --git a/llvm/test/Other/opt-pipeline-vector-passes.ll b/llvm/test/Other/opt-pipeline-vector-passes.ll index 5a76bfed1685..7fe57da612b5 100644 --- a/llvm/test/Other/opt-pipeline-vector-passes.ll +++ b/llvm/test/Other/opt-pipeline-vector-passes.ll @@ -5,6 +5,7 @@ ; RUN: opt -enable-new-pm=0 -O2 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_FORCE_OFF ; RUN: opt -disable-verify -debug-pass-manager -passes='default' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O1 ; RUN: opt -disable-verify -debug-pass-manager -passes='default' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2 +; RUN: opt -disable-verify -debug-pass-manager -passes='default' -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2_EXTRA ; REQUIRES: asserts @@ -64,6 +65,27 @@ ; NEWPM_O2: Running pass: SLPVectorizerPass ; NEWPM_O2: Running pass: VectorCombinePass -define void @f() { - ret void +; NEWPM_O2_EXTRA-LABEL: Running pass: LoopVectorizePass +; NEWPM_O2_EXTRA: Running pass: EarlyCSEPass +; NEWPM_O2_EXTRA: Running pass: CorrelatedValuePropagationPass +; NEWPM_O2_EXTRA: Running pass: InstCombinePass +; NEWPM_O2_EXTRA: Running pass: LICMPass +; NEWPM_O2_EXTRA: Running pass: SimpleLoopUnswitchPass +; NEWPM_O2_EXTRA: Running pass: SimplifyCFGPass +; NEWPM_O2_EXTRA: 
Running pass: InstCombinePass +; NEWPM_O2_EXTRA: Running pass: SLPVectorizerPass +; NEWPM_O2_EXTRA: Running pass: EarlyCSEPass +; NEWPM_O2_EXTRA: Running pass: VectorCombinePass + +define i64 @f(i1 %cond) { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %inc, %loop ] + %inc = add i64 %i, 1 + br i1 %cond, label %loop, label %exit + +exit: + ret i64 %i }