[mlir][affine] Support affine.parallel in the index set analysis
Support affine.parallel in the index set analysis. This enables dependence analysis on code containing affine.parallel in addition to affine.for and affine.if. This change only supports constant lower/upper bounds in affine.parallel. More complex affine map bounds will be supported in future commits. See https://github.com/llvm/llvm-project/issues/57327 Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D136056
This commit is contained in:
parent
8be0d8fb83
commit
1d541bd920
|
@ -24,6 +24,7 @@ namespace mlir {
|
||||||
class AffineCondition;
|
class AffineCondition;
|
||||||
class AffineForOp;
|
class AffineForOp;
|
||||||
class AffineIfOp;
|
class AffineIfOp;
|
||||||
|
class AffineParallelOp;
|
||||||
class AffineMap;
|
class AffineMap;
|
||||||
class AffineValueMap;
|
class AffineValueMap;
|
||||||
class IntegerSet;
|
class IntegerSet;
|
||||||
|
@ -141,6 +142,13 @@ public:
|
||||||
// TODO: add support for non-unit strides.
|
// TODO: add support for non-unit strides.
|
||||||
LogicalResult addAffineForOpDomain(AffineForOp forOp);
|
LogicalResult addAffineForOpDomain(AffineForOp forOp);
|
||||||
|
|
||||||
|
/// Add constraints (lower and upper bounds) for the specified
|
||||||
|
/// 'affine.parallel' operation's Value using IR information stored in its
|
||||||
|
/// bound maps. Returns failure for the yet unimplemented/unsupported cases.
|
||||||
|
/// Asserts if the Value corresponding to the 'affine.parallel' operation
|
||||||
|
/// isn't found in the constraint system.
|
||||||
|
LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp);
|
||||||
|
|
||||||
/// Adds constraints (lower and upper bounds) for each loop in the loop nest
|
/// Adds constraints (lower and upper bounds) for each loop in the loop nest
|
||||||
/// described by the bound maps `lbMaps` and `ubMaps` of a computation slice.
|
/// described by the bound maps `lbMaps` and `ubMaps` of a computation slice.
|
||||||
/// Every pair (`lbMaps[i]`, `ubMaps[i]`) describes the bounds of a loop in
|
/// Every pair (`lbMaps[i]`, `ubMaps[i]`) describes the bounds of a loop in
|
||||||
|
|
|
@ -450,6 +450,11 @@ AffineForOp getForInductionVarOwner(Value val);
|
||||||
void extractForInductionVars(ArrayRef<AffineForOp> forInsts,
|
void extractForInductionVars(ArrayRef<AffineForOp> forInsts,
|
||||||
SmallVectorImpl<Value> *ivs);
|
SmallVectorImpl<Value> *ivs);
|
||||||
|
|
||||||
|
/// Extracts the induction variables from a list of either AffineForOp or
|
||||||
|
/// AffineParallelOp and places them in the output argument `ivs`.
|
||||||
|
void extractInductionVars(ArrayRef<Operation *> affineOps,
|
||||||
|
SmallVectorImpl<Value> &ivs);
|
||||||
|
|
||||||
/// Builds a perfect nest of affine.for loops, i.e., each loop except the
|
/// Builds a perfect nest of affine.for loops, i.e., each loop except the
|
||||||
/// innermost one contains only another loop and a terminator. The loops iterate
|
/// innermost one contains only another loop and a terminator. The loops iterate
|
||||||
/// from "lbs" to "ubs" with "steps". The body of the innermost loop is
|
/// from "lbs" to "ubs" with "steps". The body of the innermost loop is
|
||||||
|
|
|
@ -240,27 +240,36 @@ void mlir::getReachableAffineApplyOps(
|
||||||
LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
|
LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
|
||||||
FlatAffineValueConstraints *domain) {
|
FlatAffineValueConstraints *domain) {
|
||||||
SmallVector<Value, 4> indices;
|
SmallVector<Value, 4> indices;
|
||||||
SmallVector<AffineForOp, 8> forOps;
|
SmallVector<Operation *, 8> loopOps;
|
||||||
|
size_t numDims = 0;
|
||||||
for (Operation *op : ops) {
|
for (Operation *op : ops) {
|
||||||
if (!isa<AffineForOp, AffineIfOp>(op)) {
|
if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
|
||||||
// TODO: Support affine.parallel ops.
|
LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
|
||||||
LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if ops");
|
"parallel ops");
|
||||||
return failure();
|
return failure();
|
||||||
}
|
}
|
||||||
if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
|
if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
|
||||||
forOps.push_back(forOp);
|
loopOps.push_back(forOp);
|
||||||
|
// An AffineForOp retains only 1 induction variable.
|
||||||
|
numDims += 1;
|
||||||
|
} else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
|
||||||
|
loopOps.push_back(parallelOp);
|
||||||
|
numDims += parallelOp.getNumDims();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
extractForInductionVars(forOps, &indices);
|
extractInductionVars(loopOps, indices);
|
||||||
// Reset while associated Values in 'indices' to the domain.
|
// Reset while associating Values in 'indices' to the domain.
|
||||||
domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
|
domain->reset(numDims, /*numSymbols=*/0, /*numLocals=*/0, indices);
|
||||||
for (Operation *op : ops) {
|
for (Operation *op : ops) {
|
||||||
// Add constraints from forOp's bounds.
|
// Add constraints from forOp's bounds.
|
||||||
if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
|
if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
|
||||||
if (failed(domain->addAffineForOpDomain(forOp)))
|
if (failed(domain->addAffineForOpDomain(forOp)))
|
||||||
return failure();
|
return failure();
|
||||||
} else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
|
} else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
|
||||||
domain->addAffineIfOpDomain(ifOp);
|
domain->addAffineIfOpDomain(ifOp);
|
||||||
}
|
} else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
|
||||||
|
if (failed(domain->addAffineParallelOpDomain(parallelOp)))
|
||||||
|
return failure();
|
||||||
}
|
}
|
||||||
return success();
|
return success();
|
||||||
}
|
}
|
||||||
|
@ -594,6 +603,12 @@ DependenceResult mlir::checkMemrefAccessDependence(
|
||||||
if (srcAccess.memref != dstAccess.memref)
|
if (srcAccess.memref != dstAccess.memref)
|
||||||
return DependenceResult::NoDependence;
|
return DependenceResult::NoDependence;
|
||||||
|
|
||||||
|
// TODO: Support affine.parallel which does not specify the ordering.
|
||||||
|
auto srcParent = srcAccess.opInst->getParentOfType<AffineParallelOp>();
|
||||||
|
auto dstParent = dstAccess.opInst->getParentOfType<AffineParallelOp>();
|
||||||
|
if (srcParent || dstParent)
|
||||||
|
return DependenceResult::Failure;
|
||||||
|
|
||||||
// Return 'NoDependence' if one of these accesses is not an
|
// Return 'NoDependence' if one of these accesses is not an
|
||||||
// AffineWriteOpInterface.
|
// AffineWriteOpInterface.
|
||||||
if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
|
if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
|
||||||
|
|
|
@ -639,6 +639,33 @@ FlatAffineValueConstraints::addAffineForOpDomain(AffineForOp forOp) {
|
||||||
forOp.getUpperBoundOperands());
|
forOp.getUpperBoundOperands());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Adds lower and upper bound constraints for every induction variable of
/// `parallelOp`, reading them from the op's per-dimension bound maps.
///
/// Each induction variable must already be present in this constraint system;
/// otherwise this asserts (and returns failure in builds without assertions).
/// Returns failure as well when a non-constant bound map cannot be added.
LogicalResult FlatAffineValueConstraints::addAffineParallelOpDomain(
    AffineParallelOp parallelOp) {
  // Index of the current induction variable within `parallelOp`; selects the
  // matching per-dimension lower/upper bound map.
  size_t ivPos = 0;
  for (auto iv : parallelOp.getIVs()) {
    // Column of `iv` in this constraint system.
    unsigned pos;
    if (!findVar(iv, &pos)) {
      assert(false && "variable expected for the IV value");
      return failure();
    }

    AffineMap lowerBound = parallelOp.getLowerBoundMap(ivPos);
    if (lowerBound.isConstant())
      addBound(BoundType::LB, pos, lowerBound.getSingleConstantResult());
    else if (failed(addBound(BoundType::LB, pos, lowerBound,
                             parallelOp.getLowerBoundsOperands())))
      return failure();

    AffineMap upperBound = parallelOp.getUpperBoundMap(ivPos);
    if (upperBound.isConstant())
      // The constant overload of addBound takes an inclusive bound, while the
      // upper bound of affine.parallel is exclusive; hence the `- 1`.
      addBound(BoundType::UB, pos, upperBound.getSingleConstantResult() - 1);
    else if (failed(addBound(BoundType::UB, pos, upperBound,
                             parallelOp.getUpperBoundsOperands())))
      return failure();

    // Move on to the bound maps of the next dimension. Without this, every
    // induction variable would be constrained by dimension 0's bounds.
    ++ivPos;
  }
  return success();
}
|
||||||
|
|
||||||
LogicalResult
|
LogicalResult
|
||||||
FlatAffineValueConstraints::addDomainFromSliceMaps(ArrayRef<AffineMap> lbMaps,
|
FlatAffineValueConstraints::addDomainFromSliceMaps(ArrayRef<AffineMap> lbMaps,
|
||||||
ArrayRef<AffineMap> ubMaps,
|
ArrayRef<AffineMap> ubMaps,
|
||||||
|
|
|
@ -2318,6 +2318,19 @@ void mlir::extractForInductionVars(ArrayRef<AffineForOp> forInsts,
|
||||||
ivs->push_back(forInst.getInductionVar());
|
ivs->push_back(forInst.getInductionVar());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Appends to `ivs` the induction variables of each op in `affineOps`, in
/// order: the single induction variable of an AffineForOp, or every argument
/// of the body block of an AffineParallelOp. Ops of any other kind contribute
/// nothing.
void mlir::extractInductionVars(ArrayRef<mlir::Operation *> affineOps,
                                SmallVectorImpl<mlir::Value> &ivs) {
  // Capacity hint of one value per op; an AffineParallelOp may add more.
  ivs.reserve(affineOps.size());
  for (Operation *loopOp : affineOps) {
    if (auto parallel = dyn_cast<AffineParallelOp>(loopOp)) {
      // Every argument of the parallel op's body block is collected.
      auto *body = parallel.getBody();
      for (size_t argIdx = 0, numArgs = body->getNumArguments();
           argIdx < numArgs; ++argIdx)
        ivs.push_back(body->getArgument(argIdx));
      continue;
    }
    if (auto loop = dyn_cast<AffineForOp>(loopOp))
      ivs.push_back(loop.getInductionVar());
  }
}
|
||||||
|
|
||||||
/// Builds an affine loop nest, using "loopCreatorFn" to create individual loop
|
/// Builds an affine loop nest, using "loopCreatorFn" to create individual loop
|
||||||
/// operations.
|
/// operations.
|
||||||
template <typename BoundListTy, typename LoopCreatorTy>
|
template <typename BoundListTy, typename LoopCreatorTy>
|
||||||
|
|
|
@ -788,3 +788,61 @@ func.func @no_forwarding_across_scopes() -> memref<1xf32> {
|
||||||
}
|
}
|
||||||
return %A : memref<1xf32>
|
return %A : memref<1xf32>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: func @parallel_store_load() {
|
||||||
|
func.func @parallel_store_load() {
|
||||||
|
%cf7 = arith.constant 7.0 : f32
|
||||||
|
%m = memref.alloc() : memref<10xf32>
|
||||||
|
affine.parallel (%i0) = (0) to (10) {
|
||||||
|
affine.store %cf7, %m[%i0] : memref<10xf32>
|
||||||
|
%v0 = affine.load %m[%i0] : memref<10xf32>
|
||||||
|
%v1 = arith.addf %v0, %v0 : f32
|
||||||
|
}
|
||||||
|
memref.dealloc %m : memref<10xf32>
|
||||||
|
return
|
||||||
|
// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
|
||||||
|
// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (10) {
|
||||||
|
// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32
|
||||||
|
// CHECK-NEXT: }
|
||||||
|
// CHECK-NEXT: return
|
||||||
|
}
|
||||||
|
|
||||||
|
func.func @non_constant_parallel_store_load(%N : index) {
|
||||||
|
%cf7 = arith.constant 7.0 : f32
|
||||||
|
%m = memref.alloc() : memref<10xf32>
|
||||||
|
affine.parallel (%i0) = (0) to (%N) {
|
||||||
|
affine.store %cf7, %m[%i0] : memref<10xf32>
|
||||||
|
%v0 = affine.load %m[%i0] : memref<10xf32>
|
||||||
|
%v1 = arith.addf %v0, %v0 : f32
|
||||||
|
}
|
||||||
|
memref.dealloc %m : memref<10xf32>
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// CHECK: func.func @non_constant_parallel_store_load(%[[ARG0:.*]]: index) {
|
||||||
|
// CHECK-NEXT: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
|
||||||
|
// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (%[[ARG0]]) {
|
||||||
|
// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32
|
||||||
|
// CHECK-NEXT: }
|
||||||
|
// CHECK-NEXT: return
|
||||||
|
|
||||||
|
// CHECK-LABEL: func @parallel_surrounding_for() {
|
||||||
|
func.func @parallel_surrounding_for() {
|
||||||
|
%cf7 = arith.constant 7.0 : f32
|
||||||
|
%m = memref.alloc() : memref<10x10xf32>
|
||||||
|
affine.parallel (%i0) = (0) to (10) {
|
||||||
|
affine.for %i1 = 0 to 10 {
|
||||||
|
affine.store %cf7, %m[%i0,%i1] : memref<10x10xf32>
|
||||||
|
%v0 = affine.load %m[%i0,%i1] : memref<10x10xf32>
|
||||||
|
%v1 = arith.addf %v0, %v0 : f32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
memref.dealloc %m : memref<10x10xf32>
|
||||||
|
return
|
||||||
|
// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
|
||||||
|
// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (10) {
|
||||||
|
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
|
||||||
|
// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32
|
||||||
|
// CHECK-NEXT: }
|
||||||
|
// CHECK-NEXT: }
|
||||||
|
// CHECK-NEXT: return
|
||||||
|
}
|
||||||
|
|
|
@ -1064,3 +1064,19 @@ func.func @test_interleaved_affine_for_if() {
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
// CHECK-LABEL: func @parallel_dependence_check_failure() {
|
||||||
|
func.func @parallel_dependence_check_failure() {
|
||||||
|
%0 = memref.alloc() : memref<10xf32>
|
||||||
|
%cst = arith.constant 7.000000e+00 : f32
|
||||||
|
affine.parallel (%i0) = (0) to (10) {
|
||||||
|
// expected-error @+1 {{dependence check failed}}
|
||||||
|
affine.store %cst, %0[%i0] : memref<10xf32>
|
||||||
|
}
|
||||||
|
affine.parallel (%i1) = (0) to (10) {
|
||||||
|
// expected-error @+1 {{dependence check failed}}
|
||||||
|
%1 = affine.load %0[%i1] : memref<10xf32>
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
|
@ -86,15 +86,18 @@ static void checkDependences(ArrayRef<Operation *> loadsAndStores) {
|
||||||
DependenceResult result = checkMemrefAccessDependence(
|
DependenceResult result = checkMemrefAccessDependence(
|
||||||
srcAccess, dstAccess, d, &dependenceConstraints,
|
srcAccess, dstAccess, d, &dependenceConstraints,
|
||||||
&dependenceComponents);
|
&dependenceComponents);
|
||||||
assert(result.value != DependenceResult::Failure);
|
if (result.value == DependenceResult::Failure) {
|
||||||
bool ret = hasDependence(result);
|
srcOpInst->emitError("dependence check failed");
|
||||||
// TODO: Print dependence type (i.e. RAW, etc) and print
|
} else {
|
||||||
// distance vectors as: ([2, 3], [0, 10]). Also, shorten distance
|
bool ret = hasDependence(result);
|
||||||
// vectors from ([1, 1], [3, 3]) to (1, 3).
|
// TODO: Print dependence type (i.e. RAW, etc) and print
|
||||||
srcOpInst->emitRemark("dependence from ")
|
// distance vectors as: ([2, 3], [0, 10]). Also, shorten distance
|
||||||
<< i << " to " << j << " at depth " << d << " = "
|
// vectors from ([1, 1], [3, 3]) to (1, 3).
|
||||||
<< getDirectionVectorStr(ret, numCommonLoops, d,
|
srcOpInst->emitRemark("dependence from ")
|
||||||
dependenceComponents);
|
<< i << " to " << j << " at depth " << d << " = "
|
||||||
|
<< getDirectionVectorStr(ret, numCommonLoops, d,
|
||||||
|
dependenceComponents);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue