[mlir][affine] Support affine.parallel in the index set analysis

Support affine.parallel in the index set analysis. This lets the dependence analysis handle nests that contain affine.parallel in addition to affine.for and affine.if. This change only supports constant lower/upper bounds on affine.parallel; more complex affine map bounds will be supported in follow-up commits.

See https://github.com/llvm/llvm-project/issues/57327

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D136056
Kai Sasaki, 2022-12-04 20:09:01 +09:00
commit 1d541bd920 (parent 8be0d8fb83)
8 changed files with 165 additions and 20 deletions


@@ -24,6 +24,7 @@ namespace mlir {
 class AffineCondition;
 class AffineForOp;
 class AffineIfOp;
+class AffineParallelOp;
 class AffineMap;
 class AffineValueMap;
 class IntegerSet;
@@ -141,6 +142,13 @@ public:
   // TODO: add support for non-unit strides.
   LogicalResult addAffineForOpDomain(AffineForOp forOp);
 
+  /// Adds constraints (lower and upper bounds) for the specified
+  /// 'affine.parallel' operation's induction variables, using the IR
+  /// information stored in its bound maps. Returns failure for cases that
+  /// are not implemented yet. Asserts if an induction variable of the
+  /// 'affine.parallel' operation isn't found in the constraint system.
+  LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp);
+
   /// Adds constraints (lower and upper bounds) for each loop in the loop nest
   /// described by the bound maps `lbMaps` and `ubMaps` of a computation slice.
   /// Every pair (`lbMaps[i]`, `ubMaps[i]`) describes the bounds of a loop in
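
A minimal caller-side sketch of the new hook (not from this patch; `parallelOp` and the appendDimVar registration step are assumptions):

#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;

// Sketch: build the domain of a single affine.parallel directly, assuming
// `parallelOp` wraps affine.parallel (%i) = (0) to (10) { ... }.
void buildParallelDomain(AffineParallelOp parallelOp) {
  FlatAffineValueConstraints cst;
  // The IVs must already be part of the constraint system, or the call below
  // asserts; appendDimVar is assumed to be the registration step here.
  for (Value iv : parallelOp.getIVs())
    cst.appendDimVar(ValueRange{iv});
  if (succeeded(cst.addAffineParallelOpDomain(parallelOp)))
    cst.dump(); // expect 0 <= %i and %i <= 9 (the upper bound is exclusive)
}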


@@ -450,6 +450,11 @@ AffineForOp getForInductionVarOwner(Value val);
 void extractForInductionVars(ArrayRef<AffineForOp> forInsts,
                              SmallVectorImpl<Value> *ivs);
 
+/// Extracts the induction variables from a list of either AffineForOp or
+/// AffineParallelOp and places them in the output argument `ivs`.
+void extractInductionVars(ArrayRef<Operation *> affineOps,
+                          SmallVectorImpl<Value> &ivs);
+
 /// Builds a perfect nest of affine.for loops, i.e., each loop except the
 /// innermost one contains only another loop and a terminator. The loops iterate
 /// from "lbs" to "ubs" with "steps". The body of the innermost loop is
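
For illustration, a hedged sketch of how a caller might gather IVs from a mixed nest (the loop handles are assumed):

#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;

// Sketch: gathering IVs from a mixed nest, outermost first. A multi-IV
// affine.parallel contributes one Value per dimension, so ivs.size() may
// exceed the number of loop ops.
void collectNestIVs(AffineForOp forOp, AffineParallelOp parallelOp,
                    SmallVectorImpl<Value> &ivs) {
  SmallVector<Operation *, 4> loops = {forOp.getOperation(),
                                       parallelOp.getOperation()};
  extractInductionVars(loops, ivs);
}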


@@ -240,27 +240,36 @@ void mlir::getReachableAffineApplyOps(
 LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
                                 FlatAffineValueConstraints *domain) {
   SmallVector<Value, 4> indices;
-  SmallVector<AffineForOp, 8> forOps;
+  SmallVector<Operation *, 8> loopOps;
+  size_t numDims = 0;
   for (Operation *op : ops) {
-    if (!isa<AffineForOp, AffineIfOp>(op)) {
-      // TODO: Support affine.parallel ops.
-      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if ops");
+    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
+      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
+                                 "parallel ops");
       return failure();
     }
-    if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
-      forOps.push_back(forOp);
+    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
+      loopOps.push_back(forOp);
+      // An AffineForOp has exactly one induction variable.
+      numDims += 1;
+    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
+      loopOps.push_back(parallelOp);
+      numDims += parallelOp.getNumDims();
+    }
   }
-  extractForInductionVars(forOps, &indices);
-  // Reset while associated Values in 'indices' to the domain.
-  domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
+  extractInductionVars(loopOps, indices);
+  // Reset while associating Values in 'indices' to the domain.
+  domain->reset(numDims, /*numSymbols=*/0, /*numLocals=*/0, indices);
   for (Operation *op : ops) {
     // Add constraints from forOp's bounds.
     if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
       if (failed(domain->addAffineForOpDomain(forOp)))
         return failure();
-    } else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
+    } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
       domain->addAffineIfOpDomain(ifOp);
-    }
+    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
+      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
+        return failure();
   }
   return success();
 }
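
A sketch of what the reworked getIndexSet computes for a mixed nest; the handles and the IR shape in the comments are assumptions, not part of the patch:

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;

// Sketch: index set of a nest where `forOp` wraps affine.for %i = 0 to 8 and
// `parallelOp` wraps a nested affine.parallel (%j, %k) = (0, 0) to (10, 16).
void dumpMixedNestDomain(AffineForOp forOp, AffineParallelOp parallelOp) {
  SmallVector<Operation *, 4> ops = {forOp.getOperation(),
                                     parallelOp.getOperation()};
  FlatAffineValueConstraints domain;
  // numDims is 1 (the affine.for) + 2 (the parallel op's getNumDims()) = 3.
  if (succeeded(getIndexSet(ops, &domain)))
    domain.dump(); // 0 <= %i <= 7, 0 <= %j <= 9, 0 <= %k <= 15
}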
@@ -594,6 +603,12 @@ DependenceResult mlir::checkMemrefAccessDependence(
   if (srcAccess.memref != dstAccess.memref)
     return DependenceResult::NoDependence;
 
+  // TODO: Support affine.parallel which does not specify the ordering.
+  auto srcParent = srcAccess.opInst->getParentOfType<AffineParallelOp>();
+  auto dstParent = dstAccess.opInst->getParentOfType<AffineParallelOp>();
+  if (srcParent || dstParent)
+    return DependenceResult::Failure;
+
   // Return 'NoDependence' if one of these accesses is not an
   // AffineWriteOpInterface.
   if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
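
Caller-side implication, sketched below (it mirrors the test-driver change at the end of this commit): dependence checks involving accesses under affine.parallel must now tolerate Failure instead of assuming it cannot happen. `srcAccess` and `dstAccess` are assumed MemRefAccess values for two ops, at least one of which sits inside an affine.parallel.

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"

using namespace mlir;

void checkOnePair(const MemRefAccess &srcAccess,
                  const MemRefAccess &dstAccess) {
  FlatAffineValueConstraints dependenceConstraints;
  SmallVector<DependenceComponent, 2> dependenceComponents;
  DependenceResult result = checkMemrefAccessDependence(
      srcAccess, dstAccess, /*loopDepth=*/1, &dependenceConstraints,
      &dependenceComponents);
  if (result.value == DependenceResult::Failure) {
    // Iteration order inside affine.parallel is unspecified, so the analysis
    // refuses to answer; callers must treat this as "unknown" and bail out.
  }
}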

View File

@@ -639,6 +639,33 @@ FlatAffineValueConstraints::addAffineForOpDomain(AffineForOp forOp) {
                   forOp.getUpperBoundOperands());
 }
 
+LogicalResult FlatAffineValueConstraints::addAffineParallelOpDomain(
+    AffineParallelOp parallelOp) {
+  size_t ivPos = 0;
+  for (auto iv : parallelOp.getIVs()) {
+    unsigned pos;
+    if (!findVar(iv, &pos)) {
+      assert(false && "variable expected for the IV value");
+      return failure();
+    }
+
+    AffineMap lowerBound = parallelOp.getLowerBoundMap(ivPos);
+    if (lowerBound.isConstant())
+      addBound(BoundType::LB, pos, lowerBound.getSingleConstantResult());
+    else if (failed(addBound(BoundType::LB, pos, lowerBound,
+                             parallelOp.getLowerBoundsOperands())))
+      return failure();
+
+    auto upperBound = parallelOp.getUpperBoundMap(ivPos);
+    if (upperBound.isConstant())
+      // The upper bound of affine.parallel is exclusive, so add a closed
+      // bound on ub - 1; the map-based overload below already treats UB maps
+      // as exclusive.
+      addBound(BoundType::UB, pos, upperBound.getSingleConstantResult() - 1);
+    else if (failed(addBound(BoundType::UB, pos, upperBound,
+                             parallelOp.getUpperBoundsOperands())))
+      return failure();
+    ++ivPos;
+  }
+  return success();
+}
+
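Worked example (illustrative, not in the patch): with ivPos advancing once per IV, a two-IV op such as affine.parallel (%i, %j) = (0, 0) to (8, 16) reads its bound maps at positions 0 and 1 in turn, so the system gains 0 <= %i <= 7 and 0 <= %j <= 15. The upper bounds land on 7 and 15 rather than 8 and 16 because affine.parallel upper bounds are exclusive.
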
 LogicalResult
 FlatAffineValueConstraints::addDomainFromSliceMaps(ArrayRef<AffineMap> lbMaps,
                                                    ArrayRef<AffineMap> ubMaps,


@@ -2318,6 +2318,19 @@ void mlir::extractForInductionVars(ArrayRef<AffineForOp> forInsts,
     ivs->push_back(forInst.getInductionVar());
 }
 
+void mlir::extractInductionVars(ArrayRef<mlir::Operation *> affineOps,
+                                SmallVectorImpl<mlir::Value> &ivs) {
+  ivs.reserve(affineOps.size());
+  for (Operation *op : affineOps) {
+    // Collect the single IV of an affine.for or all IVs of an affine.parallel.
+    if (auto forOp = dyn_cast<AffineForOp>(op))
+      ivs.push_back(forOp.getInductionVar());
+    else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
+      for (size_t i = 0; i < parallelOp.getBody()->getNumArguments(); i++)
+        ivs.push_back(parallelOp.getBody()->getArgument(i));
+  }
+}
+
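An equivalent, behavior-preserving formulation of the loop above (editorial sketch), since AffineParallelOp exposes its block arguments directly through getIVs():

  for (Operation *op : affineOps) {
    if (auto forOp = dyn_cast<AffineForOp>(op))
      ivs.push_back(forOp.getInductionVar());
    else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
      llvm::append_range(ivs, parallelOp.getIVs());
  }
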
 /// Builds an affine loop nest, using "loopCreatorFn" to create individual loop
 /// operations.
 template <typename BoundListTy, typename LoopCreatorTy>


@@ -788,3 +788,61 @@ func.func @no_forwarding_across_scopes() -> memref<1xf32> {
   }
   return %A : memref<1xf32>
 }
+
+// CHECK-LABEL: func @parallel_store_load() {
+func.func @parallel_store_load() {
+  %cf7 = arith.constant 7.0 : f32
+  %m = memref.alloc() : memref<10xf32>
+  affine.parallel (%i0) = (0) to (10) {
+    affine.store %cf7, %m[%i0] : memref<10xf32>
+    %v0 = affine.load %m[%i0] : memref<10xf32>
+    %v1 = arith.addf %v0, %v0 : f32
+  }
+  memref.dealloc %m : memref<10xf32>
+  return
+// CHECK:       %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT:  affine.parallel (%{{.*}}) = (0) to (10) {
+// CHECK-NEXT:    arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT:  }
+// CHECK-NEXT:  return
+}
+
+func.func @non_constant_parallel_store_load(%N : index) {
+  %cf7 = arith.constant 7.0 : f32
+  %m = memref.alloc() : memref<10xf32>
+  affine.parallel (%i0) = (0) to (%N) {
+    affine.store %cf7, %m[%i0] : memref<10xf32>
+    %v0 = affine.load %m[%i0] : memref<10xf32>
+    %v1 = arith.addf %v0, %v0 : f32
+  }
+  memref.dealloc %m : memref<10xf32>
+  return
+}
+// CHECK:       func.func @non_constant_parallel_store_load(%[[ARG0:.*]]: index) {
+// CHECK-NEXT:  %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT:  affine.parallel (%{{.*}}) = (0) to (%[[ARG0]]) {
+// CHECK-NEXT:    arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT:  }
+// CHECK-NEXT:  return
+
+// CHECK-LABEL: func @parallel_surrounding_for() {
+func.func @parallel_surrounding_for() {
+  %cf7 = arith.constant 7.0 : f32
+  %m = memref.alloc() : memref<10x10xf32>
+  affine.parallel (%i0) = (0) to (10) {
+    affine.for %i1 = 0 to 10 {
+      affine.store %cf7, %m[%i0,%i1] : memref<10x10xf32>
+      %v0 = affine.load %m[%i0,%i1] : memref<10x10xf32>
+      %v1 = arith.addf %v0, %v0 : f32
+    }
+  }
+  memref.dealloc %m : memref<10x10xf32>
+  return
+// CHECK:       %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT:  affine.parallel (%{{.*}}) = (0) to (10) {
+// CHECK-NEXT:    affine.for %{{.*}} = 0 to 10 {
+// CHECK-NEXT:      arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT:    }
+// CHECK-NEXT:  }
+// CHECK-NEXT:  return
+}


@@ -1064,3 +1064,19 @@ func.func @test_interleaved_affine_for_if() {
   return
 }
 
+// -----
+
+// CHECK-LABEL: func @parallel_dependence_check_failure() {
+func.func @parallel_dependence_check_failure() {
+  %0 = memref.alloc() : memref<10xf32>
+  %cst = arith.constant 7.000000e+00 : f32
+  affine.parallel (%i0) = (0) to (10) {
+    // expected-error @+1 {{dependence check failed}}
+    affine.store %cst, %0[%i0] : memref<10xf32>
+  }
+  affine.parallel (%i1) = (0) to (10) {
+    // expected-error @+1 {{dependence check failed}}
+    %1 = affine.load %0[%i1] : memref<10xf32>
+  }
+  return
+}


@@ -86,15 +86,18 @@ static void checkDependences(ArrayRef<Operation *> loadsAndStores) {
         DependenceResult result = checkMemrefAccessDependence(
             srcAccess, dstAccess, d, &dependenceConstraints,
             &dependenceComponents);
-        assert(result.value != DependenceResult::Failure);
-        bool ret = hasDependence(result);
-        // TODO: Print dependence type (i.e. RAW, etc) and print
-        // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance
-        // vectors from ([1, 1], [3, 3]) to (1, 3).
-        srcOpInst->emitRemark("dependence from ")
-            << i << " to " << j << " at depth " << d << " = "
-            << getDirectionVectorStr(ret, numCommonLoops, d,
-                                     dependenceComponents);
+        if (result.value == DependenceResult::Failure) {
+          srcOpInst->emitError("dependence check failed");
+        } else {
+          bool ret = hasDependence(result);
+          // TODO: Print dependence type (i.e. RAW, etc) and print
+          // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance
+          // vectors from ([1, 1], [3, 3]) to (1, 3).
+          srcOpInst->emitRemark("dependence from ")
+              << i << " to " << j << " at depth " << d << " = "
+              << getDirectionVectorStr(ret, numCommonLoops, d,
+                                       dependenceComponents);
+        }
       }
     }
   }