[mlir][affine] Support affine.parallel in the index set analysis

Support affine.parallel in the index set analysis. This lets the dependence analysis handle nests that contain affine.parallel in addition to affine.for and affine.if. This change only supports constant lower/upper bounds on affine.parallel; more complex affine map bounds will be supported in follow-up commits.

See https://github.com/llvm/llvm-project/issues/57327

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D136056
Kai Sasaki, 2022-12-04 20:09:01 +09:00
commit 1d541bd920 (parent 8be0d8fb83)
8 changed files with 165 additions and 20 deletions


@@ -24,6 +24,7 @@ namespace mlir {
 class AffineCondition;
 class AffineForOp;
 class AffineIfOp;
+class AffineParallelOp;
 class AffineMap;
 class AffineValueMap;
 class IntegerSet;
@@ -141,6 +142,13 @@ public:
   // TODO: add support for non-unit strides.
   LogicalResult addAffineForOpDomain(AffineForOp forOp);
 
+  /// Adds constraints (lower and upper bounds) for the specified
+  /// 'affine.parallel' operation's induction variables, using the IR
+  /// information stored in its bound maps. Returns failure for cases that
+  /// are not implemented yet. Asserts if an induction variable of the
+  /// 'affine.parallel' operation isn't found in the constraint system.
+  LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp);
+
   /// Adds constraints (lower and upper bounds) for each loop in the loop nest
   /// described by the bound maps `lbMaps` and `ubMaps` of a computation slice.
   /// Every pair (`lbMaps[i]`, `ubMaps[i]`) describes the bounds of a loop in
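
A minimal caller-side sketch of the new hook (not from this patch; `parallelOp` and the appendDimVar registration step are assumptions):

#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;

// Sketch: build the domain of a single affine.parallel directly, assuming
// `parallelOp` wraps affine.parallel (%i) = (0) to (10) { ... }.
void buildParallelDomain(AffineParallelOp parallelOp) {
  FlatAffineValueConstraints cst;
  // The IVs must already be part of the constraint system, or the call below
  // asserts; appendDimVar is assumed to be the registration step here.
  for (Value iv : parallelOp.getIVs())
    cst.appendDimVar(ValueRange{iv});
  if (succeeded(cst.addAffineParallelOpDomain(parallelOp)))
    cst.dump(); // expect 0 <= %i and %i <= 9 (the upper bound is exclusive)
}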


@@ -450,6 +450,11 @@ AffineForOp getForInductionVarOwner(Value val);
 void extractForInductionVars(ArrayRef<AffineForOp> forInsts,
                              SmallVectorImpl<Value> *ivs);
 
+/// Extracts the induction variables from a list of either AffineForOp or
+/// AffineParallelOp and places them in the output argument `ivs`.
+void extractInductionVars(ArrayRef<Operation *> affineOps,
+                          SmallVectorImpl<Value> &ivs);
+
 /// Builds a perfect nest of affine.for loops, i.e., each loop except the
 /// innermost one contains only another loop and a terminator. The loops iterate
 /// from "lbs" to "ubs" with "steps". The body of the innermost loop is
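
For illustration, a hedged sketch of how a caller might gather IVs from a mixed nest (the loop handles are assumed):

#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;

// Sketch: gathering IVs from a mixed nest, outermost first. A multi-IV
// affine.parallel contributes one Value per dimension, so ivs.size() may
// exceed the number of loop ops.
void collectNestIVs(AffineForOp forOp, AffineParallelOp parallelOp,
                    SmallVectorImpl<Value> &ivs) {
  SmallVector<Operation *, 4> loops = {forOp.getOperation(),
                                       parallelOp.getOperation()};
  extractInductionVars(loops, ivs);
}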


@@ -240,27 +240,36 @@ void mlir::getReachableAffineApplyOps(
 LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
                                 FlatAffineValueConstraints *domain) {
   SmallVector<Value, 4> indices;
-  SmallVector<AffineForOp, 8> forOps;
+  SmallVector<Operation *, 8> loopOps;
+  size_t numDims = 0;
   for (Operation *op : ops) {
-    if (!isa<AffineForOp, AffineIfOp>(op)) {
-      // TODO: Support affine.parallel ops.
-      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if ops");
+    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
+      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
+                                 "parallel ops");
       return failure();
     }
-    if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
-      forOps.push_back(forOp);
+    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
+      loopOps.push_back(forOp);
+      // An AffineForOp has exactly one induction variable.
+      numDims += 1;
+    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
+      loopOps.push_back(parallelOp);
+      numDims += parallelOp.getNumDims();
+    }
   }
-  extractForInductionVars(forOps, &indices);
-  // Reset while associated Values in 'indices' to the domain.
-  domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
+  extractInductionVars(loopOps, indices);
+  // Reset while associating Values in 'indices' to the domain.
+  domain->reset(numDims, /*numSymbols=*/0, /*numLocals=*/0, indices);
   for (Operation *op : ops) {
     // Add constraints from forOp's bounds.
     if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
       if (failed(domain->addAffineForOpDomain(forOp)))
         return failure();
-    } else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
+    } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
       domain->addAffineIfOpDomain(ifOp);
-    }
+    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
+      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
+        return failure();
   }
   return success();
 }
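
A sketch of what the reworked getIndexSet computes for a mixed nest; the handles and the IR shape in the comments are assumptions, not part of the patch:

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;

// Sketch: index set of a nest where `forOp` wraps affine.for %i = 0 to 8 and
// `parallelOp` wraps a nested affine.parallel (%j, %k) = (0, 0) to (10, 16).
void dumpMixedNestDomain(AffineForOp forOp, AffineParallelOp parallelOp) {
  SmallVector<Operation *, 4> ops = {forOp.getOperation(),
                                     parallelOp.getOperation()};
  FlatAffineValueConstraints domain;
  // numDims is 1 (the affine.for) + 2 (the parallel op's getNumDims()) = 3.
  if (succeeded(getIndexSet(ops, &domain)))
    domain.dump(); // 0 <= %i <= 7, 0 <= %j <= 9, 0 <= %k <= 15
}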
@@ -594,6 +603,12 @@ DependenceResult mlir::checkMemrefAccessDependence(
   if (srcAccess.memref != dstAccess.memref)
     return DependenceResult::NoDependence;
 
+  // TODO: Support affine.parallel which does not specify the ordering.
+  auto srcParent = srcAccess.opInst->getParentOfType<AffineParallelOp>();
+  auto dstParent = dstAccess.opInst->getParentOfType<AffineParallelOp>();
+  if (srcParent || dstParent)
+    return DependenceResult::Failure;
+
   // Return 'NoDependence' if one of these accesses is not an
   // AffineWriteOpInterface.
   if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
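
Caller-side implication, sketched below (it mirrors the test-driver change at the end of this commit): dependence checks involving accesses under affine.parallel must now tolerate Failure instead of assuming it cannot happen. `srcAccess` and `dstAccess` are assumed MemRefAccess values for two ops, at least one of which sits inside an affine.parallel.

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"

using namespace mlir;

void checkOnePair(const MemRefAccess &srcAccess,
                  const MemRefAccess &dstAccess) {
  FlatAffineValueConstraints dependenceConstraints;
  SmallVector<DependenceComponent, 2> dependenceComponents;
  DependenceResult result = checkMemrefAccessDependence(
      srcAccess, dstAccess, /*loopDepth=*/1, &dependenceConstraints,
      &dependenceComponents);
  if (result.value == DependenceResult::Failure) {
    // Iteration order inside affine.parallel is unspecified, so the analysis
    // refuses to answer; callers must treat this as "unknown" and bail out.
  }
}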

View File

@@ -639,6 +639,33 @@ FlatAffineValueConstraints::addAffineForOpDomain(AffineForOp forOp) {
                   forOp.getUpperBoundOperands());
 }
 
+LogicalResult FlatAffineValueConstraints::addAffineParallelOpDomain(
+    AffineParallelOp parallelOp) {
+  size_t ivPos = 0;
+  for (auto iv : parallelOp.getIVs()) {
+    unsigned pos;
+    if (!findVar(iv, &pos)) {
+      assert(false && "variable expected for the IV value");
+      return failure();
+    }
+
+    AffineMap lowerBound = parallelOp.getLowerBoundMap(ivPos);
+    if (lowerBound.isConstant())
+      addBound(BoundType::LB, pos, lowerBound.getSingleConstantResult());
+    else if (failed(addBound(BoundType::LB, pos, lowerBound,
+                             parallelOp.getLowerBoundsOperands())))
+      return failure();
+
+    auto upperBound = parallelOp.getUpperBoundMap(ivPos);
+    if (upperBound.isConstant())
+      // The upper bound of affine.parallel is exclusive, so add a closed
+      // bound on ub - 1; the map-based overload below already treats UB maps
+      // as exclusive.
+      addBound(BoundType::UB, pos, upperBound.getSingleConstantResult() - 1);
+    else if (failed(addBound(BoundType::UB, pos, upperBound,
+                             parallelOp.getUpperBoundsOperands())))
+      return failure();
+    ++ivPos;
+  }
+  return success();
+}
+
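Worked example (illustrative, not in the patch): with ivPos advancing once per IV, a two-IV op such as affine.parallel (%i, %j) = (0, 0) to (8, 16) reads its bound maps at positions 0 and 1 in turn, so the system gains 0 <= %i <= 7 and 0 <= %j <= 15. The upper bounds land on 7 and 15 rather than 8 and 16 because affine.parallel upper bounds are exclusive.
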
 LogicalResult
 FlatAffineValueConstraints::addDomainFromSliceMaps(ArrayRef<AffineMap> lbMaps,
                                                    ArrayRef<AffineMap> ubMaps,


@@ -2318,6 +2318,19 @@ void mlir::extractForInductionVars(ArrayRef<AffineForOp> forInsts,
     ivs->push_back(forInst.getInductionVar());
 }
 
+void mlir::extractInductionVars(ArrayRef<mlir::Operation *> affineOps,
+                                SmallVectorImpl<mlir::Value> &ivs) {
+  ivs.reserve(affineOps.size());
+  for (Operation *op : affineOps) {
+    // Collect the single IV of an affine.for or all IVs of an affine.parallel.
+    if (auto forOp = dyn_cast<AffineForOp>(op))
+      ivs.push_back(forOp.getInductionVar());
+    else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
+      for (size_t i = 0; i < parallelOp.getBody()->getNumArguments(); i++)
+        ivs.push_back(parallelOp.getBody()->getArgument(i));
+  }
+}
+
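An equivalent, behavior-preserving formulation of the loop above (editorial sketch), since AffineParallelOp exposes its block arguments directly through getIVs():

  for (Operation *op : affineOps) {
    if (auto forOp = dyn_cast<AffineForOp>(op))
      ivs.push_back(forOp.getInductionVar());
    else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
      llvm::append_range(ivs, parallelOp.getIVs());
  }
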
 /// Builds an affine loop nest, using "loopCreatorFn" to create individual loop
 /// operations.
 template <typename BoundListTy, typename LoopCreatorTy>


@@ -788,3 +788,61 @@ func.func @no_forwarding_across_scopes() -> memref<1xf32> {
   }
   return %A : memref<1xf32>
 }
+
+// CHECK-LABEL: func @parallel_store_load() {
+func.func @parallel_store_load() {
+  %cf7 = arith.constant 7.0 : f32
+  %m = memref.alloc() : memref<10xf32>
+  affine.parallel (%i0) = (0) to (10) {
+    affine.store %cf7, %m[%i0] : memref<10xf32>
+    %v0 = affine.load %m[%i0] : memref<10xf32>
+    %v1 = arith.addf %v0, %v0 : f32
+  }
+  memref.dealloc %m : memref<10xf32>
+  return
+// CHECK:       %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT:  affine.parallel (%{{.*}}) = (0) to (10) {
+// CHECK-NEXT:    arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT:  }
+// CHECK-NEXT:  return
+}
+
+func.func @non_constant_parallel_store_load(%N : index) {
+  %cf7 = arith.constant 7.0 : f32
+  %m = memref.alloc() : memref<10xf32>
+  affine.parallel (%i0) = (0) to (%N) {
+    affine.store %cf7, %m[%i0] : memref<10xf32>
+    %v0 = affine.load %m[%i0] : memref<10xf32>
+    %v1 = arith.addf %v0, %v0 : f32
+  }
+  memref.dealloc %m : memref<10xf32>
+  return
+}
+// CHECK:       func.func @non_constant_parallel_store_load(%[[ARG0:.*]]: index) {
+// CHECK-NEXT:  %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT:  affine.parallel (%{{.*}}) = (0) to (%[[ARG0]]) {
+// CHECK-NEXT:    arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT:  }
+// CHECK-NEXT:  return
+
+// CHECK-LABEL: func @parallel_surrounding_for() {
+func.func @parallel_surrounding_for() {
+  %cf7 = arith.constant 7.0 : f32
+  %m = memref.alloc() : memref<10x10xf32>
+  affine.parallel (%i0) = (0) to (10) {
+    affine.for %i1 = 0 to 10 {
+      affine.store %cf7, %m[%i0,%i1] : memref<10x10xf32>
+      %v0 = affine.load %m[%i0,%i1] : memref<10x10xf32>
+      %v1 = arith.addf %v0, %v0 : f32
+    }
+  }
+  memref.dealloc %m : memref<10x10xf32>
+  return
+// CHECK:       %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT:  affine.parallel (%{{.*}}) = (0) to (10) {
+// CHECK-NEXT:    affine.for %{{.*}} = 0 to 10 {
+// CHECK-NEXT:      arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT:    }
+// CHECK-NEXT:  }
+// CHECK-NEXT:  return
+}


@@ -1064,3 +1064,19 @@ func.func @test_interleaved_affine_for_if() {
   return
 }
 
+// -----
+
+// CHECK-LABEL: func @parallel_dependence_check_failure() {
+func.func @parallel_dependence_check_failure() {
+  %0 = memref.alloc() : memref<10xf32>
+  %cst = arith.constant 7.000000e+00 : f32
+  affine.parallel (%i0) = (0) to (10) {
+    // expected-error @+1 {{dependence check failed}}
+    affine.store %cst, %0[%i0] : memref<10xf32>
+  }
+  affine.parallel (%i1) = (0) to (10) {
+    // expected-error @+1 {{dependence check failed}}
+    %1 = affine.load %0[%i1] : memref<10xf32>
+  }
+  return
+}


@@ -86,15 +86,18 @@ static void checkDependences(ArrayRef<Operation *> loadsAndStores) {
         DependenceResult result = checkMemrefAccessDependence(
             srcAccess, dstAccess, d, &dependenceConstraints,
             &dependenceComponents);
-        assert(result.value != DependenceResult::Failure);
-        bool ret = hasDependence(result);
-        // TODO: Print dependence type (i.e. RAW, etc) and print
-        // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance
-        // vectors from ([1, 1], [3, 3]) to (1, 3).
-        srcOpInst->emitRemark("dependence from ")
-            << i << " to " << j << " at depth " << d << " = "
-            << getDirectionVectorStr(ret, numCommonLoops, d,
-                                     dependenceComponents);
+        if (result.value == DependenceResult::Failure) {
+          srcOpInst->emitError("dependence check failed");
+        } else {
+          bool ret = hasDependence(result);
+          // TODO: Print dependence type (i.e. RAW, etc) and print
+          // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance
+          // vectors from ([1, 1], [3, 3]) to (1, 3).
+          srcOpInst->emitRemark("dependence from ")
+              << i << " to " << j << " at depth " << d << " = "
+              << getDirectionVectorStr(ret, numCommonLoops, d,
+                                       dependenceComponents);
+        }
       }
     }
   }