forked from OSchip/llvm-project
1257 lines
46 KiB
C++
1257 lines
46 KiB
C++
//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements a profile inference algorithm. Given incomplete and
|
|
// possibly imprecise block counts, the algorithm reconstructs realistic block
|
|
// and edge counts that satisfy flow conservation rules, while minimally modifying
|
|
// input block counts.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Transforms/Utils/SampleProfileInference.h"
|
|
#include "llvm/ADT/BitVector.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include <queue>
|
|
#include <set>
|
|
#include <stack>
|
|
|
|
using namespace llvm;
|
|
#define DEBUG_TYPE "sample-profile-inference"
|
|
|
|
namespace {
|
|
|
|
// Hidden command-line knobs of the profile inference algorithm.

/// Whether counts are spread evenly across equally likely alternatives.
static cl::opt<bool> SampleProfileEvenCountDistribution(
    "sample-profile-even-count-distribution", cl::Hidden, cl::init(true),
    cl::desc("Try to evenly distribute counts when there are multiple equally "
             "likely options."));

/// Upper bound on how many times the DFS may (re)enter a node while building
/// the augmenting DAG for even count distribution.
static cl::opt<unsigned> SampleProfileMaxDfsCalls(
    "sample-profile-max-dfs-calls", cl::Hidden, cl::init(10),
    cl::desc("Maximum number of dfs iterations for even count distribution."));

/// Cost of raising a block's count by one.
static cl::opt<unsigned> SampleProfileProfiCostInc(
    "sample-profile-profi-cost-inc", cl::Hidden, cl::init(10),
    cl::desc("A cost of increasing a block's count by one."));

/// Cost of lowering a block's count by one.
static cl::opt<unsigned> SampleProfileProfiCostDec(
    "sample-profile-profi-cost-dec", cl::Hidden, cl::init(20),
    cl::desc("A cost of decreasing a block's count by one."));

/// Cost of raising the count of a zero-weight block by one.
static cl::opt<unsigned> SampleProfileProfiCostIncZero(
    "sample-profile-profi-cost-inc-zero", cl::Hidden, cl::init(11),
    cl::desc("A cost of increasing a count of zero-weight block by one."));

/// Cost of raising the entry block's count by one.
static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
    "sample-profile-profi-cost-inc-entry", cl::Hidden, cl::init(40),
    cl::desc("A cost of increasing the entry block's count by one."));

/// Cost of lowering the entry block's count by one.
static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
    "sample-profile-profi-cost-dec-entry", cl::Hidden, cl::init(10),
    cl::desc("A cost of decreasing the entry block's count by one."));
|
|
|
|
/// Stand-in for an infinite flow/capacity/weight of a block or an edge.
/// Deliberately much smaller than numeric_limits<int64_t>::max(), because
/// such values may be added together while the algorithm runs.
static constexpr int64_t INF = static_cast<int64_t>(1) << 50;
|
|
|
|
/// The minimum-cost maximum flow algorithm.
|
|
///
|
|
/// The algorithm finds the maximum flow of minimum cost on a given (directed)
|
|
/// network using a modified version of the classical Moore-Bellman-Ford
|
|
/// approach. The algorithm applies a number of augmentation iterations in which
|
|
/// flow is sent along paths of positive capacity from the source to the sink.
|
|
/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
|
|
/// m is the number of edges, n is the number of vertices, and v(f) is the
|
|
/// value of the maximum flow. However, the observed running time on typical
|
|
/// instances is sub-quadratic, that is, o(n^2).
|
|
///
|
|
/// The input is a set of edges with specified costs and capacities, and a pair
|
|
/// of nodes (source and sink). The output is the flow along each edge of the
|
|
/// minimum total cost respecting the given edge capacities.
|
|
class MinCostMaxFlow {
|
|
public:
|
|
// Initialize algorithm's data structures for a network of a given size.
|
|
void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
|
|
Source = SourceNode;
|
|
Target = SinkNode;
|
|
|
|
Nodes = std::vector<Node>(NodeCount);
|
|
Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
|
|
if (SampleProfileEvenCountDistribution)
|
|
AugmentingEdges =
|
|
std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
|
|
}
|
|
|
|
// Run the algorithm.
|
|
int64_t run() {
|
|
// Iteratively find an augmentation path/dag in the network and send the
|
|
// flow along its edges
|
|
size_t AugmentationIters = applyFlowAugmentation();
|
|
|
|
// Compute the total flow and its cost
|
|
int64_t TotalCost = 0;
|
|
int64_t TotalFlow = 0;
|
|
for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
|
|
for (auto &Edge : Edges[Src]) {
|
|
if (Edge.Flow > 0) {
|
|
TotalCost += Edge.Cost * Edge.Flow;
|
|
if (Src == Source)
|
|
TotalFlow += Edge.Flow;
|
|
}
|
|
}
|
|
}
|
|
LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
|
|
<< " iterations with " << TotalFlow << " total flow"
|
|
<< " of " << TotalCost << " cost\n");
|
|
(void)TotalFlow;
|
|
(void)AugmentationIters;
|
|
return TotalCost;
|
|
}
|
|
|
|
/// Adding an edge to the network with a specified capacity and a cost.
|
|
/// Multiple edges between a pair of nodes are allowed but self-edges
|
|
/// are not supported.
|
|
void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
|
|
assert(Capacity > 0 && "adding an edge of zero capacity");
|
|
assert(Src != Dst && "loop edge are not supported");
|
|
|
|
Edge SrcEdge;
|
|
SrcEdge.Dst = Dst;
|
|
SrcEdge.Cost = Cost;
|
|
SrcEdge.Capacity = Capacity;
|
|
SrcEdge.Flow = 0;
|
|
SrcEdge.RevEdgeIndex = Edges[Dst].size();
|
|
|
|
Edge DstEdge;
|
|
DstEdge.Dst = Src;
|
|
DstEdge.Cost = -Cost;
|
|
DstEdge.Capacity = 0;
|
|
DstEdge.Flow = 0;
|
|
DstEdge.RevEdgeIndex = Edges[Src].size();
|
|
|
|
Edges[Src].push_back(SrcEdge);
|
|
Edges[Dst].push_back(DstEdge);
|
|
}
|
|
|
|
/// Adding an edge to the network of infinite capacity and a given cost.
|
|
void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
|
|
addEdge(Src, Dst, INF, Cost);
|
|
}
|
|
|
|
/// Get the total flow from a given source node.
|
|
/// Returns a list of pairs (target node, amount of flow to the target).
|
|
const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
|
|
std::vector<std::pair<uint64_t, int64_t>> Flow;
|
|
for (auto &Edge : Edges[Src]) {
|
|
if (Edge.Flow > 0)
|
|
Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
|
|
}
|
|
return Flow;
|
|
}
|
|
|
|
/// Get the total flow between a pair of nodes.
|
|
int64_t getFlow(uint64_t Src, uint64_t Dst) const {
|
|
int64_t Flow = 0;
|
|
for (auto &Edge : Edges[Src]) {
|
|
if (Edge.Dst == Dst) {
|
|
Flow += Edge.Flow;
|
|
}
|
|
}
|
|
return Flow;
|
|
}
|
|
|
|
/// The penalty charged for taking an unlikely jump.
static constexpr int64_t AuxCostUnlikely = static_cast<int64_t>(1) << 30;
/// Lower bound on BaseDistance used for jump distances in island joining.
static constexpr uint64_t MinBaseDistance = 10000;
|
|
|
|
private:
|
|
/// Iteratively find an augmentation path/dag in the network and send the
|
|
/// flow along its edges. The method returns the number of applied iterations.
|
|
size_t applyFlowAugmentation() {
|
|
size_t AugmentationIters = 0;
|
|
while (findAugmentingPath()) {
|
|
uint64_t PathCapacity = computeAugmentingPathCapacity();
|
|
while (PathCapacity > 0) {
|
|
bool Progress = false;
|
|
if (SampleProfileEvenCountDistribution) {
|
|
// Identify node/edge candidates for augmentation
|
|
identifyShortestEdges(PathCapacity);
|
|
|
|
// Find an augmenting DAG
|
|
auto AugmentingOrder = findAugmentingDAG();
|
|
|
|
// Apply the DAG augmentation
|
|
Progress = augmentFlowAlongDAG(AugmentingOrder);
|
|
PathCapacity = computeAugmentingPathCapacity();
|
|
}
|
|
|
|
if (!Progress) {
|
|
augmentFlowAlongPath(PathCapacity);
|
|
PathCapacity = 0;
|
|
}
|
|
|
|
AugmentationIters++;
|
|
}
|
|
}
|
|
return AugmentationIters;
|
|
}
|
|
|
|
/// Compute the capacity of the cannonical augmenting path. If the path is
|
|
/// saturated (that is, no flow can be sent along the path), then return 0.
|
|
uint64_t computeAugmentingPathCapacity() {
|
|
uint64_t PathCapacity = INF;
|
|
uint64_t Now = Target;
|
|
while (Now != Source) {
|
|
uint64_t Pred = Nodes[Now].ParentNode;
|
|
auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
|
|
|
|
assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
|
|
uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
|
|
PathCapacity = std::min(PathCapacity, EdgeCapacity);
|
|
|
|
Now = Pred;
|
|
}
|
|
return PathCapacity;
|
|
}
|
|
|
|
/// Check for existence of an augmenting path with a positive capacity.
|
|
bool findAugmentingPath() {
|
|
// Initialize data structures
|
|
for (auto &Node : Nodes) {
|
|
Node.Distance = INF;
|
|
Node.ParentNode = uint64_t(-1);
|
|
Node.ParentEdgeIndex = uint64_t(-1);
|
|
Node.Taken = false;
|
|
}
|
|
|
|
std::queue<uint64_t> Queue;
|
|
Queue.push(Source);
|
|
Nodes[Source].Distance = 0;
|
|
Nodes[Source].Taken = true;
|
|
while (!Queue.empty()) {
|
|
uint64_t Src = Queue.front();
|
|
Queue.pop();
|
|
Nodes[Src].Taken = false;
|
|
// Although the residual network contains edges with negative costs
|
|
// (in particular, backward edges), it can be shown that there are no
|
|
// negative-weight cycles and the following two invariants are maintained:
|
|
// (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
|
|
// where Dist is the length of the shortest path between two nodes. This
|
|
// allows to prune the search-space of the path-finding algorithm using
|
|
// the following early-stop criteria:
|
|
// -- If we find a path with zero-distance from Source to Target, stop the
|
|
// search, as the path is the shortest since Dist[Source, Target] >= 0;
|
|
// -- If we have Dist[Source, V] > Dist[Source, Target], then do not
|
|
// process node V, as it is guaranteed _not_ to be on a shortest path
|
|
// from Source to Target; it follows from inequalities
|
|
// Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
|
|
// >= Dist[Source, V]
|
|
if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
|
|
break;
|
|
if (Nodes[Src].Distance > Nodes[Target].Distance)
|
|
continue;
|
|
|
|
// Process adjacent edges
|
|
for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
|
|
auto &Edge = Edges[Src][EdgeIdx];
|
|
if (Edge.Flow < Edge.Capacity) {
|
|
uint64_t Dst = Edge.Dst;
|
|
int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
|
|
if (Nodes[Dst].Distance > NewDistance) {
|
|
// Update the distance and the parent node/edge
|
|
Nodes[Dst].Distance = NewDistance;
|
|
Nodes[Dst].ParentNode = Src;
|
|
Nodes[Dst].ParentEdgeIndex = EdgeIdx;
|
|
// Add the node to the queue, if it is not there yet
|
|
if (!Nodes[Dst].Taken) {
|
|
Queue.push(Dst);
|
|
Nodes[Dst].Taken = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return Nodes[Target].Distance != INF;
|
|
}
|
|
|
|
/// Update the current flow along the augmenting path.
|
|
void augmentFlowAlongPath(uint64_t PathCapacity) {
|
|
assert(PathCapacity > 0 && "found an incorrect augmenting path");
|
|
uint64_t Now = Target;
|
|
while (Now != Source) {
|
|
uint64_t Pred = Nodes[Now].ParentNode;
|
|
auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
|
|
auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
|
|
|
|
Edge.Flow += PathCapacity;
|
|
RevEdge.Flow -= PathCapacity;
|
|
|
|
Now = Pred;
|
|
}
|
|
}
|
|
|
|
/// Find an Augmenting DAG order using a modified version of DFS in which we
|
|
/// can visit a node multiple times. In the DFS search, when scanning each
|
|
/// edge out of a node, continue search at Edge.Dst endpoint if it has not
|
|
/// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
|
|
/// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
|
|
/// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
|
|
/// that starts with Source and ends with Target.
|
|
std::vector<uint64_t> findAugmentingDAG() {
|
|
// We use a stack based implemenation of DFS to avoid recursion.
|
|
// Defining DFS data structures:
|
|
// A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
|
|
// - we are currently visiting Nodes[NodeIdx] and
|
|
// - the next edge to scan is Edges[NodeIdx][EdgeIdx]
|
|
typedef std::pair<uint64_t, uint64_t> StackItemType;
|
|
std::stack<StackItemType> Stack;
|
|
std::vector<uint64_t> AugmentingOrder;
|
|
|
|
// Phase 0: Initialize Node attributes and Time for DFS run
|
|
for (auto &Node : Nodes) {
|
|
Node.Discovery = 0;
|
|
Node.Finish = 0;
|
|
Node.NumCalls = 0;
|
|
Node.Taken = false;
|
|
}
|
|
uint64_t Time = 0;
|
|
// Mark Target as Taken
|
|
// Taken attribute will be propagated backwards from Target towards Source
|
|
Nodes[Target].Taken = true;
|
|
|
|
// Phase 1: Start DFS traversal from Source
|
|
Stack.emplace(Source, 0);
|
|
Nodes[Source].Discovery = ++Time;
|
|
while (!Stack.empty()) {
|
|
auto NodeIdx = Stack.top().first;
|
|
auto EdgeIdx = Stack.top().second;
|
|
|
|
// If we haven't scanned all edges out of NodeIdx, continue scanning
|
|
if (EdgeIdx < Edges[NodeIdx].size()) {
|
|
auto &Edge = Edges[NodeIdx][EdgeIdx];
|
|
auto &Dst = Nodes[Edge.Dst];
|
|
Stack.top().second++;
|
|
|
|
if (Edge.OnShortestPath) {
|
|
// If we haven't seen Edge.Dst so far, continue DFS search there
|
|
if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
|
|
Dst.Discovery = ++Time;
|
|
Stack.emplace(Edge.Dst, 0);
|
|
Dst.NumCalls++;
|
|
} else if (Dst.Taken && Dst.Finish != 0) {
|
|
// Else, if Edge.Dst already have a path to Target, so that NodeIdx
|
|
Nodes[NodeIdx].Taken = true;
|
|
}
|
|
}
|
|
} else {
|
|
// If we are done scanning all edge out of NodeIdx
|
|
Stack.pop();
|
|
// If we haven't found a path from NodeIdx to Target, forget about it
|
|
if (!Nodes[NodeIdx].Taken) {
|
|
Nodes[NodeIdx].Discovery = 0;
|
|
} else {
|
|
// If we have found a path from NodeIdx to Target, then finish NodeIdx
|
|
// and propagate Taken flag to DFS parent unless at the Source
|
|
Nodes[NodeIdx].Finish = ++Time;
|
|
// NodeIdx == Source if and only if the stack is empty
|
|
if (NodeIdx != Source) {
|
|
assert(!Stack.empty() && "empty stack while running dfs");
|
|
Nodes[Stack.top().first].Taken = true;
|
|
}
|
|
AugmentingOrder.push_back(NodeIdx);
|
|
}
|
|
}
|
|
}
|
|
// Nodes are collected decreasing Finish time, so the order is reversed
|
|
std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
|
|
|
|
// Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
|
|
for (size_t Src : AugmentingOrder) {
|
|
AugmentingEdges[Src].clear();
|
|
for (auto &Edge : Edges[Src]) {
|
|
uint64_t Dst = Edge.Dst;
|
|
if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
|
|
Nodes[Dst].Finish < Nodes[Src].Finish) {
|
|
AugmentingEdges[Src].push_back(&Edge);
|
|
}
|
|
}
|
|
assert((Src == Target || !AugmentingEdges[Src].empty()) &&
|
|
"incorrectly constructed augmenting edges");
|
|
}
|
|
|
|
return AugmentingOrder;
|
|
}
|
|
|
|
/// Update the current flow along the given (acyclic) subgraph specified by
|
|
/// the vertex order, AugmentingOrder. The objective is to send as much flow
|
|
/// as possible while evenly distributing flow among successors of each node.
|
|
/// After the update at least one edge is saturated.
|
|
bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
|
|
// Phase 0: Initialization
|
|
for (uint64_t Src : AugmentingOrder) {
|
|
Nodes[Src].FracFlow = 0;
|
|
Nodes[Src].IntFlow = 0;
|
|
for (auto &Edge : AugmentingEdges[Src]) {
|
|
Edge->AugmentedFlow = 0;
|
|
}
|
|
}
|
|
|
|
// Phase 1: Send a unit of fractional flow along the DAG
|
|
uint64_t MaxFlowAmount = INF;
|
|
Nodes[Source].FracFlow = 1.0;
|
|
for (uint64_t Src : AugmentingOrder) {
|
|
assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
|
|
"incorrectly computed fractional flow");
|
|
// Distribute flow evenly among successors of Src
|
|
uint64_t Degree = AugmentingEdges[Src].size();
|
|
for (auto &Edge : AugmentingEdges[Src]) {
|
|
double EdgeFlow = Nodes[Src].FracFlow / Degree;
|
|
Nodes[Edge->Dst].FracFlow += EdgeFlow;
|
|
if (Edge->Capacity == INF)
|
|
continue;
|
|
uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
|
|
MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
|
|
}
|
|
}
|
|
// Stop early if we cannot send any (integral) flow from Source to Target
|
|
if (MaxFlowAmount == 0)
|
|
return false;
|
|
|
|
// Phase 2: Send an integral flow of MaxFlowAmount
|
|
Nodes[Source].IntFlow = MaxFlowAmount;
|
|
for (uint64_t Src : AugmentingOrder) {
|
|
if (Src == Target)
|
|
break;
|
|
// Distribute flow evenly among successors of Src, rounding up to make
|
|
// sure all flow is sent
|
|
uint64_t Degree = AugmentingEdges[Src].size();
|
|
// We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
|
|
uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
|
|
for (auto &Edge : AugmentingEdges[Src]) {
|
|
uint64_t Dst = Edge->Dst;
|
|
uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
|
|
EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
|
|
Nodes[Dst].IntFlow += EdgeFlow;
|
|
Nodes[Src].IntFlow -= EdgeFlow;
|
|
Edge->AugmentedFlow += EdgeFlow;
|
|
}
|
|
}
|
|
assert(Nodes[Target].IntFlow <= MaxFlowAmount);
|
|
Nodes[Target].IntFlow = 0;
|
|
|
|
// Phase 3: Send excess flow back traversing the nodes backwards.
|
|
// Because of rounding, not all flow can be sent along the edges of Src.
|
|
// Hence, sending the remaining flow back to maintain flow conservation
|
|
for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
|
|
uint64_t Src = AugmentingOrder[Idx - 1];
|
|
// Try to send excess flow back along each edge.
|
|
// Make sure we only send back flow we just augmented (AugmentedFlow).
|
|
for (auto &Edge : AugmentingEdges[Src]) {
|
|
uint64_t Dst = Edge->Dst;
|
|
if (Nodes[Dst].IntFlow == 0)
|
|
continue;
|
|
uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
|
|
Nodes[Dst].IntFlow -= EdgeFlow;
|
|
Nodes[Src].IntFlow += EdgeFlow;
|
|
Edge->AugmentedFlow -= EdgeFlow;
|
|
}
|
|
}
|
|
|
|
// Phase 4: Update flow values along all edges
|
|
bool HasSaturatedEdges = false;
|
|
for (uint64_t Src : AugmentingOrder) {
|
|
// Verify that we have sent all the excess flow from the node
|
|
assert(Src == Source || Nodes[Src].IntFlow == 0);
|
|
for (auto &Edge : AugmentingEdges[Src]) {
|
|
assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
|
|
// Update flow values along the edge and its reverse copy
|
|
auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
|
|
Edge->Flow += Edge->AugmentedFlow;
|
|
RevEdge.Flow -= Edge->AugmentedFlow;
|
|
if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
|
|
HasSaturatedEdges = true;
|
|
}
|
|
}
|
|
|
|
// The augmentation is successful iff at least one edge becomes saturated
|
|
return HasSaturatedEdges;
|
|
}
|
|
|
|
/// Identify candidate (shortest) edges for augmentation.
|
|
void identifyShortestEdges(uint64_t PathCapacity) {
|
|
assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
|
|
// To make sure the augmentation DAG contains only edges with large residual
|
|
// capacity, we prune all edges whose capacity is below a fraction of
|
|
// the capacity of the augmented path.
|
|
// (All edges of the path itself are always in the DAG)
|
|
uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
|
|
|
|
// Decide which edges are on a shortest path from Source to Target
|
|
for (size_t Src = 0; Src < Nodes.size(); Src++) {
|
|
// An edge cannot be augmenting if the endpoint has large distance
|
|
if (Nodes[Src].Distance > Nodes[Target].Distance)
|
|
continue;
|
|
|
|
for (auto &Edge : Edges[Src]) {
|
|
uint64_t Dst = Edge.Dst;
|
|
Edge.OnShortestPath =
|
|
Src != Target && Dst != Source &&
|
|
Nodes[Dst].Distance <= Nodes[Target].Distance &&
|
|
Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
|
|
Edge.Capacity > Edge.Flow &&
|
|
uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Per-vertex state of the flow network.
struct Node {
  // --- State of the shortest-path search ---

  /// Cost of the cheapest known path from the source to this node.
  int64_t Distance;
  /// Predecessor of this node on that path.
  uint64_t ParentNode;
  /// Index, within the predecessor's edge list, of the edge leading here.
  uint64_t ParentEdgeIndex;
  /// A multi-purpose marker: the shortest-path search uses it to flag nodes
  /// that are currently queued, the DAG search to flag nodes with a path to
  /// the target.
  bool Taken;

  // --- State of the DAG augmentation ---

  /// Fractional flow routed through the node.
  double FracFlow;
  /// Integral flow routed through the node.
  uint64_t IntFlow;
  /// DFS discovery time; 0 while the node is undiscovered.
  uint64_t Discovery;
  /// DFS finish time; 0 while the node is unfinished.
  uint64_t Finish;
  /// How many times the DFS has entered the node.
  uint64_t NumCalls;
};
|
|
|
|
/// An edge in a flow network.
///
/// Every field has a default member initializer, so that a default-constructed
/// Edge never carries indeterminate values (in particular in the
/// DAG-augmentation fields, which are assigned only while a DAG augmentation
/// is in progress).
struct Edge {
  /// The cost of the edge.
  int64_t Cost = 0;
  /// The capacity of the edge.
  int64_t Capacity = 0;
  /// The current flow on the edge.
  int64_t Flow = 0;
  /// The destination node of the edge.
  uint64_t Dst = 0;
  /// The index of the reverse edge between Dst and the current node.
  uint64_t RevEdgeIndex = 0;

  /// Data fields utilized in DAG-augmentation:
  /// Whether the edge is currently on a shortest path from Source to Target.
  bool OnShortestPath = false;
  /// Extra flow along the edge.
  uint64_t AugmentedFlow = 0;
};
|
|
|
|
/// The set of network nodes.
|
|
std::vector<Node> Nodes;
|
|
/// The set of network edges.
|
|
std::vector<std::vector<Edge>> Edges;
|
|
/// Source node of the flow.
|
|
uint64_t Source;
|
|
/// Target (sink) node of the flow.
|
|
uint64_t Target;
|
|
/// Augmenting edges.
|
|
std::vector<std::vector<Edge *>> AugmentingEdges;
|
|
};
|
|
|
|
// Namespace-scope definitions of the static constexpr data members that are
// declared (with their initializers) inside MinCostMaxFlow.
constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
|
|
constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
|
|
|
|
/// A post-processing adjustment of control flow. It applies two steps by
|
|
/// rerouting some flow and making it more realistic:
|
|
///
|
|
/// - First, it removes all isolated components ("islands") with a positive flow
|
|
/// that are unreachable from the entry block. For every such component, we
|
|
/// find the shortest path from the entry to an exit passing through the component,
|
|
/// and increase the flow by one unit along the path.
|
|
///
|
|
/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
|
|
/// with no sampled counts. Then it rebalances the flow that goes through such
|
|
/// a subgraph so that each branch is taken with probability 50%.
|
|
/// An unknown subgraph is such that for every two nodes u and v:
|
|
/// - u dominates v and u is not unknown;
|
|
/// - v post-dominates u; and
|
|
/// - all inner-nodes of all (u,v)-paths are unknown.
|
|
///
|
|
class FlowAdjuster {
|
|
public:
|
|
/// Bind the adjuster to the function whose flow is going to be
/// post-processed. Func.Entry must be the index of the entry block.
FlowAdjuster(FlowFunction &Func) : Func(Func) {
  assert(Func.Blocks[Func.Entry].isEntry() &&
         "incorrect index of the entry block");
}
|
|
|
|
// Run the post-processing
|
|
void run() {
|
|
/// Adjust the flow to get rid of isolated components.
|
|
joinIsolatedComponents();
|
|
|
|
/// Rebalance the flow inside unknown subgraphs.
|
|
rebalanceUnknownSubgraphs();
|
|
}
|
|
|
|
private:
|
|
void joinIsolatedComponents() {
|
|
// Find blocks that are reachable from the source
|
|
auto Visited = BitVector(NumBlocks(), false);
|
|
findReachable(Func.Entry, Visited);
|
|
|
|
// Iterate over all non-reachable blocks and adjust their weights
|
|
for (uint64_t I = 0; I < NumBlocks(); I++) {
|
|
auto &Block = Func.Blocks[I];
|
|
if (Block.Flow > 0 && !Visited[I]) {
|
|
// Find a path from the entry to an exit passing through the block I
|
|
auto Path = findShortestPath(I);
|
|
// Increase the flow along the path
|
|
assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
|
|
"incorrectly computed path adjusting control flow");
|
|
Func.Blocks[Func.Entry].Flow += 1;
|
|
for (auto &Jump : Path) {
|
|
Jump->Flow += 1;
|
|
Func.Blocks[Jump->Target].Flow += 1;
|
|
// Update reachability
|
|
findReachable(Jump->Target, Visited);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Run BFS from a given block along the jumps with a positive flow and mark
|
|
/// all reachable blocks.
|
|
void findReachable(uint64_t Src, BitVector &Visited) {
|
|
if (Visited[Src])
|
|
return;
|
|
std::queue<uint64_t> Queue;
|
|
Queue.push(Src);
|
|
Visited[Src] = true;
|
|
while (!Queue.empty()) {
|
|
Src = Queue.front();
|
|
Queue.pop();
|
|
for (auto Jump : Func.Blocks[Src].SuccJumps) {
|
|
uint64_t Dst = Jump->Target;
|
|
if (Jump->Flow > 0 && !Visited[Dst]) {
|
|
Queue.push(Dst);
|
|
Visited[Dst] = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Find the shortest path from the entry block to an exit block passing
|
|
/// through a given block.
|
|
std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
|
|
// A path from the entry block to BlockIdx
|
|
auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
|
|
// A path from BlockIdx to an exit block
|
|
auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
|
|
|
|
// Concatenate the two paths
|
|
std::vector<FlowJump *> Result;
|
|
Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
|
|
Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
|
|
return Result;
|
|
}
|
|
|
|
/// Apply the Dijkstra algorithm to find the shortest path from a given
|
|
/// Source to a given Target block.
|
|
/// If Target == -1, then the path ends at an exit block.
|
|
std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
|
|
// Quit early, if possible
|
|
if (Source == Target)
|
|
return std::vector<FlowJump *>();
|
|
if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
|
|
return std::vector<FlowJump *>();
|
|
|
|
// Initialize data structures
|
|
auto Distance = std::vector<int64_t>(NumBlocks(), INF);
|
|
auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
|
|
Distance[Source] = 0;
|
|
std::set<std::pair<uint64_t, uint64_t>> Queue;
|
|
Queue.insert(std::make_pair(Distance[Source], Source));
|
|
|
|
// Run the Dijkstra algorithm
|
|
while (!Queue.empty()) {
|
|
uint64_t Src = Queue.begin()->second;
|
|
Queue.erase(Queue.begin());
|
|
// If we found a solution, quit early
|
|
if (Src == Target ||
|
|
(Func.Blocks[Src].isExit() && Target == AnyExitBlock))
|
|
break;
|
|
|
|
for (auto Jump : Func.Blocks[Src].SuccJumps) {
|
|
uint64_t Dst = Jump->Target;
|
|
int64_t JumpDist = jumpDistance(Jump);
|
|
if (Distance[Dst] > Distance[Src] + JumpDist) {
|
|
Queue.erase(std::make_pair(Distance[Dst], Dst));
|
|
|
|
Distance[Dst] = Distance[Src] + JumpDist;
|
|
Parent[Dst] = Jump;
|
|
|
|
Queue.insert(std::make_pair(Distance[Dst], Dst));
|
|
}
|
|
}
|
|
}
|
|
// If Target is not provided, find the closest exit block
|
|
if (Target == AnyExitBlock) {
|
|
for (uint64_t I = 0; I < NumBlocks(); I++) {
|
|
if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
|
|
if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
|
|
Target = I;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
assert(Parent[Target] != nullptr && "a path does not exist");
|
|
|
|
// Extract the constructed path
|
|
std::vector<FlowJump *> Result;
|
|
uint64_t Now = Target;
|
|
while (Now != Source) {
|
|
assert(Now == Parent[Now]->Target && "incorrect parent jump");
|
|
Result.push_back(Parent[Now]);
|
|
Now = Parent[Now]->Source;
|
|
}
|
|
// Reverse the path, since it is extracted from Target to Source
|
|
std::reverse(Result.begin(), Result.end());
|
|
return Result;
|
|
}
|
|
|
|
/// A distance of a path for a given jump.
|
|
/// In order to incite the path to use blocks/jumps with large positive flow,
|
|
/// and avoid changing branch probability of outgoing edges drastically,
|
|
/// set the jump distance so as:
|
|
/// - to minimize the number of unlikely jumps used and subject to that,
|
|
/// - to minimize the number of Flow == 0 jumps used and subject to that,
|
|
/// - minimizes total multiplicative Flow increase for the remaining edges.
|
|
/// To capture this objective with integer distances, we round off fractional
|
|
/// parts to a multiple of 1 / BaseDistance.
|
|
int64_t jumpDistance(FlowJump *Jump) const {
|
|
uint64_t BaseDistance =
|
|
std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
|
|
std::min(Func.Blocks[Func.Entry].Flow,
|
|
MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
|
|
if (Jump->IsUnlikely)
|
|
return MinCostMaxFlow::AuxCostUnlikely;
|
|
if (Jump->Flow > 0)
|
|
return BaseDistance + BaseDistance / Jump->Flow;
|
|
return BaseDistance * NumBlocks();
|
|
};
|
|
|
|
/// The number of blocks of the function being adjusted.
uint64_t NumBlocks() const {
  return Func.Blocks.size();
}
|
|
|
|
/// Rebalance unknown subgraphs so that the flow is split evenly across the
|
|
/// outgoing branches of every block of the subgraph. The method iterates over
|
|
/// blocks with known weight and identifies unknown subgraphs rooted at the
|
|
/// blocks. Then it verifies if flow rebalancing is feasible and applies it.
|
|
void rebalanceUnknownSubgraphs() {
|
|
// Try to find unknown subgraphs from each block
|
|
for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
|
|
auto SrcBlock = &Func.Blocks[I];
|
|
// Verify if rebalancing rooted at SrcBlock is feasible
|
|
if (!canRebalanceAtRoot(SrcBlock))
|
|
continue;
|
|
|
|
// Find an unknown subgraphs starting at SrcBlock. Along the way,
|
|
// fill in known destinations and intermediate unknown blocks.
|
|
std::vector<FlowBlock *> UnknownBlocks;
|
|
std::vector<FlowBlock *> KnownDstBlocks;
|
|
findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
|
|
|
|
// Verify if rebalancing of the subgraph is feasible. If the search is
|
|
// successful, find the unique destination block (which can be null)
|
|
FlowBlock *DstBlock = nullptr;
|
|
if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
|
|
DstBlock))
|
|
continue;
|
|
|
|
// We cannot rebalance subgraphs containing cycles among unknown blocks
|
|
if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
|
|
continue;
|
|
|
|
// Rebalance the flow
|
|
rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
|
|
}
|
|
}
|
|
|
|
/// Verify if rebalancing rooted at a given block is possible.
|
|
bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
|
|
// Do not attempt to find unknown subgraphs from an unknown or a
|
|
// zero-flow block
|
|
if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
|
|
return false;
|
|
|
|
// Do not attempt to process subgraphs from a block w/o unknown sucessors
|
|
bool HasUnknownSuccs = false;
|
|
for (auto Jump : SrcBlock->SuccJumps) {
|
|
if (Func.Blocks[Jump->Target].UnknownWeight) {
|
|
HasUnknownSuccs = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!HasUnknownSuccs)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/// Find an unknown subgraph starting at block SrcBlock. The method sets
|
|
/// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
|
|
void findUnknownSubgraph(const FlowBlock *SrcBlock,
|
|
std::vector<FlowBlock *> &KnownDstBlocks,
|
|
std::vector<FlowBlock *> &UnknownBlocks) {
|
|
// Run BFS from SrcBlock and make sure all paths are going through unknown
|
|
// blocks and end at a known DstBlock
|
|
auto Visited = BitVector(NumBlocks(), false);
|
|
std::queue<uint64_t> Queue;
|
|
|
|
Queue.push(SrcBlock->Index);
|
|
Visited[SrcBlock->Index] = true;
|
|
while (!Queue.empty()) {
|
|
auto &Block = Func.Blocks[Queue.front()];
|
|
Queue.pop();
|
|
// Process blocks reachable from Block
|
|
for (auto Jump : Block.SuccJumps) {
|
|
// If Jump can be ignored, skip it
|
|
if (ignoreJump(SrcBlock, nullptr, Jump))
|
|
continue;
|
|
|
|
uint64_t Dst = Jump->Target;
|
|
// If Dst has been visited, skip Jump
|
|
if (Visited[Dst])
|
|
continue;
|
|
// Process block Dst
|
|
Visited[Dst] = true;
|
|
if (!Func.Blocks[Dst].UnknownWeight) {
|
|
KnownDstBlocks.push_back(&Func.Blocks[Dst]);
|
|
} else {
|
|
Queue.push(Dst);
|
|
UnknownBlocks.push_back(&Func.Blocks[Dst]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Verify if rebalancing of the subgraph is feasible. If the checks are
|
|
/// successful, set the unique destination block, DstBlock (can be null).
|
|
bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
|
|
const std::vector<FlowBlock *> &KnownDstBlocks,
|
|
const std::vector<FlowBlock *> &UnknownBlocks,
|
|
FlowBlock *&DstBlock) {
|
|
// If the list of unknown blocks is empty, we don't need rebalancing
|
|
if (UnknownBlocks.empty())
|
|
return false;
|
|
|
|
// If there are multiple known sinks, we can't rebalance
|
|
if (KnownDstBlocks.size() > 1)
|
|
return false;
|
|
DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
|
|
|
|
// Verify sinks of the subgraph
|
|
for (auto Block : UnknownBlocks) {
|
|
if (Block->SuccJumps.empty()) {
|
|
// If there are multiple (known and unknown) sinks, we can't rebalance
|
|
if (DstBlock != nullptr)
|
|
return false;
|
|
continue;
|
|
}
|
|
size_t NumIgnoredJumps = 0;
|
|
for (auto Jump : Block->SuccJumps) {
|
|
if (ignoreJump(SrcBlock, DstBlock, Jump))
|
|
NumIgnoredJumps++;
|
|
}
|
|
// If there is a non-sink block in UnknownBlocks with all jumps ignored,
|
|
// then we can't rebalance
|
|
if (NumIgnoredJumps == Block->SuccJumps.size())
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// Decide whether the Jump is ignored while processing an unknown subgraphs
|
|
/// rooted at basic block SrcBlock with the destination block, DstBlock.
|
|
bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
|
|
const FlowJump *Jump) {
|
|
// Ignore unlikely jumps with zero flow
|
|
if (Jump->IsUnlikely && Jump->Flow == 0)
|
|
return true;
|
|
|
|
auto JumpSource = &Func.Blocks[Jump->Source];
|
|
auto JumpTarget = &Func.Blocks[Jump->Target];
|
|
|
|
// Do not ignore jumps coming into DstBlock
|
|
if (DstBlock != nullptr && JumpTarget == DstBlock)
|
|
return false;
|
|
|
|
// Ignore jumps out of SrcBlock to known blocks
|
|
if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
|
|
return true;
|
|
|
|
// Ignore jumps to known blocks with zero flow
|
|
if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Verify if the given unknown subgraph is acyclic, and if yes, reorder
|
|
/// UnknownBlocks in the topological order (so that all jumps are "forward").
|
|
bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
|
|
std::vector<FlowBlock *> &UnknownBlocks) {
|
|
// Extract local in-degrees in the considered subgraph
|
|
auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
|
|
auto fillInDegree = [&](const FlowBlock *Block) {
|
|
for (auto Jump : Block->SuccJumps) {
|
|
if (ignoreJump(SrcBlock, DstBlock, Jump))
|
|
continue;
|
|
LocalInDegree[Jump->Target]++;
|
|
}
|
|
};
|
|
fillInDegree(SrcBlock);
|
|
for (auto Block : UnknownBlocks) {
|
|
fillInDegree(Block);
|
|
}
|
|
// A loop containing SrcBlock
|
|
if (LocalInDegree[SrcBlock->Index] > 0)
|
|
return false;
|
|
|
|
std::vector<FlowBlock *> AcyclicOrder;
|
|
std::queue<uint64_t> Queue;
|
|
Queue.push(SrcBlock->Index);
|
|
while (!Queue.empty()) {
|
|
FlowBlock *Block = &Func.Blocks[Queue.front()];
|
|
Queue.pop();
|
|
// Stop propagation once we reach DstBlock, if any
|
|
if (DstBlock != nullptr && Block == DstBlock)
|
|
break;
|
|
|
|
// Keep an acyclic order of unknown blocks
|
|
if (Block->UnknownWeight && Block != SrcBlock)
|
|
AcyclicOrder.push_back(Block);
|
|
|
|
// Add to the queue all successors with zero local in-degree
|
|
for (auto Jump : Block->SuccJumps) {
|
|
if (ignoreJump(SrcBlock, DstBlock, Jump))
|
|
continue;
|
|
uint64_t Dst = Jump->Target;
|
|
LocalInDegree[Dst]--;
|
|
if (LocalInDegree[Dst] == 0) {
|
|
Queue.push(Dst);
|
|
}
|
|
}
|
|
}
|
|
|
|
// If there is a cycle in the subgraph, AcyclicOrder contains only a subset
|
|
// of all blocks
|
|
if (UnknownBlocks.size() != AcyclicOrder.size())
|
|
return false;
|
|
UnknownBlocks = AcyclicOrder;
|
|
return true;
|
|
}
|
|
|
|
/// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
|
|
/// having UnknownBlocks intermediate blocks.
|
|
void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
|
|
const FlowBlock *DstBlock,
|
|
const std::vector<FlowBlock *> &UnknownBlocks) {
|
|
assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
|
|
|
|
// Ditribute flow from the source block
|
|
uint64_t BlockFlow = 0;
|
|
// SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
|
|
for (auto Jump : SrcBlock->SuccJumps) {
|
|
if (ignoreJump(SrcBlock, DstBlock, Jump))
|
|
continue;
|
|
BlockFlow += Jump->Flow;
|
|
}
|
|
rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
|
|
|
|
// Ditribute flow from the remaining blocks
|
|
for (auto Block : UnknownBlocks) {
|
|
assert(Block->UnknownWeight && "incorrect unknown subgraph");
|
|
uint64_t BlockFlow = 0;
|
|
// Block's flow is the sum of incoming flows
|
|
for (auto Jump : Block->PredJumps) {
|
|
BlockFlow += Jump->Flow;
|
|
}
|
|
Block->Flow = BlockFlow;
|
|
rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
|
|
}
|
|
}
|
|
|
|
/// Redistribute flow for a block in a subgraph rooted at SrcBlock,
|
|
/// and ending at DstBlock.
|
|
void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
|
|
const FlowBlock *Block, uint64_t BlockFlow) {
|
|
// Process all successor jumps and update corresponding flow values
|
|
size_t BlockDegree = 0;
|
|
for (auto Jump : Block->SuccJumps) {
|
|
if (ignoreJump(SrcBlock, DstBlock, Jump))
|
|
continue;
|
|
BlockDegree++;
|
|
}
|
|
// If all successor jumps of the block are ignored, skip it
|
|
if (DstBlock == nullptr && BlockDegree == 0)
|
|
return;
|
|
assert(BlockDegree > 0 && "all outgoing jumps are ignored");
|
|
|
|
// Each of the Block's successors gets the following amount of flow.
|
|
// Rounding the value up so that all flow is propagated
|
|
uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
|
|
for (auto Jump : Block->SuccJumps) {
|
|
if (ignoreJump(SrcBlock, DstBlock, Jump))
|
|
continue;
|
|
uint64_t Flow = std::min(SuccFlow, BlockFlow);
|
|
Jump->Flow = Flow;
|
|
BlockFlow -= Flow;
|
|
}
|
|
assert(BlockFlow == 0 && "not all flow is propagated");
|
|
}
|
|
|
|
/// A constant indicating an arbitrary exit block of a function.
/// Its value is uint64_t(-1), i.e. all bits set.
|
|
static constexpr uint64_t AnyExitBlock = uint64_t(-1);
|
|
|
|
/// The function being processed; methods of this class reach its blocks
/// through Func.Blocks. Stored as a reference rather than a copy.
|
|
FlowFunction &Func;
|
|
};
|
|
|
|
/// Initializing flow network for a given function.
|
|
///
|
|
/// Every block is split into three nodes that are responsible for (i) an
|
|
/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
|
|
/// reduction of the block weight.
|
|
void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
|
|
uint64_t NumBlocks = Func.Blocks.size();
|
|
assert(NumBlocks > 1 && "Too few blocks in a function");
|
|
LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
|
|
|
|
// Pre-process data: make sure the entry weight is at least 1
|
|
if (Func.Blocks[Func.Entry].Weight == 0) {
|
|
Func.Blocks[Func.Entry].Weight = 1;
|
|
}
|
|
// Introducing dummy source/sink pairs to allow flow circulation.
|
|
// The nodes corresponding to blocks of Func have indicies in the range
|
|
// [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
|
|
uint64_t S = 3 * NumBlocks;
|
|
uint64_t T = S + 1;
|
|
uint64_t S1 = S + 2;
|
|
uint64_t T1 = S + 3;
|
|
|
|
Network.initialize(3 * NumBlocks + 4, S1, T1);
|
|
|
|
// Create three nodes for every block of the function
|
|
for (uint64_t B = 0; B < NumBlocks; B++) {
|
|
auto &Block = Func.Blocks[B];
|
|
assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
|
|
"non-zero weight of a block w/o weight except for an entry");
|
|
|
|
// Split every block into two nodes
|
|
uint64_t Bin = 3 * B;
|
|
uint64_t Bout = 3 * B + 1;
|
|
uint64_t Baux = 3 * B + 2;
|
|
if (Block.Weight > 0) {
|
|
Network.addEdge(S1, Bout, Block.Weight, 0);
|
|
Network.addEdge(Bin, T1, Block.Weight, 0);
|
|
}
|
|
|
|
// Edges from S and to T
|
|
assert((!Block.isEntry() || !Block.isExit()) &&
|
|
"a block cannot be an entry and an exit");
|
|
if (Block.isEntry()) {
|
|
Network.addEdge(S, Bin, 0);
|
|
} else if (Block.isExit()) {
|
|
Network.addEdge(Bout, T, 0);
|
|
}
|
|
|
|
// An auxiliary node to allow increase/reduction of block counts:
|
|
// We assume that decreasing block counts is more expensive than increasing,
|
|
// and thus, setting separate costs here. In the future we may want to tune
|
|
// the relative costs so as to maximize the quality of generated profiles.
|
|
int64_t AuxCostInc = SampleProfileProfiCostInc;
|
|
int64_t AuxCostDec = SampleProfileProfiCostDec;
|
|
if (Block.UnknownWeight) {
|
|
// Do not penalize changing weights of blocks w/o known profile count
|
|
AuxCostInc = 0;
|
|
AuxCostDec = 0;
|
|
} else {
|
|
// Increasing the count for "cold" blocks with zero initial count is more
|
|
// expensive than for "hot" ones
|
|
if (Block.Weight == 0) {
|
|
AuxCostInc = SampleProfileProfiCostIncZero;
|
|
}
|
|
// Modifying the count of the entry block is expensive
|
|
if (Block.isEntry()) {
|
|
AuxCostInc = SampleProfileProfiCostIncEntry;
|
|
AuxCostDec = SampleProfileProfiCostDecEntry;
|
|
}
|
|
}
|
|
// For blocks with self-edges, do not penalize a reduction of the count,
|
|
// as all of the increase can be attributed to the self-edge
|
|
if (Block.HasSelfEdge) {
|
|
AuxCostDec = 0;
|
|
}
|
|
|
|
Network.addEdge(Bin, Baux, AuxCostInc);
|
|
Network.addEdge(Baux, Bout, AuxCostInc);
|
|
if (Block.Weight > 0) {
|
|
Network.addEdge(Bout, Baux, AuxCostDec);
|
|
Network.addEdge(Baux, Bin, AuxCostDec);
|
|
}
|
|
}
|
|
|
|
// Creating edges for every jump
|
|
for (auto &Jump : Func.Jumps) {
|
|
uint64_t Src = Jump.Source;
|
|
uint64_t Dst = Jump.Target;
|
|
if (Src != Dst) {
|
|
uint64_t SrcOut = 3 * Src + 1;
|
|
uint64_t DstIn = 3 * Dst;
|
|
uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
|
|
Network.addEdge(SrcOut, DstIn, Cost);
|
|
}
|
|
}
|
|
|
|
// Make sure we have a valid flow circulation
|
|
Network.addEdge(T, S, 0);
|
|
}
|
|
|
|
/// Extract resulting block and edge counts from the flow network.
|
|
void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
|
|
uint64_t NumBlocks = Func.Blocks.size();
|
|
|
|
// Extract resulting block counts
|
|
for (uint64_t Src = 0; Src < NumBlocks; Src++) {
|
|
auto &Block = Func.Blocks[Src];
|
|
uint64_t SrcOut = 3 * Src + 1;
|
|
int64_t Flow = 0;
|
|
for (auto &Adj : Network.getFlow(SrcOut)) {
|
|
uint64_t DstIn = Adj.first;
|
|
int64_t DstFlow = Adj.second;
|
|
bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
|
|
if (!IsAuxNode || Block.HasSelfEdge) {
|
|
Flow += DstFlow;
|
|
}
|
|
}
|
|
Block.Flow = Flow;
|
|
assert(Flow >= 0 && "negative block flow");
|
|
}
|
|
|
|
// Extract resulting jump counts
|
|
for (auto &Jump : Func.Jumps) {
|
|
uint64_t Src = Jump.Source;
|
|
uint64_t Dst = Jump.Target;
|
|
int64_t Flow = 0;
|
|
if (Src != Dst) {
|
|
uint64_t SrcOut = 3 * Src + 1;
|
|
uint64_t DstIn = 3 * Dst;
|
|
Flow = Network.getFlow(SrcOut, DstIn);
|
|
} else {
|
|
uint64_t SrcOut = 3 * Src + 1;
|
|
uint64_t SrcAux = 3 * Src + 2;
|
|
int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
|
|
if (AuxFlow > 0)
|
|
Flow = AuxFlow;
|
|
}
|
|
Jump.Flow = Flow;
|
|
assert(Flow >= 0 && "negative jump flow");
|
|
}
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
/// Verify that the computed flow values satisfy flow conservation rules
|
|
void verifyWeights(const FlowFunction &Func) {
|
|
const uint64_t NumBlocks = Func.Blocks.size();
|
|
auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
|
|
auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
|
|
for (auto &Jump : Func.Jumps) {
|
|
InFlow[Jump.Target] += Jump.Flow;
|
|
OutFlow[Jump.Source] += Jump.Flow;
|
|
}
|
|
|
|
uint64_t TotalInFlow = 0;
|
|
uint64_t TotalOutFlow = 0;
|
|
for (uint64_t I = 0; I < NumBlocks; I++) {
|
|
auto &Block = Func.Blocks[I];
|
|
if (Block.isEntry()) {
|
|
TotalInFlow += Block.Flow;
|
|
assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
|
|
} else if (Block.isExit()) {
|
|
TotalOutFlow += Block.Flow;
|
|
assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
|
|
} else {
|
|
assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
|
|
assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
|
|
}
|
|
}
|
|
assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
|
|
|
|
// Verify that there are no isolated flow components
|
|
// One could modify FlowFunction to hold edges indexed by the sources, which
|
|
// will avoid a creation of the object
|
|
auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
|
|
for (auto &Jump : Func.Jumps) {
|
|
if (Jump.Flow > 0) {
|
|
PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
|
|
}
|
|
}
|
|
|
|
// Run BFS from the source along edges with positive flow
|
|
std::queue<uint64_t> Queue;
|
|
auto Visited = BitVector(NumBlocks, false);
|
|
Queue.push(Func.Entry);
|
|
Visited[Func.Entry] = true;
|
|
while (!Queue.empty()) {
|
|
uint64_t Src = Queue.front();
|
|
Queue.pop();
|
|
for (uint64_t Dst : PositiveFlowEdges[Src]) {
|
|
if (!Visited[Dst]) {
|
|
Queue.push(Dst);
|
|
Visited[Dst] = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Verify that every block that has a positive flow is reached from the source
|
|
// along edges with a positive flow
|
|
for (uint64_t I = 0; I < NumBlocks; I++) {
|
|
auto &Block = Func.Blocks[I];
|
|
assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
|
|
}
|
|
}
|
|
#endif
|
|
|
|
} // end of anonymous namespace
|
|
|
|
/// Apply the profile inference algorithm for a given flow function
|
|
void llvm::applyFlowInference(FlowFunction &Func) {
|
|
// Create and apply an inference network model
|
|
auto InferenceNetwork = MinCostMaxFlow();
|
|
initializeNetwork(InferenceNetwork, Func);
|
|
InferenceNetwork.run();
|
|
|
|
// Extract flow values for every block and every edge
|
|
extractWeights(InferenceNetwork, Func);
|
|
|
|
// Post-processing adjustments to the flow
|
|
auto Adjuster = FlowAdjuster(Func);
|
|
Adjuster.run();
|
|
|
|
#ifndef NDEBUG
|
|
// Verify the result
|
|
verifyWeights(Func);
|
|
#endif
|
|
}
|