[VENTUS][RISCV][pass] Add insert join instruction pass for VBranch

we follow the following rules to insert join block and join instruction

 1: Legalize all the return block
    when there are one more return blocks in machine function, there must be
    branches, we need to reduce return blocks number down to 1
 1.1: If two return blocks have common nearest parent branch, this two blocks
    need to be joined, and we add a hasBeenJoined marker for this parent
    branch
 1.2: after we complete 1.1 process, there maybe one more return blocks, we
    need to further add join block, we recursively build dominator tree for
    these return blocks, first we find the nearest common dominator branch for
    two return blocks, and then get dominator tree path between dominator
    and each return block, we need to check this path in which whether any
    other branch blocks exists, ideally, the branch block in path should have
    been joined and marked, if not, this path is illegal, these two block can
    not be joined

 2: Insert join instructions
 2.1: we scan through the MachineBasic blocks and check what blocks to insert
    join instruction, below MBB represents MachineBasic Block
 2.2: The MBB must have one more predecessors and its nearest dominator must
     be a VBranch
 2.3: Then we analyze the the predecessor of MBB, if the predecessor
    has single successor, we add a join instruction to the predecessor end,
    other wise, we need to insert a join block between predecessor and MBB
This commit is contained in:
zhoujingya 2023-05-12 14:01:57 +08:00
parent a0a80bfdad
commit 97a3f99e4c
5 changed files with 790 additions and 0 deletions

View File

@ -41,6 +41,7 @@ add_llvm_target(RISCVCodeGen
RISCVTargetTransformInfo.cpp
VentusRegextInsertion.cpp
VentusVVInstrConversion.cpp
VentusInsertJoinToVBranch.cpp
GISel/RISCVCallLowering.cpp
GISel/RISCVInstructionSelector.cpp
GISel/RISCVLegalizerInfo.cpp

View File

@ -72,6 +72,9 @@ void initializeVentusRegextInsertionPass(PassRegistry &);
FunctionPass *createVentusVVInstrConversionPass();
void initializeVentusVVInstrConversionPass(PassRegistry &);
FunctionPass *createVentusInsertJoinToVBranchPass();
void initializeVentusInsertJoinToVBranchPass(PassRegistry &);
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,
RISCVRegisterBankInfo &);

View File

@ -286,6 +286,7 @@ void RISCVPassConfig::addPreEmitPass2() {
// Insert regext instruction for instruction whose register id is greater
// than 31.
addPass(createVentusRegextInsertionPass());
addPass(createVentusInsertJoinToVBranchPass());
}
void RISCVPassConfig::addMachineSSAOptimization() {
@ -302,11 +303,13 @@ void RISCVPassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createVentusVVInstrConversionPass());
}
// Adds the RISC-V redundant-copy-elimination pass, but only when the target
// machine is optimizing (opt level is not None) and the
// EnableRedundantCopyElimination option is set.
void RISCVPassConfig::addPostRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
addPass(createRISCVRedundantCopyEliminationPass());
}
yaml::MachineFunctionInfo *

View File

@ -0,0 +1,388 @@
//===-- VentusInsertJoinToVBranch.cpp - Insert join to VBranches ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// In Ventus, if VBranch instructions are generated, we need to insert join
// instructions in both `else` and `then` branch to tell hardware where these
// two branches need to join together
//
// we follow the following rules to insert join block and join instruction
//
// 1: Legalize all the return blocks
// When there is more than one return block in a machine function, there must
// be branches; we need to reduce the number of return blocks down to 1
// 1.1: If two return blocks have a common nearest parent branch, these two
// blocks need to be joined, and we set the hasBeenJoined marker on this
// parent branch
// 1.2: After step 1.1 completes, there may still be more than one return
// block, so we need to add further join blocks. We repeatedly rebuild the
// dominator tree for these return blocks: first we find the nearest common
// dominator branch for two return blocks, then we get the dominator tree path
// between that dominator and each return block and check whether any other
// branch block lies on this path. Ideally, every branch block on the path
// has already been joined and marked; if not, the path is illegal and the
// two blocks cannot be joined
//
// 2: Insert join instructions
// 2.1: We scan through the machine basic blocks and decide which blocks need
// a join instruction; below, MBB stands for MachineBasicBlock
// 2.2: The MBB must have more than one predecessor and its immediate
// dominator must be a VBranch
// 2.3: Then we analyze each predecessor of the MBB: if the predecessor
// has a single successor, we add a join instruction to the predecessor's end;
// otherwise, we insert a join block between the predecessor and the MBB
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/CodeGenPassBuilder.h"
#include "llvm/CodeGen/MachineDominators.h"
#include <memory>
// Human-readable pass name: returned by getPassName() and passed to
// INITIALIZE_PASS as the pass description.
#define VENTUS_INSERT_JOIN_TO_BRANCH "Insert join to VBranch"
// Debug category for this file (not referenced elsewhere in this file;
// presumably consumed by LLVM_DEBUG / -debug-only — confirm before relying
// on the exact spelling).
#define DEBUG_TYPE "Insert_join_to_VBranch"
using namespace llvm;
namespace {
/// Bookkeeping attached to every basic block that ends in a conditional
/// branch.
struct BranchInfo {
  /// Set when the block's last instruction is a divergent (VBranch) branch.
  bool isDivergentBranch{false};
  /// Set once the paths leaving this branch have been joined.
  bool hasBeenJoined{false};
};
class VentusInsertJoinToVBranch : public MachineFunctionPass {
public:
const RISCVInstrInfo *TII;
static char ID;
MachineFunction *MachineFunc;
const RISCVRegisterInfo *MRI;
const MachineRegisterInfo *MR;
SmallVector<MachineBasicBlock *, 8> ReturnBlock;
SmallDenseMap<MachineBasicBlock *, BranchInfo> BranchMBBInfo;
MachineDominatorTree *MDT = new MachineDominatorTree();
VentusInsertJoinToVBranch() : MachineFunctionPass(ID) {
initializeVentusInsertJoinToVBranchPass(*PassRegistry::getPassRegistry());
}
~VentusInsertJoinToVBranch() { delete MDT; }
// Collect all the branch blocks information in function
void collectBranchMBBInfo(MachineFunction &MF);
bool insertJoinMBB(MachineBasicBlock &MBB1, MachineBasicBlock &MBB2);
bool runOnMachineFunction(MachineFunction &MF) override;
bool legalizeRetMBB(MachineBasicBlock &MBB);
bool hasCommonNearestParentBranch(MachineBasicBlock &MBB1,
MachineBasicBlock &MBB2);
bool canJoinMBB(MachineBasicBlock &MBB1, MachineBasicBlock &MBB2);
/// This function check two return blocks whether can join or not
bool hasNoUnjoinedBranch(MachineBasicBlock *CurrMBB,
MachineBasicBlock *TargetMBB);
/// Find all the branch predecessor no matter direct or indirect
SmallVector<MachineBasicBlock *>
findAllNearestParentBranches(MachineBasicBlock &MBB);
/// Check MBB is divergent branch or not
bool isDivergentBranchBlock(MachineBasicBlock &MBB) {
if (MBB.empty())
return false;
const MachineInstr &MI = MBB.instr_back();
switch (MI.getOpcode()) {
default:
return false;
case RISCV::VBEQ:
case RISCV::VBNE:
case RISCV::VBLT:
case RISCV::VBGE:
case RISCV::VBLTU:
case RISCV::VBGEU:
return true;
}
}
/// Check MBB is common branch or not
bool isCommonBranchBlock(MachineBasicBlock &MBB) {
if (MBB.empty())
return false;
const MachineInstr &MI = MBB.instr_back();
switch (MI.getOpcode()) {
default:
return false;
case RISCV::BEQ:
case RISCV::BNE:
case RISCV::BLT:
case RISCV::BGE:
case RISCV::BLTU:
case RISCV::BGEU:
return true;
}
}
// virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
// AU.addRequired<BranchFolderPass>();
// AU.setPreservesAll();
// }
/// Legalize all the return MBB
bool canJoinRetMBB(MachineFunction &MF);
/// Get return MBB numbers
unsigned getReturnBlockNum(MachineFunction &MF);
StringRef getPassName() const override {
return VENTUS_INSERT_JOIN_TO_BRANCH;
}
};
// Out-of-class definition of the static pass ID; the constructor hands it to
// MachineFunctionPass.
char VentusInsertJoinToVBranch::ID = 0;
/// Rebuild BranchMBBInfo for \p MF: every block whose last instruction is a
/// scalar or divergent conditional branch gets an entry, flagged as
/// divergent or not and initially marked as not joined.
void VentusInsertJoinToVBranch::collectBranchMBBInfo(MachineFunction &MF) {
  // One pass object processes every function of the module, so entries from
  // the previous function (whose block pointers are dangling by now) must be
  // dropped before collecting the new ones.
  BranchMBBInfo.clear();
  for (MachineBasicBlock &MBB : MF) {
    if (isCommonBranchBlock(MBB))
      BranchMBBInfo[&MBB] = {false, false};
    else if (isDivergentBranchBlock(MBB))
      BranchMBBInfo[&MBB] = {true, false};
  }
}
/// Refill ReturnBlock with the blocks of \p MF that currently are return
/// blocks (in layout order) and report how many were found.
unsigned VentusInsertJoinToVBranch::getReturnBlockNum(MachineFunction &MF) {
  // Start every analysis from an empty list.
  ReturnBlock.clear();
  for (MachineBasicBlock &Block : MF)
    if (Block.isReturnBlock())
      ReturnBlock.push_back(&Block);
  return ReturnBlock.size();
}
/// Give \p MBB1 and \p MBB2 a common return: a fresh block holding a single
/// PseudoRET is appended to the function, the return of each input block is
/// stripped via legalizeRetMBB(), and the fresh block becomes the successor
/// of both. Always reports a change.
bool VentusInsertJoinToVBranch::insertJoinMBB(MachineBasicBlock &MBB1,
                                              MachineBasicBlock &MBB2) {
  MachineBasicBlock *SharedRetMBB = MachineFunc->CreateMachineBasicBlock();
  // Appends the return at the end of the (still empty) new block.
  BuildMI(SharedRetMBB, DebugLoc(), TII->get(RISCV::PseudoRET));
  MachineFunc->push_back(SharedRetMBB);

  // First remove both old returns ...
  for (MachineBasicBlock *OldRet : {&MBB1, &MBB2})
    legalizeRetMBB(*OldRet);
  // ... then route both blocks into the shared return block.
  for (MachineBasicBlock *OldRet : {&MBB1, &MBB2})
    OldRet->addSuccessor(SharedRetMBB);
  return true;
}
/// Check if two return blocks can join or not.
///
/// The blocks can be joined when they have a nearest common dominator and
/// neither dominator-tree path from that dominator to \p MBB1 / \p MBB2
/// contains a branch block that is still unjoined. On success the dominator
/// is marked as joined (if it is a recorded branch block).
bool VentusInsertJoinToVBranch::canJoinMBB(MachineBasicBlock &MBB1,
                                           MachineBasicBlock &MBB2) {
  MachineBasicBlock *DominatorBlock =
      MDT->findNearestCommonDominator(&MBB1, &MBB2);
  if (!DominatorBlock)
    return false;

  // hasNoUnjoinedBranch() returns true when it FINDS an unjoined branch.
  if (hasNoUnjoinedBranch(DominatorBlock, &MBB1) ||
      hasNoUnjoinedBranch(DominatorBlock, &MBB2))
    return false;

  // The dominator is not necessarily recorded in BranchMBBInfo (only blocks
  // whose last instruction is a conditional branch are), so never
  // dereference the lookup result unchecked.
  auto Info = BranchMBBInfo.find(DominatorBlock);
  if (Info != BranchMBBInfo.end())
    Info->second.hasBeenJoined = true;
  return true;
}
/// Pass entry point.
///
/// 1. Caches the per-function helpers and collects all branch blocks.
/// 2. Merges the return blocks (canJoinRetMBB).
/// 3. For every block that has at least two predecessors and whose immediate
///    dominator ends in a divergent branch, makes each predecessor reach the
///    block through a JOIN: predecessors ending in a divergent branch get a
///    new block containing only the JOIN placed on the edge, all other
///    predecessors get the JOIN appended at their end.
///
/// \returns true if the function was modified in any way.
bool VentusInsertJoinToVBranch::runOnMachineFunction(MachineFunction &MF) {
  bool IsChanged = false;
  TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
  MRI = MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
  MR = &MF.getRegInfo();
  MachineFunc = &MF;

  collectBranchMBBInfo(MF);
  MDT->getBase().recalculate(*MachineFunc);

  // After this check, all return blocks are expected to be legal
  IsChanged |= canJoinRetMBB(MF);
  MDT->getBase().recalculate(*MachineFunc);
  // TODO: once return-block merging is complete, assert that
  // getReturnBlockNum(MF) == 1 here.

  // Blocks are inserted while iterating, hence the early-increment range.
  // Freshly inserted blocks have no dominator tree node and are skipped.
  for (auto &MBB : make_early_inc_range(MF)) {
    MachineDomTreeNode *Node = MDT->getNode(&MBB);
    if (!Node || !Node->getIDom())
      continue;
    // At least two predecessors
    if (MBB.pred_size() < 2)
      continue;
    // The immediate dominator must be a recorded divergent branch.
    auto IDomInfo = BranchMBBInfo.find(Node->getIDom()->getBlock());
    if (IDomInfo == BranchMBBInfo.end() ||
        !IDomInfo->getSecond().isDivergentBranch)
      continue;

    // Work on a snapshot: the predecessor list is edited below.
    SmallVector<MachineBasicBlock *, 4> Predecessors(MBB.pred_begin(),
                                                     MBB.pred_end());
    for (MachineBasicBlock *Predecessor : Predecessors) {
      if (isDivergentBranchBlock(*Predecessor)) {
        // Divergent branch, insert a block between MBB & predecessor
        MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
        // This is essential to keep CFG legal, if MBB is the fall through
        // block of predecessor, the NewBB should replace MBB's place
        // otherwise, we only need to insert before MBB
        if (Predecessor->getFallThrough() == &MBB)
          MF.insert(std::next(Predecessor->getIterator()), NewBB);
        else
          MF.insert(MBB.getIterator(), NewBB);
        Predecessor->replaceSuccessor(&MBB, NewBB);
        NewBB->addSuccessor(&MBB);
        BuildMI(*NewBB, NewBB->end(), DebugLoc(), TII->get(RISCV::JOIN))
            .addMBB(&MBB);

        MachineInstr *LastInst = &(*Predecessor->getFirstInstrTerminator());
        assert(LastInst->isBranch() && "Not branch instruction");
        // Only retarget the branch when it actually jumped to MBB. If MBB
        // was reached by fall-through, the branch points at the *other*
        // successor and must keep doing so; the new block already sits in
        // the fall-through position.
        MachineOperand &Target = LastInst->getOperand(2);
        if (Target.isMBB() && Target.getMBB() == &MBB)
          Target.setMBB(NewBB);
        IsChanged = true;
      } else if (Predecessor->empty() ||
                 Predecessor->instr_back().getOpcode() != RISCV::JOIN) {
        // Avoid duplicate JOIN add. An empty predecessor has no last
        // instruction to inspect and simply receives the JOIN.
        BuildMI(*Predecessor, Predecessor->end(), DebugLoc(),
                TII->get(RISCV::JOIN))
            .addMBB(&MBB);
        IsChanged = true;
      }
    }
  }
  return IsChanged;
}
/// Merge the return blocks of \p MF.
///
/// Phase 1 joins every pair of return blocks that shares a nearest parent
/// branch. Phase 2 then repeatedly joins pairs accepted by canJoinMBB(),
/// rebuilding the dominator tree and the return block list after each round,
/// until a round no longer changes the number of return blocks.
///
/// \returns true if any pair was joined.
bool VentusInsertJoinToVBranch::canJoinRetMBB(MachineFunction &MF) {
  bool IsChanged = false;

  // Phase 1: check two MBBs' nearest parent branch MBB is the same or not,
  // if it is the same we join them into a shared return block.
  unsigned ReturnBlockNum = getReturnBlockNum(MF);
  for (size_t i = 0; i < ReturnBlockNum; i++) {
    for (size_t j = i + 1; j < ReturnBlockNum; j++) {
      // ReturnBlock is a snapshot: a block that was joined earlier in this
      // scan no longer ends in a return and must not be paired again,
      // otherwise its branch would be marked and its (already removed)
      // return "legalized" a second time.
      if (!ReturnBlock[i]->isReturnBlock())
        break;
      if (!ReturnBlock[j]->isReturnBlock())
        continue;
      if (hasCommonNearestParentBranch(*ReturnBlock[i], *ReturnBlock[j]))
        IsChanged |= insertJoinMBB(*ReturnBlock[i], *ReturnBlock[j]);
    }
  }

  // Rebuild dominator tree
  MDT->getBase().recalculate(MF);
  unsigned RetNum = getReturnBlockNum(MF);

  // Phase 2: dominator based joining, iterated to a fixed point.
  while (true) {
    for (size_t i = 0; i < ReturnBlock.size(); i++) {
      for (size_t j = i + 1; j < ReturnBlock.size(); j++) {
        // Same stale-snapshot guard as in phase 1.
        if (!ReturnBlock[i]->isReturnBlock())
          break;
        if (!ReturnBlock[j]->isReturnBlock())
          continue;
        if (canJoinMBB(*ReturnBlock[i], *ReturnBlock[j]))
          IsChanged |= insertJoinMBB(*ReturnBlock[i], *ReturnBlock[j]);
      }
    }
    // After check, rebuild dominator tree
    MDT->getBase().recalculate(MF);
    unsigned RetNum1 = getReturnBlockNum(MF);
    if (RetNum1 == RetNum)
      // Avoid dead loop
      break;
    RetNum = RetNum1;
  }
  return IsChanged;
}
/// Legalize return block, right now, we only consider tail call && ret.
///
/// A trailing PseudoRET is erased; a trailing PseudoTAIL is turned into a
/// PseudoCALL by swapping its instruction descriptor. Any other last
/// instruction is left untouched.
///
/// \returns true if the block was changed, false for an empty block or an
/// unexpected last instruction.
bool VentusInsertJoinToVBranch::legalizeRetMBB(MachineBasicBlock &MBB) {
  if (MBB.empty())
    return false;

  // Get last instruction in this basic block
  MachineInstr &LastInst = MBB.instr_back();
  switch (LastInst.getOpcode()) {
  case RISCV::PseudoRET:
    // The shared return block provides the return from now on.
    LastInst.eraseFromParent();
    return true;
  case RISCV::PseudoTAIL:
    // The block no longer returns itself, so the tail call becomes a call.
    LastInst.setDesc(TII->get(RISCV::PseudoCALL));
    return true;
  default:
    // Never rewrite an arbitrary instruction into a call: flag the problem
    // in asserts builds and leave the block alone otherwise.
    assert(false && "Unexpected opcode");
    return false;
  }
}
/// Report whether \p MBB1 and \p MBB2 have a nearest parent branch in
/// common. The first common branch found (in the order of MBB1's parents) is
/// marked as joined when it is a recorded branch block.
bool VentusInsertJoinToVBranch::hasCommonNearestParentBranch(
    MachineBasicBlock &MBB1, MachineBasicBlock &MBB2) {
  auto FirstParents = findAllNearestParentBranches(MBB1);
  auto SecondParents = findAllNearestParentBranches(MBB2);

  for (MachineBasicBlock *Candidate : FirstParents) {
    auto Shared =
        std::find(SecondParents.begin(), SecondParents.end(), Candidate);
    if (Shared == SecondParents.end())
      continue;

    // Update the shared branch's hasBeenJoined flag
    auto Info = BranchMBBInfo.find(*Shared);
    if (Info != BranchMBBInfo.end())
      Info->getSecond().hasBeenJoined = true;
    return true;
  }
  return false;
}
/// Walk backwards from \p MBB and gather, for every predecessor path, the
/// first block that has two or more successors. Predecessors with fewer
/// successors are looked through recursively. The result may contain the
/// same block more than once.
SmallVector<MachineBasicBlock *>
VentusInsertJoinToVBranch::findAllNearestParentBranches(
    MachineBasicBlock &MBB) {
  SmallVector<MachineBasicBlock *, 8> BranchParents;
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    // A predecessor that branches is itself the nearest parent branch.
    if (Pred->succ_size() >= 2) {
      BranchParents.push_back(Pred);
      continue;
    }
    // Otherwise keep searching above the predecessor.
    auto Inherited = findAllNearestParentBranches(*Pred);
    BranchParents.append(Inherited.begin(), Inherited.end());
  }
  return BranchParents;
}
/// Look for a still-unjoined branch block on the dominator-tree path from
/// \p TargetMBB up towards \p DominatorMBB.
///
/// NOTE: the name is misleading. The function returns true when an unjoined
/// branch block IS found and false when none is found.
///
/// The walk starts at \p TargetMBB and follows immediate dominators. Neither
/// \p DominatorMBB nor the node whose immediate dominator is
/// \p DominatorMBB is inspected.
bool VentusInsertJoinToVBranch::hasNoUnjoinedBranch(
    MachineBasicBlock *DominatorMBB, MachineBasicBlock *TargetMBB) {
  for (MachineDomTreeNode *Node = MDT->getNode(TargetMBB);
       Node && Node->getBlock() != DominatorMBB; Node = Node->getIDom()) {
    MachineDomTreeNode *Parent = Node->getIDom();
    // Stop directly below the dominator. Also stop at the tree root (no
    // immediate dominator) instead of dereferencing a null node.
    if (!Parent || Parent->getBlock() == DominatorMBB)
      break;

    // If found unjoined branch, return true
    auto Info = BranchMBBInfo.find(Node->getBlock());
    if (Info != BranchMBBInfo.end() && !Info->getSecond().hasBeenJoined)
      return true;
  }
  return false;
}
} // end of anonymous namespace
// Pass registration: provides initializeVentusInsertJoinToVBranchPass(),
// which the pass constructor calls. "Insert-join-to-VBranch" is the pass
// argument string and VENTUS_INSERT_JOIN_TO_BRANCH its description.
INITIALIZE_PASS(VentusInsertJoinToVBranch, "Insert-join-to-VBranch",
VENTUS_INSERT_JOIN_TO_BRANCH, false, false)
namespace llvm {
FunctionPass *createVentusInsertJoinToVBranchPass() {
return new VentusInsertJoinToVBranch();
}
} // end of namespace llvm

View File

@ -0,0 +1,395 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(none) vscale_range(1,2048)
define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-LABEL: branch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: li a1, 14
; VENTUS-NEXT: vmv.v.x v1, a1
; VENTUS-NEXT: li a1, 13
; VENTUS-NEXT: vblt v0, v1, .LBB0_5
; VENTUS-NEXT: # %bb.1: # %if.else
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: li a1, 18
; VENTUS-NEXT: bgeu a0, a1, .LBB0_3
; VENTUS-NEXT: join v0, v0, .LBB0_2
; VENTUS-NEXT: .LBB0_5:
; VENTUS-NEXT: join v0, v0, .LBB0_2
; VENTUS-NEXT: .LBB0_2: # %cleanup
; VENTUS-NEXT: vmv.v.x v0, a1
; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: join v0, v0, .LBB0_4
; VENTUS-NEXT: .LBB0_3: # %if.end3
; VENTUS-NEXT: li a0, 4
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: join v0, v0, .LBB0_4
; VENTUS-NEXT: .LBB0_4:
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
%cmp = icmp slt i32 %call, 14
br i1 %cmp, label %cleanup, label %if.else
if.else: ; preds = %entry
%cmp1 = icmp ult i32 %call, 18
br i1 %cmp1, label %cleanup, label %if.end3
if.end3: ; preds = %if.else
%call4 = tail call i32 @_Z13get_global_idj(i32 noundef 4)
br label %cleanup
cleanup: ; preds = %if.else, %entry, %if.end3
%retval.0 = phi i32 [ %call4, %if.end3 ], [ 13, %entry ], [ 18, %if.else ]
ret i32 %retval.0
}
define dso_local spir_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: loop_branch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset s0, 0
; VENTUS-NEXT: mv s0, a0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: vbeq v1, v0, .LBB1_4
; VENTUS-NEXT: # %bb.1: # %for.body.lr.ph
; VENTUS-NEXT: lw a3, 4(s0)
; VENTUS-NEXT: lw a1, 0(s0)
; VENTUS-NEXT: slli a4, a0, 2
; VENTUS-NEXT: add a1, a1, a4
; VENTUS-NEXT: lw a2, 0(a1)
; VENTUS-NEXT: add a3, a3, a4
; VENTUS-NEXT: .LBB1_2: # %for.body
; VENTUS-NEXT: # =>This Inner Loop Header: Depth=1
; VENTUS-NEXT: lw a4, 0(a3)
; VENTUS-NEXT: add a2, a2, a4
; VENTUS-NEXT: addi a0, a0, -1
; VENTUS-NEXT: sw a2, 0(a1)
; VENTUS-NEXT: bnez a0, .LBB1_2
; VENTUS-NEXT: join v0, v0, .LBB1_3
; VENTUS-NEXT: .LBB1_4:
; VENTUS-NEXT: join v0, v0, .LBB1_3
; VENTUS-NEXT: .LBB1_3: # %for.cond.cleanup
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
%cmp5.not = icmp eq i32 %call, 0
br i1 %cmp5.not, label %for.cond.cleanup, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
%arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
%.pre = load i32, ptr addrspace(1) %arrayidx1, align 4
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %for.body.lr.ph, %for.body
%0 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add, %for.body ]
%i.06 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%1 = load i32, ptr addrspace(1) %arrayidx, align 4
%add = add nsw i32 %0, %1
store i32 %add, ptr addrspace(1) %arrayidx1, align 4
%inc = add nuw nsw i32 %i.06, 1
%exitcond.not = icmp eq i32 %inc, %call
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-LABEL: branch_in_branch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset s0, 0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: li a0, 14
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: li a0, 13
; VENTUS-NEXT: vblt v0, v1, .LBB2_9
; VENTUS-NEXT: # %bb.1: # %if.else
; VENTUS-NEXT: vmv.x.s s0, v0
; VENTUS-NEXT: li a0, 17
; VENTUS-NEXT: bltu a0, s0, .LBB2_4
; VENTUS-NEXT: # %bb.2: # %if.then2
; VENTUS-NEXT: li a0, 1
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.v.x v1, s0
; VENTUS-NEXT: vblt v0, v1, .LBB2_5
; VENTUS-NEXT: # %bb.3: # %if.then2
; VENTUS-NEXT: li a0, 23
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: join v0, v0, .LBB2_6
; VENTUS-NEXT: .LBB2_4: # %if.end7
; VENTUS-NEXT: li a0, 4
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: join v0, v0, .LBB2_8
; VENTUS-NEXT: .LBB2_5: # %if.then2
; VENTUS-NEXT: li a0, 12
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: join v0, v0, .LBB2_6
; VENTUS-NEXT: .LBB2_6: # %if.then2
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: join v0, v0, .LBB2_7
; VENTUS-NEXT: .LBB2_9:
; VENTUS-NEXT: join v0, v0, .LBB2_7
; VENTUS-NEXT: .LBB2_7: # %cleanup9
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: join v0, v0, .LBB2_8
; VENTUS-NEXT: .LBB2_8:
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
%cmp = icmp slt i32 %call, 14
br i1 %cmp, label %cleanup9, label %if.else
if.else: ; preds = %entry
%cmp1 = icmp ult i32 %call, 18
br i1 %cmp1, label %if.then2, label %if.end7
if.then2: ; preds = %if.else
%call3 = tail call i32 @_Z13get_global_idj(i32 noundef 1)
%cmp4 = icmp sgt i32 %call, %call3
%. = select i1 %cmp4, i32 12, i32 23
br label %cleanup9
if.end7: ; preds = %if.else
%call8 = tail call i32 @_Z13get_global_idj(i32 noundef 4)
br label %cleanup9
cleanup9: ; preds = %entry, %if.end7, %if.then2
%retval.1 = phi i32 [ %., %if.then2 ], [ %call8, %if.end7 ], [ 13, %entry ]
ret i32 %retval.1
}
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
define dso_local spir_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: double_loop:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset s0, 0
; VENTUS-NEXT: mv s0, a0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: vbeq v1, v0, .LBB3_6
; VENTUS-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
; VENTUS-NEXT: li a1, 0
; VENTUS-NEXT: lw a4, 4(s0)
; VENTUS-NEXT: lw a2, 0(s0)
; VENTUS-NEXT: slli a5, a0, 2
; VENTUS-NEXT: add a2, a2, a5
; VENTUS-NEXT: lw a3, 0(a2)
; VENTUS-NEXT: add a4, a4, a5
; VENTUS-NEXT: .LBB3_2: # %for.cond1.preheader
; VENTUS-NEXT: # =>This Loop Header: Depth=1
; VENTUS-NEXT: # Child Loop BB3_3 Depth 2
; VENTUS-NEXT: mv a5, a0
; VENTUS-NEXT: .LBB3_3: # %for.body4
; VENTUS-NEXT: # Parent Loop BB3_2 Depth=1
; VENTUS-NEXT: # => This Inner Loop Header: Depth=2
; VENTUS-NEXT: lw a6, 0(a4)
; VENTUS-NEXT: add a3, a3, a6
; VENTUS-NEXT: addi a5, a5, -1
; VENTUS-NEXT: sw a3, 0(a2)
; VENTUS-NEXT: bnez a5, .LBB3_3
; VENTUS-NEXT: # %bb.4: # %for.cond1.for.cond.cleanup3_crit_edge
; VENTUS-NEXT: # in Loop: Header=BB3_2 Depth=1
; VENTUS-NEXT: addi a1, a1, 1
; VENTUS-NEXT: bne a1, a0, .LBB3_2
; VENTUS-NEXT: join v0, v0, .LBB3_5
; VENTUS-NEXT: .LBB3_6:
; VENTUS-NEXT: join v0, v0, .LBB3_5
; VENTUS-NEXT: .LBB3_5: # %for.cond.cleanup
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
%cmp16.not = icmp eq i32 %call, 0
br i1 %cmp16.not, label %for.cond.cleanup, label %for.cond1.preheader.lr.ph
for.cond1.preheader.lr.ph: ; preds = %entry
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
%arrayidx5 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
%.pre.pre = load i32, ptr addrspace(1) %arrayidx5, align 4
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.cond1.for.cond.cleanup3_crit_edge
%.pre = phi i32 [ %.pre.pre, %for.cond1.preheader.lr.ph ], [ %add, %for.cond1.for.cond.cleanup3_crit_edge ]
%i.017 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc7, %for.cond1.for.cond.cleanup3_crit_edge ]
br label %for.body4
for.cond.cleanup: ; preds = %for.cond1.for.cond.cleanup3_crit_edge, %entry
ret void
for.cond1.for.cond.cleanup3_crit_edge: ; preds = %for.body4
%inc7 = add nuw nsw i32 %i.017, 1
%exitcond18.not = icmp eq i32 %inc7, %call
br i1 %exitcond18.not, label %for.cond.cleanup, label %for.cond1.preheader
for.body4: ; preds = %for.cond1.preheader, %for.body4
%0 = phi i32 [ %.pre, %for.cond1.preheader ], [ %add, %for.body4 ]
%j.015 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
%1 = load i32, ptr addrspace(1) %arrayidx, align 4
%add = add nsw i32 %0, %1
store i32 %add, ptr addrspace(1) %arrayidx5, align 4
%inc = add nuw nsw i32 %j.015, 1
%exitcond.not = icmp eq i32 %inc, %call
br i1 %exitcond.not, label %for.cond1.for.cond.cleanup3_crit_edge, label %for.body4
}
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
define dso_local spir_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: loop_switch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset s0, 0
; VENTUS-NEXT: mv s0, a0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: vbeq v1, v0, .LBB4_10
; VENTUS-NEXT: # %bb.1: # %for.body.lr.ph
; VENTUS-NEXT: li a1, 0
; VENTUS-NEXT: lw a2, 4(s0)
; VENTUS-NEXT: lw a5, 0(s0)
; VENTUS-NEXT: slli a3, a0, 2
; VENTUS-NEXT: add a2, a2, a3
; VENTUS-NEXT: add a3, a5, a3
; VENTUS-NEXT: addi a4, a5, 8
; VENTUS-NEXT: addi a5, a5, 4
; VENTUS-NEXT: li a6, 1
; VENTUS-NEXT: li a7, 2
; VENTUS-NEXT: j .LBB4_5
; VENTUS-NEXT: .LBB4_2: # %sw.default
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: lw t1, 0(a2)
; VENTUS-NEXT: mv t0, a3
; VENTUS-NEXT: .LBB4_3: # %for.inc.sink.split
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: lw t2, 0(t0)
; VENTUS-NEXT: add t1, t2, t1
; VENTUS-NEXT: sw t1, 0(t0)
; VENTUS-NEXT: .LBB4_4: # %for.inc
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: addi a1, a1, 1
; VENTUS-NEXT: beq a0, a1, .LBB4_9
; VENTUS-NEXT: join v0, v0, .LBB4_9
; VENTUS-NEXT: .LBB4_5: # %for.body
; VENTUS-NEXT: # =>This Inner Loop Header: Depth=1
; VENTUS-NEXT: beqz a1, .LBB4_4
; VENTUS-NEXT: # %bb.6: # %for.body
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: mv t0, a5
; VENTUS-NEXT: li t1, 2
; VENTUS-NEXT: beq a1, a6, .LBB4_3
; VENTUS-NEXT: # %bb.7: # %for.body
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: bne a1, a7, .LBB4_2
; VENTUS-NEXT: # %bb.8: # %sw.bb4
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: li t1, 23
; VENTUS-NEXT: mv t0, a4
; VENTUS-NEXT: j .LBB4_3
; VENTUS-NEXT: .LBB4_10:
; VENTUS-NEXT: join v0, v0, .LBB4_9
; VENTUS-NEXT: .LBB4_9: # %for.cond.cleanup
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
%cmp21.not = icmp eq i32 %call, 0
br i1 %cmp21.not, label %for.cond.cleanup, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
%arrayidx7 = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
%arrayidx8 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
%arrayidx5 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 2
%arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 1
br label %for.body
for.cond.cleanup: ; preds = %for.inc, %entry
ret void
for.body: ; preds = %for.body.lr.ph, %for.inc
%i.022 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
switch i32 %i.022, label %sw.default [
i32 0, label %for.inc
i32 1, label %for.inc.sink.split
i32 2, label %sw.bb4
]
sw.bb4: ; preds = %for.body
br label %for.inc.sink.split
sw.default: ; preds = %for.body
%0 = load i32, ptr addrspace(1) %arrayidx7, align 4
br label %for.inc.sink.split
for.inc.sink.split: ; preds = %for.body, %sw.default, %sw.bb4
%arrayidx2.sink24 = phi ptr addrspace(1) [ %arrayidx5, %sw.bb4 ], [ %arrayidx8, %sw.default ], [ %arrayidx2, %for.body ]
%.sink23 = phi i32 [ 23, %sw.bb4 ], [ %0, %sw.default ], [ 2, %for.body ]
%1 = load i32, ptr addrspace(1) %arrayidx2.sink24, align 4
%add3 = add nsw i32 %1, %.sink23
store i32 %add3, ptr addrspace(1) %arrayidx2.sink24, align 4
br label %for.inc
for.inc: ; preds = %for.inc.sink.split, %for.body
%inc = add nuw nsw i32 %i.022, 1
%exitcond.not = icmp eq i32 %inc, %call
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
declare dso_local i32 @_Z13get_global_idj(i32 noundef) local_unnamed_addr