[RISCV] Add basic support for the sifive-7-series short forward branch optimization.
The sifive-7-series has macrofusion support that converts a branch over a single instruction into a conditional instruction. This can be an improvement if the branch is hard to predict. This patch adds support for the most basic case: a branch over a move instruction. It is implemented as a pseudo instruction so that the control flow stays hidden until all code motion passes have completed. A recent select optimization is disabled when this feature is enabled in the subtarget. Related gcc patch for the same optimization: https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg211045.html Reviewed By: reames Differential Revision: https://reviews.llvm.org/D135814
This commit is contained in:
parent
5de73d27bd
commit
2b32e4f98b
|
@ -449,9 +449,18 @@ def TuneNoDefaultUnroll
|
|||
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
|
||||
"Disable default unroll preference.">;
|
||||
|
||||
// SiFive 7 is able to fuse integer ALU operations with a preceding branch
|
||||
// instruction.
|
||||
def TuneShortForwardBranchOpt
|
||||
: SubtargetFeature<"short-forward-branch-opt", "HasShortForwardBranchOpt",
|
||||
"true", "Enable short forward branch optimization">;
|
||||
def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
|
||||
def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;
|
||||
|
||||
def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
|
||||
"SiFive 7-Series processors",
|
||||
[TuneNoDefaultUnroll]>;
|
||||
[TuneNoDefaultUnroll,
|
||||
TuneShortForwardBranchOpt]>;
|
||||
|
||||
// Assume that lock-free native-width atomics are available, even if the target
|
||||
// and operating system combination would not usually provide them. The user
|
||||
|
|
|
@ -45,6 +45,8 @@ private:
|
|||
bool expandMBB(MachineBasicBlock &MBB);
|
||||
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||
MachineBasicBlock::iterator &NextMBBI);
|
||||
bool expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||
MachineBasicBlock::iterator &NextMBBI);
|
||||
bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
|
||||
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, unsigned Opcode);
|
||||
|
@ -82,6 +84,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
|||
// expanded instructions for each pseudo is correct in the Size field of the
|
||||
// tablegen definition for the pseudo.
|
||||
switch (MBBI->getOpcode()) {
|
||||
case RISCV::PseudoCCMOVGPR:
|
||||
return expandCCOp(MBB, MBBI, NextMBBI);
|
||||
case RISCV::PseudoVSETVLI:
|
||||
case RISCV::PseudoVSETVLIX0:
|
||||
case RISCV::PseudoVSETIVLI:
|
||||
|
@ -133,6 +137,60 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
|||
return false;
|
||||
}
|
||||
|
||||
bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
MachineBasicBlock::iterator &NextMBBI) {
|
||||
assert(MBBI->getOpcode() == RISCV::PseudoCCMOVGPR && "Unexpected opcode");
|
||||
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
MachineInstr &MI = *MBBI;
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
||||
MachineBasicBlock *MergeBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
||||
|
||||
MF->insert(++MBB.getIterator(), TrueBB);
|
||||
MF->insert(++TrueBB->getIterator(), MergeBB);
|
||||
|
||||
// We want to copy the "true" value when the condition is true which means
|
||||
// we need to invert the branch condition to jump over TrueBB when the
|
||||
// condition is false.
|
||||
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
|
||||
CC = RISCVCC::getOppositeBranchCondition(CC);
|
||||
|
||||
// Insert branch instruction.
|
||||
BuildMI(MBB, MBBI, DL, TII->getBrCond(CC))
|
||||
.addReg(MI.getOperand(1).getReg())
|
||||
.addReg(MI.getOperand(2).getReg())
|
||||
.addMBB(MergeBB);
|
||||
|
||||
Register DestReg = MI.getOperand(0).getReg();
|
||||
assert(MI.getOperand(4).getReg() == DestReg);
|
||||
|
||||
// Add MV.
|
||||
BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
|
||||
.add(MI.getOperand(5))
|
||||
.addImm(0);
|
||||
|
||||
TrueBB->addSuccessor(MergeBB);
|
||||
|
||||
MergeBB->splice(MergeBB->end(), &MBB, MI, MBB.end());
|
||||
MergeBB->transferSuccessors(&MBB);
|
||||
|
||||
MBB.addSuccessor(TrueBB);
|
||||
MBB.addSuccessor(MergeBB);
|
||||
|
||||
NextMBBI = MBB.end();
|
||||
MI.eraseFromParent();
|
||||
|
||||
// Make sure live-ins are correctly attached to the new basic blocks.
|
||||
LivePhysRegs LiveRegs;
|
||||
computeAndAddLiveIns(LiveRegs, *TrueBB);
|
||||
computeAndAddLiveIns(LiveRegs, *MergeBB);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI) {
|
||||
assert(MBBI->getNumExplicitOperands() == 3 && MBBI->getNumOperands() >= 5 &&
|
||||
|
|
|
@ -4222,28 +4222,30 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
|||
return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
|
||||
}
|
||||
|
||||
// (select c, -1, y) -> -c | y
|
||||
if (isAllOnesConstant(TrueV)) {
|
||||
SDValue Neg = DAG.getNegative(CondV, DL, VT);
|
||||
return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
|
||||
}
|
||||
// (select c, y, -1) -> (c-1) | y
|
||||
if (isAllOnesConstant(FalseV)) {
|
||||
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
|
||||
DAG.getAllOnesConstant(DL, VT));
|
||||
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
|
||||
}
|
||||
if (!Subtarget.hasShortForwardBranchOpt()) {
|
||||
// (select c, -1, y) -> -c | y
|
||||
if (isAllOnesConstant(TrueV)) {
|
||||
SDValue Neg = DAG.getNegative(CondV, DL, VT);
|
||||
return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
|
||||
}
|
||||
// (select c, y, -1) -> (c-1) | y
|
||||
if (isAllOnesConstant(FalseV)) {
|
||||
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
|
||||
DAG.getAllOnesConstant(DL, VT));
|
||||
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
|
||||
}
|
||||
|
||||
// (select c, 0, y) -> (c-1) & y
|
||||
if (isNullConstant(TrueV)) {
|
||||
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
|
||||
DAG.getAllOnesConstant(DL, VT));
|
||||
return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
|
||||
}
|
||||
// (select c, y, 0) -> -c & y
|
||||
if (isNullConstant(FalseV)) {
|
||||
SDValue Neg = DAG.getNegative(CondV, DL, VT);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
|
||||
// (select c, 0, y) -> (c-1) & y
|
||||
if (isNullConstant(TrueV)) {
|
||||
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
|
||||
DAG.getAllOnesConstant(DL, VT));
|
||||
return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
|
||||
}
|
||||
// (select c, y, 0) -> -c & y
|
||||
if (isNullConstant(FalseV)) {
|
||||
SDValue Neg = DAG.getNegative(CondV, DL, VT);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
|
||||
}
|
||||
}
|
||||
|
||||
// If the CondV is the output of a SETCC node which operates on XLenVT inputs,
|
||||
|
@ -9450,9 +9452,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
// (select (x in [0,1] != 0), (z ^ y), y ) -> (-x & z ) ^ y
|
||||
// (select (x in [0,1] == 0), y, (z | y) ) -> (-x & z ) | y
|
||||
// (select (x in [0,1] != 0), (z | y), y ) -> (-x & z ) | y
|
||||
// NOTE: We only do this if the target does not have the short forward
|
||||
// branch optimization.
|
||||
APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
|
||||
if (isNullConstant(RHS) && ISD::isIntEqualitySetCC(CCVal) &&
|
||||
DAG.MaskedValueIsZero(LHS, Mask)) {
|
||||
if (!Subtarget.hasShortForwardBranchOpt() && isNullConstant(RHS) &&
|
||||
ISD::isIntEqualitySetCC(CCVal) && DAG.MaskedValueIsZero(LHS, Mask)) {
|
||||
unsigned Opcode;
|
||||
SDValue Src1, Src2;
|
||||
// true if FalseV is XOR or OR operator and one of its operands
|
||||
|
@ -9504,34 +9508,35 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
|
||||
{LHS, RHS, CC, TrueV, FalseV});
|
||||
|
||||
// (select c, -1, y) -> -c | y
|
||||
if (isAllOnesConstant(TrueV)) {
|
||||
SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
|
||||
SDValue Neg = DAG.getNegative(C, DL, VT);
|
||||
return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
|
||||
}
|
||||
// (select c, y, -1) -> -!c | y
|
||||
if (isAllOnesConstant(FalseV)) {
|
||||
SDValue C = DAG.getSetCC(DL, VT, LHS, RHS,
|
||||
ISD::getSetCCInverse(CCVal, VT));
|
||||
SDValue Neg = DAG.getNegative(C, DL, VT);
|
||||
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
|
||||
}
|
||||
if (!Subtarget.hasShortForwardBranchOpt()) {
|
||||
// (select c, -1, y) -> -c | y
|
||||
if (isAllOnesConstant(TrueV)) {
|
||||
SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
|
||||
SDValue Neg = DAG.getNegative(C, DL, VT);
|
||||
return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
|
||||
}
|
||||
// (select c, y, -1) -> -!c | y
|
||||
if (isAllOnesConstant(FalseV)) {
|
||||
SDValue C =
|
||||
DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
|
||||
SDValue Neg = DAG.getNegative(C, DL, VT);
|
||||
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
|
||||
}
|
||||
|
||||
// (select c, 0, y) -> -!c & y
|
||||
if (isNullConstant(TrueV)) {
|
||||
SDValue C = DAG.getSetCC(DL, VT, LHS, RHS,
|
||||
ISD::getSetCCInverse(CCVal, VT));
|
||||
SDValue Neg = DAG.getNegative(C, DL, VT);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
|
||||
// (select c, 0, y) -> -!c & y
|
||||
if (isNullConstant(TrueV)) {
|
||||
SDValue C =
|
||||
DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
|
||||
SDValue Neg = DAG.getNegative(C, DL, VT);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
|
||||
}
|
||||
// (select c, y, 0) -> -c & y
|
||||
if (isNullConstant(FalseV)) {
|
||||
SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
|
||||
SDValue Neg = DAG.getNegative(C, DL, VT);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
|
||||
}
|
||||
}
|
||||
// (select c, y, 0) -> -c & y
|
||||
if (isNullConstant(FalseV)) {
|
||||
SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
|
||||
SDValue Neg = DAG.getNegative(C, DL, VT);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
|
||||
}
|
||||
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
|
|
@ -1616,6 +1616,9 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
|
|||
return false;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case RISCV::PseudoCCMOVGPR:
|
||||
// Operands 4 and 5 are commutable.
|
||||
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
|
||||
case CASE_VFMA_SPLATS(FMADD):
|
||||
case CASE_VFMA_SPLATS(FMSUB):
|
||||
case CASE_VFMA_SPLATS(FMACC):
|
||||
|
@ -1761,6 +1764,15 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
|
|||
};
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case RISCV::PseudoCCMOVGPR: {
|
||||
// CCMOV can be commuted by inverting the condition.
|
||||
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
|
||||
CC = RISCVCC::getOppositeBranchCondition(CC);
|
||||
auto &WorkingMI = cloneIfNew(MI);
|
||||
WorkingMI.getOperand(3).setImm(CC);
|
||||
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
|
||||
OpIdx1, OpIdx2);
|
||||
}
|
||||
case CASE_VFMA_SPLATS(FMACC):
|
||||
case CASE_VFMA_SPLATS(FMADD):
|
||||
case CASE_VFMA_SPLATS(FMSAC):
|
||||
|
|
|
@ -1313,6 +1313,20 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
|
|||
node:$falsev), [{}],
|
||||
IntCCtoRISCVCC>;
|
||||
|
||||
let Predicates = [HasShortForwardBranchOpt],
|
||||
Constraints = "$dst = $falsev", isCommutable = 1, Size = 8 in {
|
||||
// This instruction moves $truev to $dst when the condition is true. It will
|
||||
// be expanded to control flow in RISCVExpandPseudoInsts.
|
||||
def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
|
||||
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
|
||||
GPR:$falsev, GPR:$truev),
|
||||
[(set GPR:$dst,
|
||||
(riscv_selectcc_frag:$cc GPR:$lhs, GPR:$rhs,
|
||||
cond, GPR:$truev,
|
||||
GPR:$falsev))]>,
|
||||
Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
|
||||
}
|
||||
|
||||
multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
|
||||
let usesCustomInserter = 1 in
|
||||
def _Using_CC_GPR : Pseudo<(outs valty:$dst),
|
||||
|
@ -1329,6 +1343,7 @@ multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
|
|||
(IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
|
||||
}
|
||||
|
||||
let Predicates = [NoShortForwardBranchOpt] in
|
||||
defm Select_GPR : SelectCC_GPR_rrirr<GPR>;
|
||||
|
||||
class SelectCompressOpt<CondCode Cond>: Pat<(riscv_selectcc_frag:$select GPR:$lhs, simm12_no6:$Constant, Cond,
|
||||
|
|
|
@ -246,4 +246,5 @@ defm : UnsupportedSchedZbs;
|
|||
defm : UnsupportedSchedZbkb;
|
||||
defm : UnsupportedSchedZbkx;
|
||||
defm : UnsupportedSchedZfh;
|
||||
defm : UnsupportedSchedSFB;
|
||||
}
|
||||
|
|
|
@ -44,6 +44,12 @@ def : WriteRes<WriteJal, [SiFive7PipeB]>;
|
|||
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
|
||||
def : WriteRes<WriteJmpReg, [SiFive7PipeB]>;
|
||||
|
||||
// Short forward branch
|
||||
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
// Integer arithmetic and logic
|
||||
let Latency = 3 in {
|
||||
def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
|
||||
|
@ -223,6 +229,8 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
|
|||
def : ReadAdvance<ReadFClass32, 0>;
|
||||
def : ReadAdvance<ReadFClass64, 0>;
|
||||
|
||||
def : ReadAdvance<ReadSFB, 0>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Unsupported extensions
|
||||
defm : UnsupportedSchedV;
|
||||
|
|
|
@ -105,6 +105,10 @@ def WriteFST16 : SchedWrite; // Floating point sp store
|
|||
def WriteFST32 : SchedWrite; // Floating point sp store
|
||||
def WriteFST64 : SchedWrite; // Floating point dp store
|
||||
|
||||
// Short forward branch for Bullet
|
||||
def WriteSFB : SchedWrite;
|
||||
def ReadSFB : SchedRead;
|
||||
|
||||
/// Define scheduler resources associated with use operands.
|
||||
def ReadJmp : SchedRead;
|
||||
def ReadJalr : SchedRead;
|
||||
|
@ -229,6 +233,14 @@ def : ReadAdvance<ReadFSqrt16, 0>;
|
|||
} // Unsupported = true
|
||||
}
|
||||
|
||||
multiclass UnsupportedSchedSFB {
|
||||
let Unsupported = true in {
|
||||
def : WriteRes<WriteSFB, []>;
|
||||
|
||||
def : ReadAdvance<ReadSFB, 0>;
|
||||
} // Unsupported = true
|
||||
}
|
||||
|
||||
// Include the scheduler resources for other instruction extensions.
|
||||
include "RISCVScheduleZb.td"
|
||||
include "RISCVScheduleV.td"
|
||||
|
|
|
@ -96,6 +96,7 @@ private:
|
|||
bool EnableDefaultUnroll = true;
|
||||
bool EnableSaveRestore = false;
|
||||
bool EnableUnalignedScalarMem = false;
|
||||
bool HasShortForwardBranchOpt = false;
|
||||
bool HasLUIADDIFusion = false;
|
||||
bool HasForcedAtomics = false;
|
||||
unsigned XLen = 32;
|
||||
|
@ -190,6 +191,7 @@ public:
|
|||
bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; }
|
||||
bool enableDefaultUnroll() const { return EnableDefaultUnroll; }
|
||||
bool enableSaveRestore() const { return EnableSaveRestore; }
|
||||
bool hasShortForwardBranchOpt() const { return HasShortForwardBranchOpt; }
|
||||
bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
|
||||
bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
|
||||
bool hasForcedAtomics() const { return HasForcedAtomics; }
|
||||
|
|
|
@ -0,0 +1,385 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck -check-prefix=NOSFB %s
|
||||
; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck -check-prefix=SFB %s
|
||||
|
||||
; The sifive-7-series can predicate a mv.
|
||||
|
||||
define signext i32 @test1(i32 signext %x, i32 signext %y, i32 signext %z) {
|
||||
; NOSFB-LABEL: test1:
|
||||
; NOSFB: # %bb.0:
|
||||
; NOSFB-NEXT: beqz a2, .LBB0_2
|
||||
; NOSFB-NEXT: # %bb.1:
|
||||
; NOSFB-NEXT: mv a0, a1
|
||||
; NOSFB-NEXT: .LBB0_2:
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: test1:
|
||||
; SFB: # %bb.0:
|
||||
; SFB-NEXT: beqz a2, .LBB0_2
|
||||
; SFB-NEXT: # %bb.1:
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB0_2:
|
||||
; SFB-NEXT: ret
|
||||
%c = icmp eq i32 %z, 0
|
||||
%b = select i1 %c, i32 %x, i32 %y
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; Same as above with select operands swapped.
|
||||
define signext i32 @test2(i32 signext %x, i32 signext %y, i32 signext %z) {
|
||||
; NOSFB-LABEL: test2:
|
||||
; NOSFB: # %bb.0:
|
||||
; NOSFB-NEXT: beqz a2, .LBB1_2
|
||||
; NOSFB-NEXT: # %bb.1:
|
||||
; NOSFB-NEXT: mv a1, a0
|
||||
; NOSFB-NEXT: .LBB1_2:
|
||||
; NOSFB-NEXT: mv a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: test2:
|
||||
; SFB: # %bb.0:
|
||||
; SFB-NEXT: bnez a2, .LBB1_2
|
||||
; SFB-NEXT: # %bb.1:
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB1_2:
|
||||
; SFB-NEXT: ret
|
||||
%c = icmp eq i32 %z, 0
|
||||
%b = select i1 %c, i32 %y, i32 %x
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; Make sure we don't share the same basic block for two selects with the same
|
||||
; condition, as this would break the predication.
|
||||
define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 signext %y, i32 signext %z) {
|
||||
; NOSFB-LABEL: test3:
|
||||
; NOSFB: # %bb.0:
|
||||
; NOSFB-NEXT: beqz a4, .LBB2_2
|
||||
; NOSFB-NEXT: # %bb.1:
|
||||
; NOSFB-NEXT: mv a1, a0
|
||||
; NOSFB-NEXT: mv a2, a3
|
||||
; NOSFB-NEXT: .LBB2_2:
|
||||
; NOSFB-NEXT: addw a0, a1, a2
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: test3:
|
||||
; SFB: # %bb.0:
|
||||
; SFB-NEXT: bnez a4, .LBB2_2
|
||||
; SFB-NEXT: # %bb.1:
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB2_2:
|
||||
; SFB-NEXT: beqz a4, .LBB2_4
|
||||
; SFB-NEXT: # %bb.3:
|
||||
; SFB-NEXT: mv a2, a3
|
||||
; SFB-NEXT: .LBB2_4:
|
||||
; SFB-NEXT: addw a0, a0, a2
|
||||
; SFB-NEXT: ret
|
||||
%c = icmp eq i32 %z, 0
|
||||
%a = select i1 %c, i32 %w, i32 %v
|
||||
%b = select i1 %c, i32 %x, i32 %y
|
||||
%d = add i32 %a, %b
|
||||
ret i32 %d
|
||||
}
|
||||
|
||||
; Test with false value 0.
|
||||
define signext i32 @test4(i32 signext %x, i32 signext %z) {
|
||||
; NOSFB-LABEL: test4:
|
||||
; NOSFB: # %bb.0:
|
||||
; NOSFB-NEXT: snez a1, a1
|
||||
; NOSFB-NEXT: addi a1, a1, -1
|
||||
; NOSFB-NEXT: and a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: test4:
|
||||
; SFB: # %bb.0:
|
||||
; SFB-NEXT: beqz a1, .LBB3_2
|
||||
; SFB-NEXT: # %bb.1:
|
||||
; SFB-NEXT: li a0, 0
|
||||
; SFB-NEXT: .LBB3_2:
|
||||
; SFB-NEXT: ret
|
||||
%c = icmp eq i32 %z, 0
|
||||
%b = select i1 %c, i32 %x, i32 0
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; Same as above with select operands swapped.
|
||||
define signext i32 @test5(i32 signext %x, i32 signext %z) {
|
||||
; NOSFB-LABEL: test5:
|
||||
; NOSFB: # %bb.0:
|
||||
; NOSFB-NEXT: seqz a1, a1
|
||||
; NOSFB-NEXT: addi a1, a1, -1
|
||||
; NOSFB-NEXT: and a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: test5:
|
||||
; SFB: # %bb.0:
|
||||
; SFB-NEXT: bnez a1, .LBB4_2
|
||||
; SFB-NEXT: # %bb.1:
|
||||
; SFB-NEXT: li a0, 0
|
||||
; SFB-NEXT: .LBB4_2:
|
||||
; SFB-NEXT: ret
|
||||
%c = icmp eq i32 %z, 0
|
||||
%b = select i1 %c, i32 0, i32 %x
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; Test with false value -1.
|
||||
define signext i32 @test6(i32 signext %x, i32 signext %z) {
|
||||
; NOSFB-LABEL: test6:
|
||||
; NOSFB: # %bb.0:
|
||||
; NOSFB-NEXT: seqz a1, a1
|
||||
; NOSFB-NEXT: addi a1, a1, -1
|
||||
; NOSFB-NEXT: or a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: test6:
|
||||
; SFB: # %bb.0:
|
||||
; SFB-NEXT: li a2, -1
|
||||
; SFB-NEXT: beqz a1, .LBB5_2
|
||||
; SFB-NEXT: # %bb.1:
|
||||
; SFB-NEXT: mv a0, a2
|
||||
; SFB-NEXT: .LBB5_2:
|
||||
; SFB-NEXT: ret
|
||||
%c = icmp eq i32 %z, 0
|
||||
%b = select i1 %c, i32 %x, i32 -1
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; Same as above with select operands swapped.
|
||||
define signext i32 @test7(i32 signext %x, i32 signext %z) {
|
||||
; NOSFB-LABEL: test7:
|
||||
; NOSFB: # %bb.0:
|
||||
; NOSFB-NEXT: snez a1, a1
|
||||
; NOSFB-NEXT: addi a1, a1, -1
|
||||
; NOSFB-NEXT: or a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: test7:
|
||||
; SFB: # %bb.0:
|
||||
; SFB-NEXT: li a2, -1
|
||||
; SFB-NEXT: bnez a1, .LBB6_2
|
||||
; SFB-NEXT: # %bb.1:
|
||||
; SFB-NEXT: mv a0, a2
|
||||
; SFB-NEXT: .LBB6_2:
|
||||
; SFB-NEXT: ret
|
||||
%c = icmp eq i32 %z, 0
|
||||
%b = select i1 %c, i32 -1, i32 %x
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i16 @select_xor_1(i16 %A, i8 %cond) {
|
||||
; NOSFB-LABEL: select_xor_1:
|
||||
; NOSFB: # %bb.0: # %entry
|
||||
; NOSFB-NEXT: andi a1, a1, 1
|
||||
; NOSFB-NEXT: negw a1, a1
|
||||
; NOSFB-NEXT: andi a1, a1, 43
|
||||
; NOSFB-NEXT: xor a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: select_xor_1:
|
||||
; SFB: # %bb.0: # %entry
|
||||
; SFB-NEXT: andi a1, a1, 1
|
||||
; SFB-NEXT: xori a2, a0, 43
|
||||
; SFB-NEXT: beqz a1, .LBB7_2
|
||||
; SFB-NEXT: # %bb.1: # %entry
|
||||
; SFB-NEXT: mv a0, a2
|
||||
; SFB-NEXT: .LBB7_2: # %entry
|
||||
; SFB-NEXT: ret
|
||||
entry:
|
||||
%and = and i8 %cond, 1
|
||||
%cmp10 = icmp eq i8 %and, 0
|
||||
%0 = xor i16 %A, 43
|
||||
%1 = select i1 %cmp10, i16 %A, i16 %0
|
||||
ret i16 %1
|
||||
}
|
||||
|
||||
; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
|
||||
; icmp eq (and %cond, 1), 0
|
||||
define i16 @select_xor_1b(i16 %A, i8 %cond) {
|
||||
; NOSFB-LABEL: select_xor_1b:
|
||||
; NOSFB: # %bb.0: # %entry
|
||||
; NOSFB-NEXT: andi a1, a1, 1
|
||||
; NOSFB-NEXT: negw a1, a1
|
||||
; NOSFB-NEXT: andi a1, a1, 43
|
||||
; NOSFB-NEXT: xor a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: select_xor_1b:
|
||||
; SFB: # %bb.0: # %entry
|
||||
; SFB-NEXT: andi a1, a1, 1
|
||||
; SFB-NEXT: xori a2, a0, 43
|
||||
; SFB-NEXT: beqz a1, .LBB8_2
|
||||
; SFB-NEXT: # %bb.1: # %entry
|
||||
; SFB-NEXT: mv a0, a2
|
||||
; SFB-NEXT: .LBB8_2: # %entry
|
||||
; SFB-NEXT: ret
|
||||
entry:
|
||||
%and = and i8 %cond, 1
|
||||
%cmp10 = icmp ne i8 %and, 1
|
||||
%0 = xor i16 %A, 43
|
||||
%1 = select i1 %cmp10, i16 %A, i16 %0
|
||||
ret i16 %1
|
||||
}
|
||||
|
||||
define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
|
||||
; NOSFB-LABEL: select_xor_2:
|
||||
; NOSFB: # %bb.0: # %entry
|
||||
; NOSFB-NEXT: andi a2, a2, 1
|
||||
; NOSFB-NEXT: neg a2, a2
|
||||
; NOSFB-NEXT: and a1, a1, a2
|
||||
; NOSFB-NEXT: xor a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: select_xor_2:
|
||||
; SFB: # %bb.0: # %entry
|
||||
; SFB-NEXT: andi a2, a2, 1
|
||||
; SFB-NEXT: xor a1, a1, a0
|
||||
; SFB-NEXT: beqz a2, .LBB9_2
|
||||
; SFB-NEXT: # %bb.1: # %entry
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB9_2: # %entry
|
||||
; SFB-NEXT: ret
|
||||
entry:
|
||||
%and = and i8 %cond, 1
|
||||
%cmp10 = icmp eq i8 %and, 0
|
||||
%0 = xor i32 %B, %A
|
||||
%1 = select i1 %cmp10, i32 %A, i32 %0
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
|
||||
; icmp eq (and %cond, 1), 0
|
||||
define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
|
||||
; NOSFB-LABEL: select_xor_2b:
|
||||
; NOSFB: # %bb.0: # %entry
|
||||
; NOSFB-NEXT: andi a2, a2, 1
|
||||
; NOSFB-NEXT: neg a2, a2
|
||||
; NOSFB-NEXT: and a1, a1, a2
|
||||
; NOSFB-NEXT: xor a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: select_xor_2b:
|
||||
; SFB: # %bb.0: # %entry
|
||||
; SFB-NEXT: andi a2, a2, 1
|
||||
; SFB-NEXT: xor a1, a1, a0
|
||||
; SFB-NEXT: beqz a2, .LBB10_2
|
||||
; SFB-NEXT: # %bb.1: # %entry
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB10_2: # %entry
|
||||
; SFB-NEXT: ret
|
||||
entry:
|
||||
%and = and i8 %cond, 1
|
||||
%cmp10 = icmp ne i8 %and, 1
|
||||
%0 = xor i32 %B, %A
|
||||
%1 = select i1 %cmp10, i32 %A, i32 %0
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
|
||||
; NOSFB-LABEL: select_or:
|
||||
; NOSFB: # %bb.0: # %entry
|
||||
; NOSFB-NEXT: andi a2, a2, 1
|
||||
; NOSFB-NEXT: neg a2, a2
|
||||
; NOSFB-NEXT: and a1, a1, a2
|
||||
; NOSFB-NEXT: or a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: select_or:
|
||||
; SFB: # %bb.0: # %entry
|
||||
; SFB-NEXT: andi a2, a2, 1
|
||||
; SFB-NEXT: or a1, a1, a0
|
||||
; SFB-NEXT: beqz a2, .LBB11_2
|
||||
; SFB-NEXT: # %bb.1: # %entry
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB11_2: # %entry
|
||||
; SFB-NEXT: ret
|
||||
entry:
|
||||
%and = and i8 %cond, 1
|
||||
%cmp10 = icmp eq i8 %and, 0
|
||||
%0 = or i32 %B, %A
|
||||
%1 = select i1 %cmp10, i32 %A, i32 %0
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
|
||||
; icmp eq (and %cond, 1), 0
|
||||
define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
|
||||
; NOSFB-LABEL: select_or_b:
|
||||
; NOSFB: # %bb.0: # %entry
|
||||
; NOSFB-NEXT: andi a2, a2, 1
|
||||
; NOSFB-NEXT: neg a2, a2
|
||||
; NOSFB-NEXT: and a1, a1, a2
|
||||
; NOSFB-NEXT: or a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: select_or_b:
|
||||
; SFB: # %bb.0: # %entry
|
||||
; SFB-NEXT: andi a2, a2, 1
|
||||
; SFB-NEXT: or a1, a1, a0
|
||||
; SFB-NEXT: beqz a2, .LBB12_2
|
||||
; SFB-NEXT: # %bb.1: # %entry
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB12_2: # %entry
|
||||
; SFB-NEXT: ret
|
||||
entry:
|
||||
%and = and i8 %cond, 1
|
||||
%cmp10 = icmp ne i8 %and, 1
|
||||
%0 = or i32 %B, %A
|
||||
%1 = select i1 %cmp10, i32 %A, i32 %0
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
|
||||
; NOSFB-LABEL: select_or_1:
|
||||
; NOSFB: # %bb.0: # %entry
|
||||
; NOSFB-NEXT: andi a2, a2, 1
|
||||
; NOSFB-NEXT: neg a2, a2
|
||||
; NOSFB-NEXT: and a1, a1, a2
|
||||
; NOSFB-NEXT: or a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: select_or_1:
|
||||
; SFB: # %bb.0: # %entry
|
||||
; SFB-NEXT: andi a2, a2, 1
|
||||
; SFB-NEXT: or a1, a1, a0
|
||||
; SFB-NEXT: beqz a2, .LBB13_2
|
||||
; SFB-NEXT: # %bb.1: # %entry
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB13_2: # %entry
|
||||
; SFB-NEXT: ret
|
||||
entry:
|
||||
%and = and i32 %cond, 1
|
||||
%cmp10 = icmp eq i32 %and, 0
|
||||
%0 = or i32 %B, %A
|
||||
%1 = select i1 %cmp10, i32 %A, i32 %0
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
|
||||
; icmp eq (and %cond, 1), 0
|
||||
define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
|
||||
; NOSFB-LABEL: select_or_1b:
|
||||
; NOSFB: # %bb.0: # %entry
|
||||
; NOSFB-NEXT: andi a2, a2, 1
|
||||
; NOSFB-NEXT: neg a2, a2
|
||||
; NOSFB-NEXT: and a1, a1, a2
|
||||
; NOSFB-NEXT: or a0, a0, a1
|
||||
; NOSFB-NEXT: ret
|
||||
;
|
||||
; SFB-LABEL: select_or_1b:
|
||||
; SFB: # %bb.0: # %entry
|
||||
; SFB-NEXT: andi a2, a2, 1
|
||||
; SFB-NEXT: or a1, a1, a0
|
||||
; SFB-NEXT: beqz a2, .LBB14_2
|
||||
; SFB-NEXT: # %bb.1: # %entry
|
||||
; SFB-NEXT: mv a0, a1
|
||||
; SFB-NEXT: .LBB14_2: # %entry
|
||||
; SFB-NEXT: ret
|
||||
entry:
|
||||
%and = and i32 %cond, 1
|
||||
%cmp10 = icmp ne i32 %and, 1
|
||||
%0 = or i32 %B, %A
|
||||
%1 = select i1 %cmp10, i32 %A, i32 %0
|
||||
ret i32 %1
|
||||
}
|
Loading…
Reference in New Issue