Revert "Reapply "[CodeGen] Add new pass for late cleanup of redundant definitions.""
This reverts commit 17db0de330.

Some more bots got broken - need to investigate.
Parent: 2577cb7a20
Commit: 122efef8ee
@@ -1130,9 +1130,6 @@ void CodeGenPassBuilder<Derived>::addMachineLateOptimization(
  if (!TM.requiresStructuredCFG())
    addPass(TailDuplicatePass());

  // Cleanup of redundant (identical) address/immediate loads.
  addPass(MachineLateInstrsCleanupPass());

  // Copy propagation.
  addPass(MachineCopyPropagationPass());
}

@@ -151,7 +151,6 @@ DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ())
DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass, ())
DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ())
DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ())

@@ -334,10 +334,6 @@ namespace llvm {
  MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr);

  /// MachineLateInstrsCleanup - This pass removes redundant identical
  /// instructions after register allocation and rematerialization.
  extern char &MachineLateInstrsCleanupID;

  /// PeepholeOptimizer - This pass performs peephole optimizations -
  /// like extension and comparison eliminations.
  extern char &PeepholeOptimizerID;

@@ -277,7 +277,6 @@ void initializeMachineDominanceFrontierPass(PassRegistry&);
void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeMachineFunctionSplitterPass(PassRegistry &);
void initializeMachineLateInstrsCleanupPass(PassRegistry&);
void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);

@@ -119,7 +119,6 @@ add_llvm_component_library(LLVMCodeGen
  MachineFunctionSplitter.cpp
  MachineInstrBundle.cpp
  MachineInstr.cpp
  MachineLateInstrsCleanup.cpp
  MachineLICM.cpp
  MachineLoopInfo.cpp
  MachineLoopUtils.cpp

@@ -78,7 +78,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
  initializeMachineCycleInfoWrapperPassPass(Registry);
  initializeMachineDominatorTreePass(Registry);
  initializeMachineFunctionPrinterPassPass(Registry);
  initializeMachineLateInstrsCleanupPass(Registry);
  initializeMachineLICMPass(Registry);
  initializeMachineLoopInfoPass(Registry);
  initializeMachineModuleInfoWrapperPassPass(Registry);
@@ -1,240 +0,0 @@
//==--- MachineLateInstrsCleanup.cpp - Late Instructions Cleanup Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This simple pass removes any identical and redundant immediate or address
// loads to the same register. The immediate loads removed can originally be
// the result of rematerialization, while the addresses are redundant frame
// addressing anchor points created during Frame Indices elimination.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "machine-latecleanup"

STATISTIC(NumRemoved, "Number of redundant instructions removed.");

namespace {

class MachineLateInstrsCleanup : public MachineFunctionPass {
  const TargetRegisterInfo *TRI;
  const TargetInstrInfo *TII;

  // Data structures to map regs to their definitions per MBB.
  using Reg2DefMap = std::map<Register, MachineInstr*>;
  std::vector<Reg2DefMap> RegDefs;

  // Walk through the instructions in MBB and remove any redundant
  // instructions.
  bool processBlock(MachineBasicBlock *MBB);

public:
  static char ID; // Pass identification, replacement for typeid

  MachineLateInstrsCleanup() : MachineFunctionPass(ID) {
    initializeMachineLateInstrsCleanupPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
};

} // end anonymous namespace

char MachineLateInstrsCleanup::ID = 0;

char &llvm::MachineLateInstrsCleanupID = MachineLateInstrsCleanup::ID;

INITIALIZE_PASS(MachineLateInstrsCleanup, DEBUG_TYPE,
                "Machine Late Instructions Cleanup Pass", false, false)

bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  bool Changed = false;

  TRI = MF.getSubtarget().getRegisterInfo();
  TII = MF.getSubtarget().getInstrInfo();

  RegDefs.clear();
  RegDefs.resize(MF.getNumBlockIDs());

  // Visit all MBBs in an order that maximises the reuse from predecessors.
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT)
    Changed |= processBlock(MBB);

  return Changed;
}

// Clear any previous kill flag on Reg found before I in MBB. Walk backwards
// in MBB and if needed continue in predecessors until a use/def of Reg is
// encountered. This seems to be faster in practice than tracking kill flags
// in a map.
static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
                             MachineBasicBlock::iterator I,
                             BitVector &VisitedPreds,
                             const TargetRegisterInfo *TRI) {
  VisitedPreds.set(MBB->getNumber());
  while (I != MBB->begin()) {
    I--;
    bool Found = false;
    for (auto &MO : I->operands())
      if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
        if (MO.isDef())
          return;
        if (MO.readsReg()) {
          MO.setIsKill(false);
          Found = true; // Keep going for an implicit kill of the super-reg.
        }
      }
    if (Found)
      return;
  }

  // If an earlier def is not in MBB, continue in predecessors.
  if (!MBB->isLiveIn(Reg))
    MBB->addLiveIn(Reg);
  assert(!MBB->pred_empty() && "Predecessor def not found!");
  for (MachineBasicBlock *Pred : MBB->predecessors())
    if (!VisitedPreds.test(Pred->getNumber()))
      clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
}

static void removeRedundantDef(MachineInstr *MI,
                               const TargetRegisterInfo *TRI) {
  Register Reg = MI->getOperand(0).getReg();
  BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
  clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI);
  MI->eraseFromParent();
  ++NumRemoved;
}

// Return true if MI is a potential candidate for reuse/removal and if so
// also the register it defines in DefedReg. A candidate is a simple
// instruction that does not touch memory, has only one register definition
// and the only reg it may use is FrameReg. Typically this is an immediate
// load or a load-address instruction.
static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
                        Register FrameReg) {
  DefedReg = MCRegister::NoRegister;
  bool SawStore = true;
  if (!MI->isSafeToMove(nullptr, SawStore) || MI->isImplicitDef() ||
      MI->isInlineAsm())
    return false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      if (MO.isDef()) {
        if (i == 0 && !MO.isImplicit() && !MO.isDead())
          DefedReg = MO.getReg();
        else
          return false;
      } else if (MO.getReg() && MO.getReg() != FrameReg)
        return false;
    } else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
                 MO.isGlobal() || MO.isSymbol()))
      return false;
  }
  return DefedReg.isValid();
}

bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
  bool Changed = false;

  Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];

  // Find reusable definitions in the predecessor(s).
  if (!MBB->pred_empty()) {
    MachineBasicBlock *FirstPred = *MBB->pred_begin();
    for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()])
      if (llvm::all_of(
              drop_begin(MBB->predecessors()),
              [&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
                auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
                return PredDefI != RegDefs[Pred->getNumber()].end() &&
                       DefMI->isIdenticalTo(*PredDefI->second);
              })) {
        MBBDefs[Reg] = DefMI;
        LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
                          << printMBBReference(*MBB) << ": " << *DefMI;);
      }
  }

  // Process MBB.
  MachineFunction *MF = MBB->getParent();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  Register FrameReg = TRI->getFrameRegister(*MF);
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    // If FrameReg is modified, no previous load-address instructions are valid.
    if (MI.modifiesRegister(FrameReg, TRI)) {
      MBBDefs.clear();
      continue;
    }

    Register DefedReg;
    bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);

    // Check for an earlier identical and reusable instruction.
    if (IsCandidate) {
      auto DefI = MBBDefs.find(DefedReg);
      if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
        LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
                          << printMBBReference(*MBB) << ": " << MI;);
        removeRedundantDef(&MI, TRI);
        Changed = true;
        continue;
      }
    }

    // Clear any entries in map that MI clobbers.
    for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
      Register Reg = DefI->first;
      if (MI.modifiesRegister(Reg, TRI))
        DefI = MBBDefs.erase(DefI);
      else
        ++DefI;
    }

    // Record this MI for potential later reuse.
    if (IsCandidate) {
      LLVM_DEBUG(dbgs() << "Found interesting instruction in "
                        << printMBBReference(*MBB) << ": " << MI;);
      MBBDefs[DefedReg] = &MI;
    }
  }

  return Changed;
}
@@ -1522,9 +1522,6 @@ void TargetPassConfig::addOptimizedRegAlloc() {

/// Add passes that optimize machine instructions after register allocation.
void TargetPassConfig::addMachineLateOptimization() {
  // Cleanup of redundant immediate/address loads.
  addPass(&MachineLateInstrsCleanupID);

  // Branch folding must be run after regalloc and prolog/epilog insertion.
  addPass(&BranchFolderPassID);

@@ -291,7 +291,6 @@ void NVPTXPassConfig::addIRPasses() {
  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
  disablePass(&PrologEpilogCodeInserterID);
  disablePass(&MachineLateInstrsCleanupID);
  disablePass(&MachineCopyPropagationID);
  disablePass(&TailDuplicateID);
  disablePass(&StackMapLivenessID);

@@ -286,10 +286,6 @@ void RISCVPassConfig::addPreRegAlloc() {
void RISCVPassConfig::addPostRegAlloc() {
  if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
    addPass(createRISCVRedundantCopyEliminationPass());

  // Temporarily disabled until post-RA pseudo expansion problem is fixed,
  // see D123394 and D139169.
  disablePass(&MachineLateInstrsCleanupID);
}

yaml::MachineFunctionInfo *

@@ -501,7 +501,6 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
  // them.

  // These functions all require the NoVRegs property.
  disablePass(&MachineLateInstrsCleanupID);
  disablePass(&MachineCopyPropagationID);
  disablePass(&PostRAMachineSinkingID);
  disablePass(&PostRASchedulerID);
@@ -188,7 +188,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

@@ -29,8 +29,14 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr x9, [x9]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
; CHECK-NEXT: stur x9, [x29, #-8]
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
; CHECK-NEXT: ldur x9, [x29, #-8]
; CHECK-NEXT: Lloh8:
; CHECK-NEXT: ldr x8, [x8]
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: b.ne LBB0_2
; CHECK-NEXT: ; %bb.1: ; %entry

@@ -40,6 +46,7 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_2: ; %entry
; CHECK-NEXT: bl ___stack_chk_fail
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh6, Lloh7, Lloh8
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh1, Lloh3, Lloh5
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh2, Lloh4
entry:

@@ -59,23 +59,26 @@ define float @foo2(double* %x0, double* %x1) nounwind {
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: mov w1, #1
; CHECK-NEXT: mov w2, #2
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w3, #3
; CHECK-NEXT: mov w4, #4
; CHECK-NEXT: mov w5, #5
; CHECK-NEXT: mov w6, #6
; CHECK-NEXT: mov w7, #7
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w7, #7
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl callee2

@@ -157,6 +157,8 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0
; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
; FLATSCR-NEXT: s_mov_b32 s11, 0
; FLATSCR-NEXT: s_mov_b32 s10, 0
; FLATSCR-NEXT: s_mov_b32 s9, 0

@@ -169,8 +171,9 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: s_mov_b32 s4, 0
; FLATSCR-NEXT: s_mov_b32 s3, 0
; FLATSCR-NEXT: s_mov_b32 s2, 0
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0
; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
; FLATSCR-NEXT: s_mov_b32 s40, 0
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40

@@ -185,7 +188,6 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2 offset:112
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128
; FLATSCR-NEXT: s_mov_b32 s40, 0
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s40 offset:8
; FLATSCR-NEXT: s_mov_b32 s39, 0
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s39 offset:16

@@ -1354,6 +1354,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX7-NEXT: s_cbranch_execz .LBB13_2

@@ -537,6 +537,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX803-NEXT: ;;#ASMSTART
; GFX803-NEXT: ;;#ASMEND
; GFX803-NEXT: s_mov_b32 s4, 0x40000
; GFX803-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8

@@ -553,6 +554,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x40000
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8

@@ -567,6 +569,8 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX1010-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc dlc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_mov_b32 s4, 0x20000
; GFX1010-NEXT: ;;#ASMSTART
; GFX1010-NEXT: ;;#ASMEND
; GFX1010-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload

@@ -581,6 +585,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: s_movk_i32 s0, 0x1000
; GFX1100-NEXT: scratch_store_b32 off, v0, s0 ; 4-byte Folded Spill
; GFX1100-NEXT: s_movk_i32 s0, 0x1000
; GFX1100-NEXT: ;;#ASMSTART
; GFX1100-NEXT: ;;#ASMEND
; GFX1100-NEXT: scratch_load_b32 v0, off, s0 ; 4-byte Folded Reload

@@ -76,10 +76,12 @@ define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1
; CHECK-NEXT: ; %bb.10: ; %bb16
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[16:17], 0
; CHECK-NEXT: s_mov_b64 s[20:21], -1
; CHECK-NEXT: s_mov_b64 s[22:23], s[10:11]
; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17]
; CHECK-NEXT: s_branch .LBB0_2
; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[22:23], -1
; CHECK-NEXT: s_mov_b64 s[20:21], 0
; CHECK-NEXT: ; implicit-def: $sgpr16_sgpr17
; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17]
File diff suppressed because it is too large
@@ -374,7 +374,6 @@
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: Shrink Wrapping analysis
; GCN-O1-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O1-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O1-NEXT: Control Flow Optimizer
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-NEXT: Tail Duplication

@@ -671,7 +670,6 @@
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Shrink Wrapping analysis
; GCN-O1-OPTS-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O1-OPTS-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O1-OPTS-NEXT: Control Flow Optimizer
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Tail Duplication

@@ -970,7 +968,6 @@
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: Shrink Wrapping analysis
; GCN-O2-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O2-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O2-NEXT: Control Flow Optimizer
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O2-NEXT: Tail Duplication

@@ -1282,7 +1279,6 @@
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: Shrink Wrapping analysis
; GCN-O3-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O3-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O3-NEXT: Control Flow Optimizer
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O3-NEXT: Tail Duplication

@@ -188,6 +188,7 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: ; %bb.3: ; %LeafBlock1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_cmp_eq_u32 s8, 1
; GCN-NEXT: s_mov_b64 s[4:5], -1
; GCN-NEXT: s_cbranch_scc0 .LBB1_5
; GCN-NEXT: ; %bb.4: ; %case1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1

@@ -187,6 +187,8 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_branch .LBB3_3
; SI-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: .LBB3_2: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[14:15]

@@ -204,6 +206,7 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_cbranch_vccz .LBB3_1
; SI-NEXT: ; %bb.5: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: s_mov_b64 vcc, s[6:7]
; SI-NEXT: s_cbranch_vccz .LBB3_7
; SI-NEXT: ; %bb.6: ; %if.else

@@ -260,6 +263,8 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_branch .LBB3_3
; FLAT-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[8:9], 0
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: .LBB3_2: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15]

@@ -277,6 +282,7 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_cbranch_vccz .LBB3_1
; FLAT-NEXT: ; %bb.5: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: s_mov_b64 vcc, s[6:7]
; FLAT-NEXT: s_cbranch_vccz .LBB3_7
; FLAT-NEXT: ; %bb.6: ; %if.else

@@ -60,6 +60,7 @@ define amdgpu_kernel void @kernel(i32 %a, i32 addrspace(1)* %x, i32 noundef %n)
; CHECK-NEXT: s_cmp_lg_u32 s10, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_14
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: s_mov_b64 s[2:3], 0
; CHECK-NEXT: s_mov_b64 s[0:1], -1
; CHECK-NEXT: .LBB0_4: ; %Flow3
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec

@@ -102,6 +103,7 @@ define amdgpu_kernel void @kernel(i32 %a, i32 addrspace(1)* %x, i32 noundef %n)
; CHECK-NEXT: s_branch .LBB0_10
; CHECK-NEXT: .LBB0_14: ; %cond.false.i8
; CHECK-NEXT: s_mov_b64 s[2:3], -1
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_trap 2
; CHECK-NEXT: s_branch .LBB0_4
entry:

@@ -140,6 +140,7 @@ define void @my_func(i32 %0) {
; GCN-NEXT: s_cbranch_scc1 .LBB0_10
; GCN-NEXT: ; %bb.9:
; GCN-NEXT: s_mov_b64 s[6:7], -1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT: s_cbranch_execnz .LBB0_3
; GCN-NEXT: s_branch .LBB0_4

@@ -172,6 +173,7 @@ define void @my_func(i32 %0) {
; GCN-NEXT: ; %bb.15: ; %LeafBlock9
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b64 s[8:9], -1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc
; GCN-NEXT: ; %bb.16: ; %do.body.i.i.i.i
; GCN-NEXT: s_mov_b64 s[4:5], exec

@@ -34,6 +34,7 @@ define amdgpu_kernel void @test_inst_offset_kernel() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s0, 0xffc
; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8

@@ -70,6 +71,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() {
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_mov_b32 s4, 0x40000
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8

@@ -88,6 +90,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s0, 0x1000
; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8

@@ -234,6 +237,7 @@ define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s8, 0x1004
; FLATSCR-NEXT: scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART

@@ -316,6 +320,7 @@ define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s0, 0xff8
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART

@@ -362,6 +367,7 @@ define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_mov_b32 s4, 0x3ff00
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_nop 0
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload

@@ -385,6 +391,7 @@ define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s0, 0xffc
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART

@@ -10551,6 +10551,7 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: ;;#ASMSTART
; GFX6-NEXT: ;;#ASMEND
; GFX6-NEXT: s_mov_b32 s2, 0x84800
; GFX6-NEXT: buffer_load_dword v17, off, s[40:43], s2 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload

@@ -10795,7 +10796,7 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s0 ; 16-byte Folded Spill
; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 ; 16-byte Folded Spill
; GFX9-FLATSCR-NEXT: s_nop 0
; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100
; GFX9-FLATSCR-NEXT: ;;#ASMSTART
; GFX9-FLATSCR-NEXT: ;;#ASMEND
; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[8:11], off, s0 ; 16-byte Folded Reload

@@ -11031,6 +11032,7 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v60
; GFX10-FLATSCR-NEXT: ;;#ASMSTART
; GFX10-FLATSCR-NEXT: ;;#ASMEND
; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v65
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v66
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v67
@@ -149,7 +149,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

@@ -1652,6 +1652,7 @@ define void @infiniteloop3() "frame-pointer"="all" {
; THUMB-ENABLE-NEXT: movs r0, #0
; THUMB-ENABLE-NEXT: cbnz r0, LBB11_5
; THUMB-ENABLE-NEXT: @ %bb.1: @ %loop2a.preheader
; THUMB-ENABLE-NEXT: movs r0, #0
; THUMB-ENABLE-NEXT: movs r1, #0
; THUMB-ENABLE-NEXT: mov r2, r0
; THUMB-ENABLE-NEXT: b LBB11_3

@@ -1678,6 +1679,7 @@ define void @infiniteloop3() "frame-pointer"="all" {
; THUMB-DISABLE-NEXT: movs r0, #0
; THUMB-DISABLE-NEXT: cbnz r0, LBB11_5
; THUMB-DISABLE-NEXT: @ %bb.1: @ %loop2a.preheader
; THUMB-DISABLE-NEXT: movs r0, #0
; THUMB-DISABLE-NEXT: movs r1, #0
; THUMB-DISABLE-NEXT: mov r2, r0
; THUMB-DISABLE-NEXT: b LBB11_3

@@ -3764,6 +3764,7 @@ define i64 @stest_f32i64_mm(float %x) {
; SOFT-NEXT: @ %bb.18: @ %entry
; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: .LBB48_19: @ %entry
; SOFT-NEXT: ldr r0, .LCPI48_0
; SOFT-NEXT: cmp r4, r0
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: beq .LBB48_21

@@ -4346,6 +4347,7 @@ define i64 @stest_f16i64_mm(half %x) {
; SOFT-NEXT: @ %bb.18: @ %entry
; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: .LBB51_19: @ %entry
; SOFT-NEXT: ldr r0, .LCPI51_0
; SOFT-NEXT: cmp r4, r0
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: beq .LBB51_21

@@ -22,7 +22,7 @@ entry:
; for.body -> for.cond.backedge (100%)
;          -> cond.false.i (0%)
; CHECK: bb.1.for.body:
; CHECK: successors: %bb.2(0x80000000), %bb.5(0x00000000)
; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000000)
for.body:
  br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1

@@ -1,6 +1,6 @@
; RUN: llc -mtriple=armv7-apple-ios8.0 -o - %s | FileCheck %s

%BigInt = type i8500
%BigInt = type i5500

define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
; CHECK-LABEL: test_moved_jumptable:

@@ -283,6 +283,7 @@ define arm_aapcs_vfpcc i32 @t10(float %x) nounwind {
; CHECK-NEXT: vst1.32 {d17[1]}, [r0:32]
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movne r0, #0
; CHECK-NEXT: bxne lr
; CHECK-NEXT: LBB9_1:
; CHECK-NEXT: trap

@@ -14,8 +14,9 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {

; <label>:4: ; preds = %2
  br label %5
; CHECK: if r4 s>= r3 goto +10 <LBB0_2>
; CHECK-LABEL: <LBB0_1>:
; CHECK: if r4 s>= r3 goto +11 <LBB0_3>
; CHECK: r0 = 0
; CHECK-LABEL: <LBB0_2>:

; <label>:5: ; preds = %4, %5
  %6 = phi i32 [ %9, %5 ], [ 0, %4 ]

@@ -27,12 +28,12 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
  %12 = icmp slt i32 %10, %11
  br i1 %12, label %5, label %13
; CHECK: r1 = r3
; CHECK: if r2 s> r3 goto -10 <LBB0_1>
; CHECK: if r2 s> r3 goto -10 <LBB0_2>

; <label>:13: ; preds = %5, %2
  %14 = phi i32 [ 0, %2 ], [ %9, %5 ]
  ret i32 %14
; CHECK-LABEL: <LBB0_2>:
; CHECK-LABEL: <LBB0_3>:
; CHECK: exit
}
attributes #0 = { norecurse nounwind readnone }

@@ -841,6 +841,7 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) {
; MMR3-NEXT: or16 $6, $4
; MMR3-NEXT: lw $4, 8($sp) # 4-byte Folded Reload
; MMR3-NEXT: movn $1, $7, $4
; MMR3-NEXT: li16 $7, 0
; MMR3-NEXT: movn $1, $6, $10
; MMR3-NEXT: lw $4, 24($sp) # 4-byte Folded Reload
; MMR3-NEXT: movz $1, $4, $16

@@ -915,6 +915,7 @@ define signext i128 @shl_i128(i128 signext %a, i128 signext %b) {
; MMR3-NEXT: or16 $5, $3
; MMR3-NEXT: lw $3, 12($sp) # 4-byte Folded Reload
; MMR3-NEXT: movn $8, $7, $3
; MMR3-NEXT: li16 $7, 0
; MMR3-NEXT: movn $8, $5, $10
; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload
; MMR3-NEXT: movz $8, $3, $16

@@ -182,7 +182,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

@@ -11,6 +11,7 @@ define dso_local void @wibble(ptr nocapture readonly %arg, i32 signext %arg1, pt
; CHECK-NEXT: blt 0, .LBB0_5
; CHECK-NEXT: # %bb.1: # %bb6
; CHECK-NEXT: clrldi 4, 4, 32
; CHECK-NEXT: li 7, 7
; CHECK-NEXT: addi 4, 4, -1
; CHECK-NEXT: mtctr 4
; CHECK-NEXT: li 4, 8

@@ -53,7 +53,7 @@ define signext i32 @bar() #0 {
; AIX64-NEXT: L..BB0_1: # %for.cond
; AIX64-NEXT: #
; AIX64-NEXT: lwz 3, 120(1)
; AIX64-NEXT: ld 4, L..C0(2)
; AIX64-NEXT: ld 4, L..C0(2) # @x
; AIX64-NEXT: lwz 4, 0(4)
; AIX64-NEXT: cmpw 3, 4
; AIX64-NEXT: bge 0, L..BB0_4

@@ -618,6 +618,7 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
; P8-NEXT: lfs f0, .LCPI13_0@toc@l(r3)
; P8-NEXT: lis r3, -32768
; P8-NEXT: fcmpo cr0, f2, f3
; P8-NEXT: xxlxor f3, f3, f3
; P8-NEXT: fcmpo cr1, f1, f0
; P8-NEXT: crand 4*cr5+lt, 4*cr1+eq, lt
; P8-NEXT: crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq

@@ -659,6 +660,7 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
; P9-NEXT: lfs f0, .LCPI13_0@toc@l(r3)
; P9-NEXT: fcmpo cr1, f2, f3
; P9-NEXT: lis r3, -32768
; P9-NEXT: xxlxor f3, f3, f3
; P9-NEXT: fcmpo cr0, f1, f0
; P9-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
; P9-NEXT: crandc 4*cr5+gt, lt, eq

@@ -1295,6 +1295,7 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
; PC64LE-NEXT: lfs 0, .LCPI31_0@toc@l(3)
; PC64LE-NEXT: lis 3, -32768
; PC64LE-NEXT: fcmpo 0, 2, 3
; PC64LE-NEXT: xxlxor 3, 3, 3
; PC64LE-NEXT: fcmpo 1, 1, 0
; PC64LE-NEXT: crand 20, 6, 0
; PC64LE-NEXT: crandc 21, 4, 6

@@ -1332,6 +1333,7 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
; PC64LE9-NEXT: lfs 0, .LCPI31_0@toc@l(3)
; PC64LE9-NEXT: fcmpo 1, 2, 3
; PC64LE9-NEXT: lis 3, -32768
; PC64LE9-NEXT: xxlxor 3, 3, 3
; PC64LE9-NEXT: fcmpo 0, 1, 0
; PC64LE9-NEXT: crand 20, 2, 4
; PC64LE9-NEXT: crandc 21, 0, 2
@@ -1,327 +0,0 @@
# RUN: llc -mtriple=s390x-linux-gnu -start-before=prologepilog %s -o - -mcpu=z14 \
# RUN:   -verify-machineinstrs 2>&1 | FileCheck %s
# REQUIRES: asserts
#
# Test that redundant frame addressing anchor points are removed by
# MachineLateInstrsCleanup.

--- |
  define void @fun1() { ret void }
  define void @fun2() { ret void }
  define void @fun3() { ret void }
  define void @fun4() { ret void }
  define void @fun5() { ret void }
  define void @fun6() { ret void }
  define void @fun7() { ret void }
  define void @fun8() { ret void }

  declare i32 @foo()

  @ptr = external dso_local local_unnamed_addr global ptr
---

# Test elimination of redundant LAYs in successor blocks.
# CHECK-LABEL: fun1:
# CHECK: lay %r1, 4096(%r15)
# CHECK: # %bb.1:
# CHECK-NOT: lay
# CHECK: .LBB0_2:
# CHECK-NOT: lay
---
name: fun1
tracksRegLiveness: true
stack:
  - { id: 0, size: 5000 }
  - { id: 1, size: 2500 }
  - { id: 2, size: 2500 }

machineFunctionInfo: {}
body: |
  bb.0 (%ir-block.0):
    liveins: $f16d
    successors: %bb.2(0x00000001), %bb.1(0x7fffffff)

    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.1, 0, $noreg
    CHIMux undef $r0l, 3, implicit-def $cc
    BRC 14, 8, %bb.2, implicit killed $cc
    J %bb.1

  bb.1:
    liveins: $f16d
    VST64 renamable $f16d, %stack.2, 0, $noreg
    J %bb.2

  bb.2:
    liveins: $f16d
    VST64 renamable $f16d, %stack.1, 0, $noreg
    Return
...

# In this function the LAY in bb.1 will have a different offset, so the first
# LAY in bb.2 must remain.
# CHECK-LABEL: fun2:
# CHECK: lay %r1, 4096(%r15)
# CHECK: # %bb.1:
# CHECK: lay %r1, 8192(%r15)
# CHECK: .LBB1_2:
# CHECK: lay %r1, 4096(%r15)
# CHECK-NOT: lay
---
name: fun2
tracksRegLiveness: true
stack:
  - { id: 0, size: 5000 }
  - { id: 1, size: 5000 }
  - { id: 2, size: 2500 }

machineFunctionInfo: {}
body: |
  bb.0 (%ir-block.0):
    liveins: $f16d
    successors: %bb.2(0x00000001), %bb.1(0x7fffffff)

    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.1, 0, $noreg
    CHIMux undef $r0l, 3, implicit-def $cc
    BRC 14, 8, %bb.2, implicit killed $cc
    J %bb.1

  bb.1:
    liveins: $f16d
    VST64 renamable $f16d, %stack.2, 0, $noreg
    J %bb.2

  bb.2:
    liveins: $f16d
    VST64 renamable $f16d, %stack.1, 0, $noreg
    VST64 renamable $f16d, %stack.1, 0, $noreg
    Return
...

# Test case with a loop (with room for improvement: since %r1 is not clobbered
# inside the loop only the first LAY is needed).
# CHECK-LABEL: fun3:
# CHECK: lay %r1, 4096(%r15)
# CHECK: .LBB2_1:
# CHECK: lay %r1, 4096(%r15)
# CHECK: .LBB2_2:
# CHECK-NOT: lay %r1, 4096(%r15)
---
name: fun3
tracksRegLiveness: true
stack:
  - { id: 0, size: 5000 }
  - { id: 1, size: 2500 }
  - { id: 2, size: 2500 }

machineFunctionInfo: {}
body: |
  bb.0 (%ir-block.0):
    liveins: $f16d
    successors: %bb.2(0x00000001), %bb.1(0x7fffffff)

    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.1, 0, $noreg
    CHIMux undef $r0l, 3, implicit-def $cc
    BRC 14, 8, %bb.2, implicit killed $cc
    J %bb.1

  bb.1:
    liveins: $f16d
    successors: %bb.2(0x00000001), %bb.1(0x7fffffff)

    VST64 renamable $f16d, %stack.2, 0, $noreg
    CHIMux undef $r0l, 3, implicit-def $cc
    BRC 14, 8, %bb.1, implicit killed $cc
    J %bb.2

  bb.2:
    liveins: $f16d
    VST64 renamable $f16d, %stack.1, 0, $noreg
    Return
...

# Test case with a call which clobbers r1: the second LAY after the call is needed.
# CHECK-LABEL: fun4:
# CHECK: lay %r1, 4096(%r15)
# CHECK: brasl
# CHECK: lay %r1, 4096(%r15)
---
name: fun4
tracksRegLiveness: true
stack:
  - { id: 0, size: 5000 }
  - { id: 1, size: 2500 }

machineFunctionInfo: {}
body: |
  bb.0 (%ir-block.0):
    liveins: $f16d

    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.1, 0, $noreg
    ADJCALLSTACKDOWN 0, 0
    CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
    ADJCALLSTACKUP 0, 0
    $f17d = IMPLICIT_DEF
    VST64 renamable $f17d, %stack.1, 0, $noreg
    Return
...

# Test case where index reg is loaded instead of using an LAY. Only one LGHI is needed.
# CHECK-LABEL: fun5:
# CHECK: lghi %r1, 4096
# CHECK-NOT: lghi
---
name: fun5
tracksRegLiveness: true
stack:
  - { id: 0, size: 5000 }
  - { id: 1, size: 2500 }

machineFunctionInfo: {}
body: |
  bb.0 (%ir-block.0):
    liveins: $f16d

    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    VST64 renamable $f16d, %stack.0, 0, $noreg
    $f0q = nofpexcept LXEB %stack.1, 0, $noreg, implicit $fpc
    $f1q = nofpexcept LXEB %stack.1, 0, $noreg, implicit $fpc
    Return
...

# Test where the constant is a Global. Only one LARL is needed.
# CHECK-LABEL: fun6:
# CHECK: larl %r1, ptr
# CHECK-NOT: larl
---
name: fun6
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
fixedStack:
  - { id: 0, offset: -160, size: 8, alignment: 8 }
machineFunctionInfo: {}
body: |
  bb.0:
    successors: %bb.2(0x30000000), %bb.1(0x50000000)

    renamable $r1d = LARL @ptr
    CGHSI killed renamable $r1d, 0, 0, implicit-def $cc :: (volatile dereferenceable load (s64) from @ptr)
    BRC 14, 8, %bb.2, implicit killed $cc
    J %bb.1

  bb.1:
    renamable $r1d = LARL @ptr
    MVGHI killed renamable $r1d, 0, 0

  bb.2:
    Return

...

# Load of an invariant location (GOT). Only one LGRL is needed.
# CHECK-LABEL: fun7:
# CHECK: lgrl %r1, ptr
# CHECK-NOT: lgrl
---
name: fun7
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
fixedStack:
  - { id: 0, offset: -160, size: 8, alignment: 8 }
machineFunctionInfo: {}
body: |
  bb.0:
    successors: %bb.2(0x30000000), %bb.1(0x50000000)

    renamable $r1d = LGRL @ptr :: (load (s64) from got)
    CGHSI killed renamable $r1d, 0, 0, implicit-def $cc :: (volatile dereferenceable load (s64) from @ptr)
    BRC 14, 8, %bb.2, implicit killed $cc
    J %bb.1

  bb.1:
    renamable $r1d = LGRL @ptr :: (load (s64) from got)
    MVGHI killed renamable $r1d, 0, 0

  bb.2:
    Return

...

# Load from constant pool. Only one LARL is needed.
# CHECK-LABEL: fun8:
# CHECK: larl %r1, .LCPI7_0
# CHECK-NOT: larl
---
name: fun8
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
liveins:
  - { reg: '$f0s' }
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
fixedStack:
  - { id: 0, offset: -160, size: 8, alignment: 8 }
constants:
  - id: 0
    value: float 0x43E0000000000000
    alignment: 4
machineFunctionInfo: {}
body: |
  bb.0 (%ir-block.0):
    successors: %bb.1, %bb.2
    liveins: $f0s

    renamable $r1d = LARL %const.0
    renamable $f1s = LE killed renamable $r1d, 0, $noreg :: (load (s32) from constant-pool)
    nofpexcept CEBR renamable $f0s, renamable $f1s, implicit-def $cc, implicit $fpc
    BRC 15, 11, %bb.2, implicit killed $cc

  bb.1:
    liveins: $f0s

    J %bb.3

  bb.2 (%ir-block.0):
    liveins: $f0s, $f1s

    renamable $r1d = LARL %const.0
    renamable $f1s = LE killed renamable $r1d, 0, $noreg :: (load (s32) from constant-pool)

  bb.3 (%ir-block.0):
    liveins: $r2d

    Return

...
@@ -77,9 +77,12 @@ entry:
; CHECK-FP-ATPCS: adds r0, #8
; CHECK-FP-ATPCS: stm r0!, {r1, r2, r3}
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: mov r7, r0
; CHECK-FP-AAPCS: adds r7, #8
; CHECK-FP-AAPCS: stm r7!, {r1, r2, r3}
; CHECK-FP-AAPCS: str r1, [r0, #8]
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: str r2, [r0, #12]
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: str r3, [r0, #16]

; Re-aligned stack, access via FP
; int test_args_realign(int a, int b, int c, int d, int e) {
;   __attribute__((aligned(16))) int v[4];

@@ -145,9 +148,11 @@ entry:
; CHECK-ATPCS-NEXT: adds r0, #8
; CHECK-ATPCS-NEXT: stm r0!, {r1, r2, r3}
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: mov r7, r0
; CHECK-AAPCS: adds r7, #8
; CHECK-AAPCS: stm r7!, {r1, r2, r3}
; CHECK-AAPCS: str r1, [r0, #8]
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: str r2, [r0, #12]
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: str r3, [r0, #16]
; VLAs present, access via FP
; int test_args_vla(int a, int b, int c, int d, int e) {
;   int v[a];

@@ -303,9 +308,11 @@ entry:
; CHECK-FP-ATPCS-NEXT: adds r0, #8
; CHECK-FP-ATPCS-NEXT: stm r0!, {r1, r2, r3}
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS-NEXT: mov r5, r0
; CHECK-FP-AAPCS-NEXT: adds r5, #8
; CHECK-FP-AAPCS-NEXT: stm r5!, {r1, r2, r3}
; CHECK-FP-AAPCS-NEXT: str r1, [r0, #8]
; CHECK-FP-AAPCS-NEXT: mov r0, r11
; CHECK-FP-AAPCS-NEXT: str r2, [r0, #12]
; CHECK-FP-AAPCS-NEXT: mov r0, r11
; CHECK-FP-AAPCS-NEXT: str r3, [r0, #16]

; struct S { int x[128]; } s;
; int test(S a, int b) {

@@ -1890,6 +1890,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq

@@ -2151,6 +2152,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq

@@ -2408,6 +2410,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq

@@ -18,6 +18,7 @@ define fastcc ptr @pushdecl(ptr %x) nounwind {
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl
; CHECK-NEXT: .LBB0_1: # %bb160
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl

@@ -27,6 +27,7 @@ define i16 @SQLDriversW(ptr %henv, i16 zeroext %fDir, ptr %szDrvDesc, i16 signe
; CHECK-NEXT: jne LBB0_6
; CHECK-NEXT: ## %bb.4: ## %bb37
; CHECK-NEXT: movw $0, 40(%edi)
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: leal (,%ecx,4), %ecx
; CHECK-NEXT: leal (,%ebx,4), %edx

@@ -58,7 +58,6 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: tileloadd (%rax,%r14), %tmm0
; CHECK-NEXT: movabsq $64, %rcx
; CHECK-NEXT: tileloadd 1088(%rsp,%rcx), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: movabsq $64, %rcx
; CHECK-NEXT: tileloadd 64(%rsp,%rcx), %tmm2 # 1024-byte Folded Reload
; CHECK-NEXT: tdpbssd %tmm2, %tmm1, %tmm0
; CHECK-NEXT: tilestored %tmm0, (%rax,%r14)

@@ -46,6 +46,7 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm5
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vzeroupper

@@ -63,6 +64,7 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tdpbssd %tmm3, %tmm2, %tmm5
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vzeroupper

@@ -8,6 +8,7 @@ target triple = "x86_64-apple-macosx"

; CHECK-LABEL: foo:
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
define void @foo() #0 {
entry:
  %_tags = alloca [3 x i32], align 4

@@ -338,24 +338,26 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: movl %edx, %ebx
; X86-SLOW-NEXT: movl %esi, %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: .LBB6_3:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: je .LBB6_5
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl %ebp, %esi
; X86-SLOW-NEXT: movl %edx, %ebp
; X86-SLOW-NEXT: movl %ecx, %edx
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: .LBB6_5:
; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_6:
; X86-SLOW-NEXT: movl %edx, %edi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %edi
; X86-SLOW-NEXT: shrl %ebx

@@ -247,6 +247,7 @@ define <4 x double> @load_v4f64_v4i32_zero(<4 x i32> %trigger, ptr %addr) {
; SSE-NEXT: retq
; SSE-NEXT: LBB3_1: ## %cond.load
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: testb $2, %al
; SSE-NEXT: je LBB3_4
; SSE-NEXT: LBB3_3: ## %cond.load1

@@ -1128,6 +1129,7 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE2-NEXT: retq
; SSE2-NEXT: LBB10_1: ## %cond.load
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB10_4
; SSE2-NEXT: LBB10_3: ## %cond.load1

@@ -1207,6 +1209,7 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE42-NEXT: retq
; SSE42-NEXT: LBB10_1: ## %cond.load
; SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB10_4
; SSE42-NEXT: LBB10_3: ## %cond.load1

@@ -2647,6 +2650,7 @@ define <8 x i32> @load_v8i32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE2-NEXT: retq
; SSE2-NEXT: LBB20_1: ## %cond.load
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB20_4
; SSE2-NEXT: LBB20_3: ## %cond.load1

@@ -2726,6 +2730,7 @@ define <8 x i32> @load_v8i32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE42-NEXT: retq
; SSE42-NEXT: LBB20_1: ## %cond.load
; SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB20_4
; SSE42-NEXT: LBB20_3: ## %cond.load1

@@ -2231,6 +2231,7 @@ define <16 x i32> @splat_v3i32(ptr %ptr) {
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,0,1]
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: xorps %xmm3, %xmm3
; SSE42-NEXT: retq
;

@@ -170,7 +170,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

@@ -1240,6 +1240,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: cmovsl %esi, %eax
; X86-NEXT: movl $0, %esi
; X86-NEXT: movl $-1, %ebx
; X86-NEXT: cmovsl %ebx, %edi
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload

@@ -533,6 +533,7 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
; i686-NEXT: .LBB6_9: # %entry
; i686-NEXT: movl %edi, %esi
; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; i686-NEXT: shrl %cl, %ebp
; i686-NEXT: testb $32, %cl

@@ -845,6 +846,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
; i686-NEXT: movl {{[0-9]+}}(%esp), %esi
; i686-NEXT: movb $64, %cl
; i686-NEXT: subb %dl, %cl
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx
; i686-NEXT: shldl %cl, %ebx, %ebp
; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill

@@ -354,6 +354,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: cmovnel %eax, %ebx
; X86-NEXT: movl $65535, %eax # imm = 0xFFFF
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shll %cl, %esi

@@ -111,12 +111,14 @@ define <4 x i32> @ossfuzz15662(ptr %in) {
; X32: # %bb.0:
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: ossfuzz15662:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rax)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
  %C10 = icmp ule i1 false, false
  %C3 = icmp ule i1 true, undef

@@ -178,12 +178,14 @@ define <4 x i32> @test17(<4 x i32> %a0, ptr %dummy) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, (%eax)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test17:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
  %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
  store <4 x i32> %a, ptr %dummy

@@ -197,12 +199,14 @@ define <4 x i32> @test18(<4 x i32> %a0, ptr %dummy) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, (%eax)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test18:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
  %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 3)
  store <4 x i32> %a, ptr %dummy

@@ -87,10 +87,13 @@ declare void @g(i32*, i32*)
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r5, cp[[[INDEX1]]]
; CHECK: stw r1, r0[r5]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX2]]]
; CHECK: stw r2, r0[r1]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX3]]]
; CHECK: stw r3, r0[r1]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX4]]]
; CHECK: stw r11, r0[r1]
; CHECK: ldaw sp, sp[65535]