Revert "Reapply "[CodeGen] Add new pass for late cleanup of redundant definitions.""

This reverts commit 17db0de330.

Some more bots got broken - need to investigate.
Jonas Paulsson 2022-12-05 00:52:00 +01:00
parent 2577cb7a20
commit 122efef8ee
58 changed files with 1258 additions and 895 deletions

@ -1130,9 +1130,6 @@ void CodeGenPassBuilder<Derived>::addMachineLateOptimization(
if (!TM.requiresStructuredCFG())
addPass(TailDuplicatePass());
// Cleanup of redundant (identical) address/immediate loads.
addPass(MachineLateInstrsCleanupPass());
// Copy propagation.
addPass(MachineCopyPropagationPass());
}

@ -151,7 +151,6 @@ DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ())
DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass, ())
DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ())
DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ())

@ -334,10 +334,6 @@ namespace llvm {
MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr);
/// MachineLateInstrsCleanup - This pass removes redundant identical
/// instructions after register allocation and rematerialization.
extern char &MachineLateInstrsCleanupID;
/// PeepholeOptimizer - This pass performs peephole optimizations -
/// like extension and comparison eliminations.
extern char &PeepholeOptimizerID;

@ -277,7 +277,6 @@ void initializeMachineDominanceFrontierPass(PassRegistry&);
void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeMachineFunctionSplitterPass(PassRegistry &);
void initializeMachineLateInstrsCleanupPass(PassRegistry&);
void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);

@ -119,7 +119,6 @@ add_llvm_component_library(LLVMCodeGen
MachineFunctionSplitter.cpp
MachineInstrBundle.cpp
MachineInstr.cpp
MachineLateInstrsCleanup.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineLoopUtils.cpp

@ -78,7 +78,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCycleInfoWrapperPassPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLateInstrsCleanupPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoWrapperPassPass(Registry);

@ -1,240 +0,0 @@
//==--- MachineLateInstrsCleanup.cpp - Late Instructions Cleanup Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This simple pass removes any identical and redundant immediate or address
// loads to the same register. The immediate loads removed can originally be
// the result of rematerialization, while the addresses are redundant frame
// addressing anchor points created during Frame Indices elimination.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "machine-latecleanup"

STATISTIC(NumRemoved, "Number of redundant instructions removed.");

namespace {
class MachineLateInstrsCleanup : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
// Data structures to map regs to their definitions per MBB.
using Reg2DefMap = std::map<Register, MachineInstr*>;
std::vector<Reg2DefMap> RegDefs;
// Walk through the instructions in MBB and remove any redundant
// instructions.
bool processBlock(MachineBasicBlock *MBB);
public:
static char ID; // Pass identification, replacement for typeid
MachineLateInstrsCleanup() : MachineFunctionPass(ID) {
initializeMachineLateInstrsCleanupPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
};
} // end anonymous namespace

char MachineLateInstrsCleanup::ID = 0;

char &llvm::MachineLateInstrsCleanupID = MachineLateInstrsCleanup::ID;

INITIALIZE_PASS(MachineLateInstrsCleanup, DEBUG_TYPE,
                "Machine Late Instructions Cleanup Pass", false, false)

bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
bool Changed = false;
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
RegDefs.clear();
RegDefs.resize(MF.getNumBlockIDs());
// Visit all MBBs in an order that maximises the reuse from predecessors.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
for (MachineBasicBlock *MBB : RPOT)
Changed |= processBlock(MBB);
return Changed;
}

// Clear any previous kill flag on Reg found before I in MBB. Walk backwards
// in MBB and if needed continue in predecessors until a use/def of Reg is
// encountered. This seems to be faster in practice than tracking kill flags
// in a map.
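// For instance (illustrative): if the erased instruction redefined %r1, an
// earlier use marked "killed $r1d" is no longer a kill, since %r1 now stays
// live from the surviving earlier definition to the users of the erased one.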
static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
BitVector &VisitedPreds,
const TargetRegisterInfo *TRI) {
VisitedPreds.set(MBB->getNumber());
while (I != MBB->begin()) {
I--;
bool Found = false;
for (auto &MO : I->operands())
if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
if (MO.isDef())
return;
if (MO.readsReg()) {
MO.setIsKill(false);
Found = true; // Keep going for an implicit kill of the super-reg.
}
}
if (Found)
return;
}
// If an earlier def is not in MBB, continue in predecessors.
if (!MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
assert(!MBB->pred_empty() && "Predecessor def not found!");
for (MachineBasicBlock *Pred : MBB->predecessors())
if (!VisitedPreds.test(Pred->getNumber()))
clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
}

static void removeRedundantDef(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
Register Reg = MI->getOperand(0).getReg();
BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI);
MI->eraseFromParent();
++NumRemoved;
}

// Return true if MI is a potential candidate for reuse/removal and if so
// also the register it defines in DefedReg. A candidate is a simple
// instruction that does not touch memory, has only one register definition
// and the only reg it may use is FrameReg. Typically this is an immediate
// load or a load-address instruction.
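// For instance (illustrative, borrowing SystemZ opcodes from the tests in
// this commit): "lghi %r1, 4096" (immediate load) and "larl %r1, @ptr"
// (address materialization) are candidates, whereas a load from memory or an
// instruction reading a register other than FrameReg is not.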
static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
Register FrameReg) {
DefedReg = MCRegister::NoRegister;
bool SawStore = true;
if (!MI->isSafeToMove(nullptr, SawStore) || MI->isImplicitDef() ||
MI->isInlineAsm())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
if (MO.isDef()) {
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
else
return false;
} else if (MO.getReg() && MO.getReg() != FrameReg)
return false;
} else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
MO.isGlobal() || MO.isSymbol()))
return false;
}
return DefedReg.isValid();
}

bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool Changed = false;
Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];
// Find reusable definitions in the predecessor(s).
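  // (A def is taken over only if every other predecessor recorded the
  //  identical definition as well, which the all_of check below verifies.)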
if (!MBB->pred_empty()) {
MachineBasicBlock *FirstPred = *MBB->pred_begin();
for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()])
if (llvm::all_of(
drop_begin(MBB->predecessors()),
[&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
return PredDefI != RegDefs[Pred->getNumber()].end() &&
DefMI->isIdenticalTo(*PredDefI->second);
})) {
MBBDefs[Reg] = DefMI;
LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
<< printMBBReference(*MBB) << ": " << *DefMI;);
}
}
// Process MBB.
MachineFunction *MF = MBB->getParent();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
Register FrameReg = TRI->getFrameRegister(*MF);
for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
// If FrameReg is modified, no previous load-address instructions are valid.
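    // (For instance, a stack adjustment or any other write to the frame
    //  register makes the recorded anchor points stale.)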
if (MI.modifiesRegister(FrameReg, TRI)) {
MBBDefs.clear();
continue;
}
Register DefedReg;
bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
// Check for an earlier identical and reusable instruction.
if (IsCandidate) {
auto DefI = MBBDefs.find(DefedReg);
if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
removeRedundantDef(&MI, TRI);
Changed = true;
continue;
}
}
// Clear any entries in map that MI clobbers.
for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
Register Reg = DefI->first;
if (MI.modifiesRegister(Reg, TRI))
DefI = MBBDefs.erase(DefI);
else
++DefI;
}
// Record this MI for potential later reuse.
if (IsCandidate) {
LLVM_DEBUG(dbgs() << "Found interesting instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
MBBDefs[DefedReg] = &MI;
}
}
return Changed;
}

@ -1522,9 +1522,6 @@ void TargetPassConfig::addOptimizedRegAlloc() {
/// Add passes that optimize machine instructions after register allocation.
void TargetPassConfig::addMachineLateOptimization() {
// Cleanup of redundant immediate/address loads.
addPass(&MachineLateInstrsCleanupID);
// Branch folding must be run after regalloc and prolog/epilog insertion.
addPass(&BranchFolderPassID);

@ -291,7 +291,6 @@ void NVPTXPassConfig::addIRPasses() {
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineLateInstrsCleanupID);
disablePass(&MachineCopyPropagationID);
disablePass(&TailDuplicateID);
disablePass(&StackMapLivenessID);

@ -286,10 +286,6 @@ void RISCVPassConfig::addPreRegAlloc() {
void RISCVPassConfig::addPostRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
addPass(createRISCVRedundantCopyEliminationPass());
// Temporarily disabled until post-RA pseudo expansion problem is fixed,
// see D123394 and D139169.
disablePass(&MachineLateInstrsCleanupID);
}
yaml::MachineFunctionInfo *

@ -501,7 +501,6 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
// them.
// These functions all require the NoVRegs property.
disablePass(&MachineLateInstrsCleanupID);
disablePass(&MachineCopyPropagationID);
disablePass(&PostRAMachineSinkingID);
disablePass(&PostRASchedulerID);

@ -188,7 +188,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

@ -29,8 +29,14 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr x9, [x9]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
; CHECK-NEXT: stur x9, [x29, #-8]
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
; CHECK-NEXT: ldur x9, [x29, #-8]
; CHECK-NEXT: Lloh8:
; CHECK-NEXT: ldr x8, [x8]
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: b.ne LBB0_2
; CHECK-NEXT: ; %bb.1: ; %entry
@ -40,6 +46,7 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_2: ; %entry
; CHECK-NEXT: bl ___stack_chk_fail
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh6, Lloh7, Lloh8
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh1, Lloh3, Lloh5
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh2, Lloh4
entry:

@ -59,23 +59,26 @@ define float @foo2(double* %x0, double* %x1) nounwind {
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: mov w1, #1
; CHECK-NEXT: mov w2, #2
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w3, #3
; CHECK-NEXT: mov w4, #4
; CHECK-NEXT: mov w5, #5
; CHECK-NEXT: mov w6, #6
; CHECK-NEXT: mov w7, #7
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w7, #7
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl callee2

@ -157,6 +157,8 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0
; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
; FLATSCR-NEXT: s_mov_b32 s11, 0
; FLATSCR-NEXT: s_mov_b32 s10, 0
; FLATSCR-NEXT: s_mov_b32 s9, 0
@ -169,8 +171,9 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: s_mov_b32 s4, 0
; FLATSCR-NEXT: s_mov_b32 s3, 0
; FLATSCR-NEXT: s_mov_b32 s2, 0
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0
; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
; FLATSCR-NEXT: s_mov_b32 s40, 0
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40
@ -185,7 +188,6 @@ define amdgpu_kernel void @kernel_caller_byval() {
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2 offset:112
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128
; FLATSCR-NEXT: s_mov_b32 s40, 0
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s40 offset:8
; FLATSCR-NEXT: s_mov_b32 s39, 0
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s39 offset:16

@ -1354,6 +1354,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX7-NEXT: s_cbranch_execz .LBB13_2

@ -537,6 +537,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX803-NEXT: ;;#ASMSTART
; GFX803-NEXT: ;;#ASMEND
; GFX803-NEXT: s_mov_b32 s4, 0x40000
; GFX803-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
@ -553,6 +554,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x40000
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
@ -567,6 +569,8 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX1010-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc dlc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_mov_b32 s4, 0x20000
; GFX1010-NEXT: ;;#ASMSTART
; GFX1010-NEXT: ;;#ASMEND
; GFX1010-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
@ -581,6 +585,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: s_movk_i32 s0, 0x1000
; GFX1100-NEXT: scratch_store_b32 off, v0, s0 ; 4-byte Folded Spill
; GFX1100-NEXT: s_movk_i32 s0, 0x1000
; GFX1100-NEXT: ;;#ASMSTART
; GFX1100-NEXT: ;;#ASMEND
; GFX1100-NEXT: scratch_load_b32 v0, off, s0 ; 4-byte Folded Reload

@ -76,10 +76,12 @@ define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1
; CHECK-NEXT: ; %bb.10: ; %bb16
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[16:17], 0
; CHECK-NEXT: s_mov_b64 s[20:21], -1
; CHECK-NEXT: s_mov_b64 s[22:23], s[10:11]
; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17]
; CHECK-NEXT: s_branch .LBB0_2
; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[22:23], -1
; CHECK-NEXT: s_mov_b64 s[20:21], 0
; CHECK-NEXT: ; implicit-def: $sgpr16_sgpr17
; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17]

File diff suppressed because it is too large.

@ -374,7 +374,6 @@
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: Shrink Wrapping analysis
; GCN-O1-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O1-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O1-NEXT: Control Flow Optimizer
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-NEXT: Tail Duplication
@ -671,7 +670,6 @@
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Shrink Wrapping analysis
; GCN-O1-OPTS-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O1-OPTS-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O1-OPTS-NEXT: Control Flow Optimizer
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Tail Duplication
@ -970,7 +968,6 @@
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: Shrink Wrapping analysis
; GCN-O2-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O2-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O2-NEXT: Control Flow Optimizer
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O2-NEXT: Tail Duplication
@ -1282,7 +1279,6 @@
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: Shrink Wrapping analysis
; GCN-O3-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; GCN-O3-NEXT: Machine Late Instructions Cleanup Pass
; GCN-O3-NEXT: Control Flow Optimizer
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O3-NEXT: Tail Duplication

@ -188,6 +188,7 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; GCN-NEXT: ; %bb.3: ; %LeafBlock1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_cmp_eq_u32 s8, 1
; GCN-NEXT: s_mov_b64 s[4:5], -1
; GCN-NEXT: s_cbranch_scc0 .LBB1_5
; GCN-NEXT: ; %bb.4: ; %case1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1

@ -187,6 +187,8 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_branch .LBB3_3
; SI-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: .LBB3_2: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[14:15]
@ -204,6 +206,7 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_cbranch_vccz .LBB3_1
; SI-NEXT: ; %bb.5: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: s_mov_b64 vcc, s[6:7]
; SI-NEXT: s_cbranch_vccz .LBB3_7
; SI-NEXT: ; %bb.6: ; %if.else
@ -260,6 +263,8 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_branch .LBB3_3
; FLAT-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[8:9], 0
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: .LBB3_2: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15]
@ -277,6 +282,7 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_cbranch_vccz .LBB3_1
; FLAT-NEXT: ; %bb.5: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: s_mov_b64 vcc, s[6:7]
; FLAT-NEXT: s_cbranch_vccz .LBB3_7
; FLAT-NEXT: ; %bb.6: ; %if.else

@ -60,6 +60,7 @@ define amdgpu_kernel void @kernel(i32 %a, i32 addrspace(1)* %x, i32 noundef %n)
; CHECK-NEXT: s_cmp_lg_u32 s10, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_14
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: s_mov_b64 s[2:3], 0
; CHECK-NEXT: s_mov_b64 s[0:1], -1
; CHECK-NEXT: .LBB0_4: ; %Flow3
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
@ -102,6 +103,7 @@ define amdgpu_kernel void @kernel(i32 %a, i32 addrspace(1)* %x, i32 noundef %n)
; CHECK-NEXT: s_branch .LBB0_10
; CHECK-NEXT: .LBB0_14: ; %cond.false.i8
; CHECK-NEXT: s_mov_b64 s[2:3], -1
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_trap 2
; CHECK-NEXT: s_branch .LBB0_4
entry:

@ -140,6 +140,7 @@ define void @my_func(i32 %0) {
; GCN-NEXT: s_cbranch_scc1 .LBB0_10
; GCN-NEXT: ; %bb.9:
; GCN-NEXT: s_mov_b64 s[6:7], -1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT: s_cbranch_execnz .LBB0_3
; GCN-NEXT: s_branch .LBB0_4
@ -172,6 +173,7 @@ define void @my_func(i32 %0) {
; GCN-NEXT: ; %bb.15: ; %LeafBlock9
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b64 s[8:9], -1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc
; GCN-NEXT: ; %bb.16: ; %do.body.i.i.i.i
; GCN-NEXT: s_mov_b64 s[4:5], exec

@ -34,6 +34,7 @@ define amdgpu_kernel void @test_inst_offset_kernel() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s0, 0xffc
; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8
@ -70,6 +71,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() {
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_mov_b32 s4, 0x40000
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
@ -88,6 +90,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s0, 0x1000
; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8
@ -234,6 +237,7 @@ define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_movk_i32 s8, 0x1004
; FLATSCR-NEXT: scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
@ -316,6 +320,7 @@ define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s0, 0xff8
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
@ -362,6 +367,7 @@ define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_mov_b32 s4, 0x3ff00
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_nop 0
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload
@ -385,6 +391,7 @@ define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s0, 0xffc
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART

@ -10551,6 +10551,7 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: ;;#ASMSTART
; GFX6-NEXT: ;;#ASMEND
; GFX6-NEXT: s_mov_b32 s2, 0x84800
; GFX6-NEXT: buffer_load_dword v17, off, s[40:43], s2 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
@ -10795,7 +10796,7 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s0 ; 16-byte Folded Spill
; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 ; 16-byte Folded Spill
; GFX9-FLATSCR-NEXT: s_nop 0
; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100
; GFX9-FLATSCR-NEXT: ;;#ASMSTART
; GFX9-FLATSCR-NEXT: ;;#ASMEND
; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[8:11], off, s0 ; 16-byte Folded Reload
@ -11031,6 +11032,7 @@ define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v60
; GFX10-FLATSCR-NEXT: ;;#ASMSTART
; GFX10-FLATSCR-NEXT: ;;#ASMEND
; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v65
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v66
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v67

@ -149,7 +149,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

@ -1652,6 +1652,7 @@ define void @infiniteloop3() "frame-pointer"="all" {
; THUMB-ENABLE-NEXT: movs r0, #0
; THUMB-ENABLE-NEXT: cbnz r0, LBB11_5
; THUMB-ENABLE-NEXT: @ %bb.1: @ %loop2a.preheader
; THUMB-ENABLE-NEXT: movs r0, #0
; THUMB-ENABLE-NEXT: movs r1, #0
; THUMB-ENABLE-NEXT: mov r2, r0
; THUMB-ENABLE-NEXT: b LBB11_3
@ -1678,6 +1679,7 @@ define void @infiniteloop3() "frame-pointer"="all" {
; THUMB-DISABLE-NEXT: movs r0, #0
; THUMB-DISABLE-NEXT: cbnz r0, LBB11_5
; THUMB-DISABLE-NEXT: @ %bb.1: @ %loop2a.preheader
; THUMB-DISABLE-NEXT: movs r0, #0
; THUMB-DISABLE-NEXT: movs r1, #0
; THUMB-DISABLE-NEXT: mov r2, r0
; THUMB-DISABLE-NEXT: b LBB11_3

@ -3764,6 +3764,7 @@ define i64 @stest_f32i64_mm(float %x) {
; SOFT-NEXT: @ %bb.18: @ %entry
; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: .LBB48_19: @ %entry
; SOFT-NEXT: ldr r0, .LCPI48_0
; SOFT-NEXT: cmp r4, r0
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: beq .LBB48_21
@ -4346,6 +4347,7 @@ define i64 @stest_f16i64_mm(half %x) {
; SOFT-NEXT: @ %bb.18: @ %entry
; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: .LBB51_19: @ %entry
; SOFT-NEXT: ldr r0, .LCPI51_0
; SOFT-NEXT: cmp r4, r0
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: beq .LBB51_21

@ -22,7 +22,7 @@ entry:
; for.body -> for.cond.backedge (100%)
; -> cond.false.i (0%)
; CHECK: bb.1.for.body:
; CHECK: successors: %bb.2(0x80000000), %bb.5(0x00000000)
; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000000)
for.body:
br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1

@ -1,6 +1,6 @@
; RUN: llc -mtriple=armv7-apple-ios8.0 -o - %s | FileCheck %s
%BigInt = type i8500
%BigInt = type i5500
define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
; CHECK-LABEL: test_moved_jumptable:

@ -283,6 +283,7 @@ define arm_aapcs_vfpcc i32 @t10(float %x) nounwind {
; CHECK-NEXT: vst1.32 {d17[1]}, [r0:32]
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movne r0, #0
; CHECK-NEXT: bxne lr
; CHECK-NEXT: LBB9_1:
; CHECK-NEXT: trap

@ -14,8 +14,9 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
; <label>:4: ; preds = %2
br label %5
; CHECK: if r4 s>= r3 goto +10 <LBB0_2>
; CHECK-LABEL: <LBB0_1>:
; CHECK: if r4 s>= r3 goto +11 <LBB0_3>
; CHECK: r0 = 0
; CHECK-LABEL: <LBB0_2>:
; <label>:5: ; preds = %4, %5
%6 = phi i32 [ %9, %5 ], [ 0, %4 ]
@ -27,12 +28,12 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
%12 = icmp slt i32 %10, %11
br i1 %12, label %5, label %13
; CHECK: r1 = r3
; CHECK: if r2 s> r3 goto -10 <LBB0_1>
; CHECK: if r2 s> r3 goto -10 <LBB0_2>
; <label>:13: ; preds = %5, %2
%14 = phi i32 [ 0, %2 ], [ %9, %5 ]
ret i32 %14
; CHECK-LABEL: <LBB0_2>:
; CHECK-LABEL: <LBB0_3>:
; CHECK: exit
}
attributes #0 = { norecurse nounwind readnone }

@ -841,6 +841,7 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) {
; MMR3-NEXT: or16 $6, $4
; MMR3-NEXT: lw $4, 8($sp) # 4-byte Folded Reload
; MMR3-NEXT: movn $1, $7, $4
; MMR3-NEXT: li16 $7, 0
; MMR3-NEXT: movn $1, $6, $10
; MMR3-NEXT: lw $4, 24($sp) # 4-byte Folded Reload
; MMR3-NEXT: movz $1, $4, $16

@ -915,6 +915,7 @@ define signext i128 @shl_i128(i128 signext %a, i128 signext %b) {
; MMR3-NEXT: or16 $5, $3
; MMR3-NEXT: lw $3, 12($sp) # 4-byte Folded Reload
; MMR3-NEXT: movn $8, $7, $3
; MMR3-NEXT: li16 $7, 0
; MMR3-NEXT: movn $8, $5, $10
; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload
; MMR3-NEXT: movz $8, $3, $16

@ -182,7 +182,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

@ -11,6 +11,7 @@ define dso_local void @wibble(ptr nocapture readonly %arg, i32 signext %arg1, pt
; CHECK-NEXT: blt 0, .LBB0_5
; CHECK-NEXT: # %bb.1: # %bb6
; CHECK-NEXT: clrldi 4, 4, 32
; CHECK-NEXT: li 7, 7
; CHECK-NEXT: addi 4, 4, -1
; CHECK-NEXT: mtctr 4
; CHECK-NEXT: li 4, 8

@ -53,7 +53,7 @@ define signext i32 @bar() #0 {
; AIX64-NEXT: L..BB0_1: # %for.cond
; AIX64-NEXT: #
; AIX64-NEXT: lwz 3, 120(1)
; AIX64-NEXT: ld 4, L..C0(2)
; AIX64-NEXT: ld 4, L..C0(2) # @x
; AIX64-NEXT: lwz 4, 0(4)
; AIX64-NEXT: cmpw 3, 4
; AIX64-NEXT: bge 0, L..BB0_4

@ -618,6 +618,7 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
; P8-NEXT: lfs f0, .LCPI13_0@toc@l(r3)
; P8-NEXT: lis r3, -32768
; P8-NEXT: fcmpo cr0, f2, f3
; P8-NEXT: xxlxor f3, f3, f3
; P8-NEXT: fcmpo cr1, f1, f0
; P8-NEXT: crand 4*cr5+lt, 4*cr1+eq, lt
; P8-NEXT: crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
@ -659,6 +660,7 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
; P9-NEXT: lfs f0, .LCPI13_0@toc@l(r3)
; P9-NEXT: fcmpo cr1, f2, f3
; P9-NEXT: lis r3, -32768
; P9-NEXT: xxlxor f3, f3, f3
; P9-NEXT: fcmpo cr0, f1, f0
; P9-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
; P9-NEXT: crandc 4*cr5+gt, lt, eq

@ -1295,6 +1295,7 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
; PC64LE-NEXT: lfs 0, .LCPI31_0@toc@l(3)
; PC64LE-NEXT: lis 3, -32768
; PC64LE-NEXT: fcmpo 0, 2, 3
; PC64LE-NEXT: xxlxor 3, 3, 3
; PC64LE-NEXT: fcmpo 1, 1, 0
; PC64LE-NEXT: crand 20, 6, 0
; PC64LE-NEXT: crandc 21, 4, 6
@ -1332,6 +1333,7 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
; PC64LE9-NEXT: lfs 0, .LCPI31_0@toc@l(3)
; PC64LE9-NEXT: fcmpo 1, 2, 3
; PC64LE9-NEXT: lis 3, -32768
; PC64LE9-NEXT: xxlxor 3, 3, 3
; PC64LE9-NEXT: fcmpo 0, 1, 0
; PC64LE9-NEXT: crand 20, 2, 4
; PC64LE9-NEXT: crandc 21, 0, 2

@ -1,327 +0,0 @@
# RUN: llc -mtriple=s390x-linux-gnu -start-before=prologepilog %s -o - -mcpu=z14 \
# RUN: -verify-machineinstrs 2>&1 | FileCheck %s
# REQUIRES: asserts
#
# Test that redundant frame addressing anchor points are removed by
# MachineLateInstrsCleanup.
--- |
define void @fun1() { ret void }
define void @fun2() { ret void }
define void @fun3() { ret void }
define void @fun4() { ret void }
define void @fun5() { ret void }
define void @fun6() { ret void }
define void @fun7() { ret void }
define void @fun8() { ret void }
declare i32 @foo()
@ptr = external dso_local local_unnamed_addr global ptr
---
# Test elimination of redundant LAYs in successor blocks.
# CHECK-LABEL: fun1:
# CHECK: lay %r1, 4096(%r15)
# CHECK: # %bb.1:
# CHECK-NOT: lay
# CHECK: .LBB0_2:
# CHECK-NOT: lay
---
name: fun1
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 2500 }
- { id: 2, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
successors: %bb.2(0x00000001), %bb.1(0x7fffffff)
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
CHIMux undef $r0l, 3, implicit-def $cc
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
liveins: $f16d
VST64 renamable $f16d, %stack.2, 0, $noreg
J %bb.2
bb.2:
liveins: $f16d
VST64 renamable $f16d, %stack.1, 0, $noreg
Return
...

# In this function the LAY in bb.1 will have a different offset, so the first
# LAY in bb.2 must remain.
# CHECK-LABEL: fun2:
# CHECK: lay %r1, 4096(%r15)
# CHECK: # %bb.1:
# CHECK: lay %r1, 8192(%r15)
# CHECK: .LBB1_2:
# CHECK: lay %r1, 4096(%r15)
# CHECK-NOT: lay
---
name: fun2
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 5000 }
- { id: 2, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
successors: %bb.2(0x00000001), %bb.1(0x7fffffff)
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
CHIMux undef $r0l, 3, implicit-def $cc
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
liveins: $f16d
VST64 renamable $f16d, %stack.2, 0, $noreg
J %bb.2
bb.2:
liveins: $f16d
VST64 renamable $f16d, %stack.1, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
Return
...

# Test case with a loop (with room for improvement: since %r1 is not clobbered
# inside the loop, only the first LAY is needed).
# CHECK-LABEL: fun3:
# CHECK: lay %r1, 4096(%r15)
# CHECK: .LBB2_1:
# CHECK: lay %r1, 4096(%r15)
# CHECK: .LBB2_2:
# CHECK-NOT: lay %r1, 4096(%r15)
---
name: fun3
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 2500 }
- { id: 2, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
successors: %bb.2(0x00000001), %bb.1(0x7fffffff)
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
CHIMux undef $r0l, 3, implicit-def $cc
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
liveins: $f16d
successors: %bb.2(0x00000001), %bb.1(0x7fffffff)
VST64 renamable $f16d, %stack.2, 0, $noreg
CHIMux undef $r0l, 3, implicit-def $cc
BRC 14, 8, %bb.1, implicit killed $cc
J %bb.2
bb.2:
liveins: $f16d
VST64 renamable $f16d, %stack.1, 0, $noreg
Return
...

# Test case with a call which clobbers r1: the second LAY after the call is needed.
# CHECK-LABEL: fun4:
# CHECK: lay %r1, 4096(%r15)
# CHECK: brasl
# CHECK: lay %r1, 4096(%r15)
---
name: fun4
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
ADJCALLSTACKDOWN 0, 0
CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
ADJCALLSTACKUP 0, 0
$f17d = IMPLICIT_DEF
VST64 renamable $f17d, %stack.1, 0, $noreg
Return
...

# Test case where index reg is loaded instead of using an LAY. Only one LGHI is needed.
# CHECK-LABEL: fun5:
# CHECK: lghi %r1, 4096
# CHECK-NOT: lghi
---
name: fun5
tracksRegLiveness: true
stack:
- { id: 0, size: 5000 }
- { id: 1, size: 2500 }
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $f16d
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
$f0q = nofpexcept LXEB %stack.1, 0, $noreg, implicit $fpc
$f1q = nofpexcept LXEB %stack.1, 0, $noreg, implicit $fpc
Return
...

# Test where the constant is a Global. Only one LARL is needed.
# CHECK-LABEL: fun6:
# CHECK: larl %r1, ptr
# CHECK-NOT: larl
---
name: fun6
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
fixedStack:
- { id: 0, offset: -160, size: 8, alignment: 8 }
machineFunctionInfo: {}
body: |
bb.0:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
renamable $r1d = LARL @ptr
CGHSI killed renamable $r1d, 0, 0, implicit-def $cc :: (volatile dereferenceable load (s64) from @ptr)
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
renamable $r1d = LARL @ptr
MVGHI killed renamable $r1d, 0, 0
bb.2:
Return
...

# Load of an invariant location (GOT). Only one LGRL is needed.
# CHECK-LABEL: fun7:
# CHECK: lgrl %r1, ptr
# CHECK-NOT: lgrl
---
name: fun7
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
fixedStack:
- { id: 0, offset: -160, size: 8, alignment: 8 }
machineFunctionInfo: {}
body: |
bb.0:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
renamable $r1d = LGRL @ptr :: (load (s64) from got)
CGHSI killed renamable $r1d, 0, 0, implicit-def $cc :: (volatile dereferenceable load (s64) from @ptr)
BRC 14, 8, %bb.2, implicit killed $cc
J %bb.1
bb.1:
renamable $r1d = LGRL @ptr :: (load (s64) from got)
MVGHI killed renamable $r1d, 0, 0
bb.2:
Return
...

# Load from constant pool. Only one LARL is needed.
# CHECK-LABEL: fun8:
# CHECK: larl %r1, .LCPI7_0
# CHECK-NOT: larl
---
name: fun8
alignment: 16
tracksRegLiveness: true
tracksDebugUserValues: true
liveins:
- { reg: '$f0s' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
fixedStack:
- { id: 0, offset: -160, size: 8, alignment: 8 }
constants:
- id: 0
value: float 0x43E0000000000000
alignment: 4
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
successors: %bb.1, %bb.2
liveins: $f0s
renamable $r1d = LARL %const.0
renamable $f1s = LE killed renamable $r1d, 0, $noreg :: (load (s32) from constant-pool)
nofpexcept CEBR renamable $f0s, renamable $f1s, implicit-def $cc, implicit $fpc
BRC 15, 11, %bb.2, implicit killed $cc
bb.1:
liveins: $f0s
J %bb.3
bb.2 (%ir-block.0):
liveins: $f0s, $f1s
renamable $r1d = LARL %const.0
renamable $f1s = LE killed renamable $r1d, 0, $noreg :: (load (s32) from constant-pool)
bb.3 (%ir-block.0):
liveins: $r2d
Return
...

@ -77,9 +77,12 @@ entry:
; CHECK-FP-ATPCS: adds r0, #8
; CHECK-FP-ATPCS: stm r0!, {r1, r2, r3}
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: mov r7, r0
; CHECK-FP-AAPCS: adds r7, #8
; CHECK-FP-AAPCS: stm r7!, {r1, r2, r3}
; CHECK-FP-AAPCS: str r1, [r0, #8]
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: str r2, [r0, #12]
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS: str r3, [r0, #16]
; Re-aligned stack, access via FP
; int test_args_realign(int a, int b, int c, int d, int e) {
; __attribute__((aligned(16))) int v[4];
@ -145,9 +148,11 @@ entry:
; CHECK-ATPCS-NEXT: adds r0, #8
; CHECK-ATPCS-NEXT: stm r0!, {r1, r2, r3}
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: mov r7, r0
; CHECK-AAPCS: adds r7, #8
; CHECK-AAPCS: stm r7!, {r1, r2, r3}
; CHECK-AAPCS: str r1, [r0, #8]
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: str r2, [r0, #12]
; CHECK-AAPCS: mov r0, r11
; CHECK-AAPCS: str r3, [r0, #16]
; VLAs present, access via FP
; int test_args_vla(int a, int b, int c, int d, int e) {
; int v[a];
@ -303,9 +308,11 @@ entry:
; CHECK-FP-ATPCS-NEXT: adds r0, #8
; CHECK-FP-ATPCS-NEXT: stm r0!, {r1, r2, r3}
; CHECK-FP-AAPCS: mov r0, r11
; CHECK-FP-AAPCS-NEXT: mov r5, r0
; CHECK-FP-AAPCS-NEXT: adds r5, #8
; CHECK-FP-AAPCS-NEXT: stm r5!, {r1, r2, r3}
; CHECK-FP-AAPCS-NEXT: str r1, [r0, #8]
; CHECK-FP-AAPCS-NEXT: mov r0, r11
; CHECK-FP-AAPCS-NEXT: str r2, [r0, #12]
; CHECK-FP-AAPCS-NEXT: mov r0, r11
; CHECK-FP-AAPCS-NEXT: str r3, [r0, #16]
; struct S { int x[128]; } s;
; int test(S a, int b) {

@ -1890,6 +1890,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq
@ -2151,6 +2152,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq
@ -2408,6 +2410,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: csel r5, r6, r5, eq

@ -18,6 +18,7 @@ define fastcc ptr @pushdecl(ptr %x) nounwind {
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl
; CHECK-NEXT: .LBB0_1: # %bb160
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl

@ -27,6 +27,7 @@ define i16 @SQLDriversW(ptr %henv, i16 zeroext %fDir, ptr %szDrvDesc, i16 signe
; CHECK-NEXT: jne LBB0_6
; CHECK-NEXT: ## %bb.4: ## %bb37
; CHECK-NEXT: movw $0, 40(%edi)
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: leal (,%ecx,4), %ecx
; CHECK-NEXT: leal (,%ebx,4), %edx

@ -58,6 +58,7 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: tileloadd (%rax,%r14), %tmm0
; CHECK-NEXT: movabsq $64, %rcx
; CHECK-NEXT: tileloadd 1088(%rsp,%rcx), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: movabsq $64, %rcx
; CHECK-NEXT: tileloadd 64(%rsp,%rcx), %tmm2 # 1024-byte Folded Reload
; CHECK-NEXT: tdpbssd %tmm2, %tmm1, %tmm0
; CHECK-NEXT: tilestored %tmm0, (%rax,%r14)

@ -46,6 +46,7 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm5
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vzeroupper
@ -63,6 +64,7 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 1088(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: tdpbssd %tmm3, %tmm2, %tmm5
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm5, 64(%rsp,%rax) # 1024-byte Folded Spill
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vzeroupper

@ -8,6 +8,7 @@ target triple = "x86_64-apple-macosx"
; CHECK-LABEL: foo:
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
define void @foo() #0 {
entry:
%_tags = alloca [3 x i32], align 4

@ -338,24 +338,26 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: movl %edx, %ebx
; X86-SLOW-NEXT: movl %esi, %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: .LBB6_3:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: je .LBB6_5
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl %ebp, %esi
; X86-SLOW-NEXT: movl %edx, %ebp
; X86-SLOW-NEXT: movl %ecx, %edx
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: .LBB6_5:
; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_6:
; X86-SLOW-NEXT: movl %edx, %edi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %edi
; X86-SLOW-NEXT: shrl %ebx

@ -247,6 +247,7 @@ define <4 x double> @load_v4f64_v4i32_zero(<4 x i32> %trigger, ptr %addr) {
; SSE-NEXT: retq
; SSE-NEXT: LBB3_1: ## %cond.load
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: testb $2, %al
; SSE-NEXT: je LBB3_4
; SSE-NEXT: LBB3_3: ## %cond.load1
@ -1128,6 +1129,7 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE2-NEXT: retq
; SSE2-NEXT: LBB10_1: ## %cond.load
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB10_4
; SSE2-NEXT: LBB10_3: ## %cond.load1
@ -1207,6 +1209,7 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE42-NEXT: retq
; SSE42-NEXT: LBB10_1: ## %cond.load
; SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB10_4
; SSE42-NEXT: LBB10_3: ## %cond.load1
@ -2647,6 +2650,7 @@ define <8 x i32> @load_v8i32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE2-NEXT: retq
; SSE2-NEXT: LBB20_1: ## %cond.load
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB20_4
; SSE2-NEXT: LBB20_3: ## %cond.load1
@ -2726,6 +2730,7 @@ define <8 x i32> @load_v8i32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
; SSE42-NEXT: retq
; SSE42-NEXT: LBB20_1: ## %cond.load
; SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB20_4
; SSE42-NEXT: LBB20_3: ## %cond.load1

@ -2231,6 +2231,7 @@ define <16 x i32> @splat_v3i32(ptr %ptr) {
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,0,1]
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: xorps %xmm3, %xmm3
; SSE42-NEXT: retq
;

@ -170,7 +170,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication

@ -1240,6 +1240,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: cmovsl %esi, %eax
; X86-NEXT: movl $0, %esi
; X86-NEXT: movl $-1, %ebx
; X86-NEXT: cmovsl %ebx, %edi
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload

@ -533,6 +533,7 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
; i686-NEXT: .LBB6_9: # %entry
; i686-NEXT: movl %edi, %esi
; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; i686-NEXT: shrl %cl, %ebp
; i686-NEXT: testb $32, %cl
@ -845,6 +846,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
; i686-NEXT: movl {{[0-9]+}}(%esp), %esi
; i686-NEXT: movb $64, %cl
; i686-NEXT: subb %dl, %cl
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx
; i686-NEXT: shldl %cl, %ebx, %ebp
; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill

@ -354,6 +354,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: cmovnel %eax, %ebx
; X86-NEXT: movl $65535, %eax # imm = 0xFFFF
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shll %cl, %esi

@ -111,12 +111,14 @@ define <4 x i32> @ossfuzz15662(ptr %in) {
; X32: # %bb.0:
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: ossfuzz15662:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rax)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%C10 = icmp ule i1 false, false
%C3 = icmp ule i1 true, undef

@ -178,12 +178,14 @@ define <4 x i32> @test17(<4 x i32> %a0, ptr %dummy) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, (%eax)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test17:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
store <4 x i32> %a, ptr %dummy
@ -197,12 +199,14 @@ define <4 x i32> @test18(<4 x i32> %a0, ptr %dummy) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, (%eax)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test18:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 3)
store <4 x i32> %a, ptr %dummy

@ -87,10 +87,13 @@ declare void @g(i32*, i32*)
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r5, cp[[[INDEX1]]]
; CHECK: stw r1, r0[r5]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX2]]]
; CHECK: stw r2, r0[r1]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX3]]]
; CHECK: stw r3, r0[r1]
; CHECK: ldaw r0, sp[0]
; CHECK: ldw r1, cp[[[INDEX4]]]
; CHECK: stw r11, r0[r1]
; CHECK: ldaw sp, sp[65535]