[AMDGPU] gfx11 subtarget features & early tests
Tablegen definitions for subtarget features and cpp predicate functions to access the features. New Sub-TargetProcessors and common latencies. Simple changes to MIR codegen tests which pass on gfx11 because they have the same output as previous subtargets or operate on pseudo instructions which are reused from previous subtargets. Contributors: Jay Foad <jay.foad@amd.com> Petar Avramovic <Petar.Avramovic@amd.com> Patch 4/N for upstreaming of AMDGPU gfx11 architecture Depends on D124538 Reviewed By: Petar.Avramovic, foad Differential Revision: https://reviews.llvm.org/D125261
This commit is contained in:
parent
b049eb1fec
commit
18ed279a3a
|
@ -325,6 +325,12 @@ def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
|
|||
"Additional instructions for GFX10+"
|
||||
>;
|
||||
|
||||
def FeatureGFX11Insts : SubtargetFeature<"gfx11-insts",
|
||||
"GFX11Insts",
|
||||
"true",
|
||||
"Additional instructions for GFX11+"
|
||||
>;
|
||||
|
||||
def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts",
|
||||
"GFX10_3Insts",
|
||||
"true",
|
||||
|
@ -355,6 +361,12 @@ def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
|
|||
"Has i16/f16 instructions"
|
||||
>;
|
||||
|
||||
def FeatureTrue16BitInsts : SubtargetFeature<"true16",
|
||||
"HasTrue16BitInsts",
|
||||
"true",
|
||||
"True 16-bit operand instructions"
|
||||
>;
|
||||
|
||||
def FeatureVOP3P : SubtargetFeature<"vop3p",
|
||||
"HasVOP3PInsts",
|
||||
"true",
|
||||
|
@ -554,6 +566,13 @@ def FeatureDot7Insts : SubtargetFeature<"dot7-insts",
|
|||
"Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
|
||||
>;
|
||||
|
||||
def FeatureDot8Insts : SubtargetFeature<"dot8-insts",
|
||||
"HasDot8Insts",
|
||||
"true",
|
||||
"Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16, "
|
||||
"v_dot4_i32_iu8, v_dot8_i32_iu4 instructions"
|
||||
>;
|
||||
|
||||
def FeatureMAIInsts : SubtargetFeature<"mai-insts",
|
||||
"HasMAIInsts",
|
||||
"true",
|
||||
|
@ -667,6 +686,12 @@ class SubtargetFeatureNSAMaxSize <int Value> : SubtargetFeature <
|
|||
def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>;
|
||||
def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>;
|
||||
|
||||
def FeatureVOPD : SubtargetFeature<"vopd",
|
||||
"HasVOPDInsts",
|
||||
"true",
|
||||
"Has VOPD dual issue wave32 instructions"
|
||||
>;
|
||||
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (options and debugging)
|
||||
//===------------------------------------------------------------===//
|
||||
|
@ -864,6 +889,25 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
|
|||
]
|
||||
>;
|
||||
|
||||
def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
|
||||
"gfx11",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
|
||||
FeatureFlatAddressSpace, Feature16BitInsts,
|
||||
FeatureInv2PiInlineImm, FeatureApertureRegs,
|
||||
FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts,
|
||||
FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts,
|
||||
FeatureGFX11Insts, FeatureVOP3P, FeatureVOPD, FeatureTrue16BitInsts,
|
||||
FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
|
||||
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
|
||||
FeatureAddNoCarryInsts, FeatureFmaMixInsts,
|
||||
FeatureNoSdstCMPX, FeatureVscnt,
|
||||
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
|
||||
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
|
||||
FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
|
||||
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
|
||||
]
|
||||
>;
|
||||
|
||||
class FeatureSet<list<SubtargetFeature> Features_> {
|
||||
list<SubtargetFeature> Features = Features_;
|
||||
}
|
||||
|
@ -1197,6 +1241,33 @@ def FeatureISAVersion10_3_0 : FeatureSet<
|
|||
FeatureWavefrontSize32,
|
||||
FeatureShaderCyclesRegister]>;
|
||||
|
||||
def FeatureISAVersion11_Common : FeatureSet<
|
||||
[FeatureGFX11,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureDot5Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureDot8Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureNSAMaxSize5,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureShaderCyclesRegister,
|
||||
FeatureArchitectedFlatScratch,
|
||||
FeatureAtomicFaddRtnInsts,
|
||||
FeatureAtomicFaddNoRtnInsts,
|
||||
FeatureImageInsts,
|
||||
FeaturePackedTID,
|
||||
FeatureVcmpxPermlaneHazard]>;
|
||||
|
||||
// Features for GFX 11.0.0 and 11.0.1
|
||||
def FeatureISAVersion11_0 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_Common.Features,
|
||||
[])>;
|
||||
|
||||
def FeatureISAVersion11_0_2 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_Common.Features,
|
||||
[])>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {
|
||||
|
@ -1249,7 +1320,6 @@ def SDWA9AsmParserVariant : AsmParserVariant {
|
|||
let Name = AMDGPUAsmVariants.SDWA9;
|
||||
}
|
||||
|
||||
|
||||
def DPPAsmParserVariant : AsmParserVariant {
|
||||
let Variant = AMDGPUAsmVariants.DPP_ID;
|
||||
let Name = AMDGPUAsmVariants.DPP;
|
||||
|
@ -1289,6 +1359,12 @@ def isGFX6GFX7GFX10 :
|
|||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX11Insts))>;
|
||||
|
||||
def isGFX6GFX7GFX10Plus :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>;
|
||||
|
||||
def isGFX7Only :
|
||||
|
@ -1298,6 +1374,12 @@ def isGFX7Only :
|
|||
def isGFX7GFX10 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX11Insts))>;
|
||||
|
||||
def isGFX7GFX10GFX11 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
|
||||
AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>;
|
||||
|
||||
def isGFX7GFX8GFX9 :
|
||||
|
@ -1321,6 +1403,21 @@ def isGFX6GFX7GFX8GFX9NotGFX90A :
|
|||
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
|
||||
AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>;
|
||||
|
||||
def isGFX6GFX7GFX8GFX9GFX10 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of (not FeatureGFX11Insts))>;
|
||||
|
||||
def isGFX7GFX8GFX9GFX10 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>;
|
||||
|
||||
def isGFX7Plus :
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
|
||||
AssemblerPredicate<(all_of FeatureCIInsts)>;
|
||||
|
@ -1371,6 +1468,11 @@ def isGFX940Plus :
|
|||
Predicate<"Subtarget->hasGFX940Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX940Insts)>;
|
||||
|
||||
def isGFX940GFX11Plus :
|
||||
Predicate<"Subtarget->hasGFX940Insts() ||"
|
||||
"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
|
||||
AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;
|
||||
|
||||
def isGFX8GFX9NotGFX940 :
|
||||
Predicate<"!Subtarget->hasGFX940Insts() &&"
|
||||
"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
|
||||
|
@ -1382,6 +1484,10 @@ def isGFX8GFX9 :
|
|||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
|
||||
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>;
|
||||
|
||||
def isGFX10Only :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX11Insts))>;
|
||||
|
||||
def isGFX10Plus :
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of FeatureGFX10Insts)>;
|
||||
|
@ -1391,6 +1497,25 @@ def isGFX10Before1030 :
|
|||
"!Subtarget->hasGFX10_3Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX10Insts,(not FeatureGFX10_3Insts))>;
|
||||
|
||||
def isGFX9GFX10 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX11Insts))>;
|
||||
|
||||
def isGFX8GFX9GFX10 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX11Insts))>;
|
||||
|
||||
def isGFX11Only :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
|
||||
AssemblerPredicate<(all_of FeatureGFX11Insts)>;
|
||||
|
||||
def isGFX11Plus :
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
|
||||
AssemblerPredicate<(all_of FeatureGFX11Insts)>;
|
||||
|
||||
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
|
||||
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
|
||||
|
||||
|
@ -1406,7 +1531,7 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
|
|||
def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">,
|
||||
AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>;
|
||||
def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">,
|
||||
AssemblerPredicate<(any_of FeatureGFX940Insts)>;
|
||||
AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;
|
||||
|
||||
def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;
|
||||
|
@ -1439,6 +1564,11 @@ def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
|
|||
|
||||
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
|
||||
AssemblerPredicate<(all_of Feature16BitInsts)>;
|
||||
|
||||
def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">,
|
||||
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
|
||||
def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">;
|
||||
|
||||
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
|
||||
AssemblerPredicate<(all_of FeatureVOP3P)>;
|
||||
|
||||
|
@ -1542,6 +1672,9 @@ def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
|
|||
def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureDot7Insts)>;
|
||||
|
||||
def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureDot8Insts)>;
|
||||
|
||||
def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
|
||||
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;
|
||||
|
||||
|
|
|
@ -38,7 +38,8 @@ public:
|
|||
SEA_ISLANDS = 6,
|
||||
VOLCANIC_ISLANDS = 7,
|
||||
GFX9 = 8,
|
||||
GFX10 = 9
|
||||
GFX10 = 9,
|
||||
GFX11 = 10
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -47,6 +48,7 @@ private:
|
|||
protected:
|
||||
bool GCN3Encoding = false;
|
||||
bool Has16BitInsts = false;
|
||||
bool HasTrue16BitInsts = false;
|
||||
bool HasMadMixInsts = false;
|
||||
bool HasMadMacF32Insts = false;
|
||||
bool HasDsSrc2Insts = false;
|
||||
|
@ -145,6 +147,8 @@ public:
|
|||
return Has16BitInsts;
|
||||
}
|
||||
|
||||
bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
|
||||
|
||||
bool hasMadMixInsts() const {
|
||||
return HasMadMixInsts;
|
||||
}
|
||||
|
|
|
@ -243,3 +243,23 @@ def : ProcessorModel<"gfx1035", GFX10SpeedModel,
|
|||
def : ProcessorModel<"gfx1036", GFX10SpeedModel,
|
||||
FeatureISAVersion10_3_0.Features
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GCN GFX11.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : ProcessorModel<"gfx1100", GFX11SpeedModel,
|
||||
FeatureISAVersion11_0.Features
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx1101", GFX11SpeedModel,
|
||||
FeatureISAVersion11_0.Features
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx1102", GFX11SpeedModel,
|
||||
FeatureISAVersion11_0_2.Features
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx1103", GFX11SpeedModel,
|
||||
FeatureISAVersion11_0_2.Features
|
||||
>;
|
||||
|
|
|
@ -103,6 +103,7 @@ protected:
|
|||
bool GFX90AInsts = false;
|
||||
bool GFX940Insts = false;
|
||||
bool GFX10Insts = false;
|
||||
bool GFX11Insts = false;
|
||||
bool GFX10_3Insts = false;
|
||||
bool GFX7GFX8GFX9Insts = false;
|
||||
bool SGPRInitBug = false;
|
||||
|
@ -141,6 +142,7 @@ protected:
|
|||
bool HasDot5Insts = false;
|
||||
bool HasDot6Insts = false;
|
||||
bool HasDot7Insts = false;
|
||||
bool HasDot8Insts = false;
|
||||
bool HasMAIInsts = false;
|
||||
bool HasPkFmacF16Inst = false;
|
||||
bool HasAtomicFaddRtnInsts = false;
|
||||
|
@ -187,6 +189,7 @@ protected:
|
|||
bool HasFlatSegmentOffsetBug = false;
|
||||
bool HasImageStoreD16Bug = false;
|
||||
bool HasImageGather4D16Bug = false;
|
||||
bool HasVOPDInsts = false;
|
||||
|
||||
// Dummy feature to use for assembler in tablegen.
|
||||
bool FeatureDisable = false;
|
||||
|
@ -566,7 +569,7 @@ public:
|
|||
return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
|
||||
}
|
||||
|
||||
bool hasFlatScratchSVSMode() const { return GFX940Insts; }
|
||||
bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
|
||||
|
||||
bool hasScalarFlatScratchInsts() const {
|
||||
return ScalarFlatScratchInsts;
|
||||
|
@ -702,6 +705,10 @@ public:
|
|||
return HasDot7Insts;
|
||||
}
|
||||
|
||||
bool hasDot8Insts() const {
|
||||
return HasDot8Insts;
|
||||
}
|
||||
|
||||
bool hasMAIInsts() const {
|
||||
return HasMAIInsts;
|
||||
}
|
||||
|
@ -821,6 +828,9 @@ public:
|
|||
/// \returns true if the subtarget has the v_permlanex16_b32 instruction.
|
||||
bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
|
||||
|
||||
/// \returns true if the subtarget has the v_permlane64_b32 instruction.
|
||||
bool hasPermLane64() const { return getGeneration() >= GFX11; }
|
||||
|
||||
bool hasDPP() const {
|
||||
return HasDPP;
|
||||
}
|
||||
|
@ -1001,9 +1011,35 @@ public:
|
|||
|
||||
bool hasGFX90AInsts() const { return GFX90AInsts; }
|
||||
|
||||
bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
|
||||
|
||||
bool hasLdsDirect() const { return getGeneration() >= GFX11; }
|
||||
|
||||
bool hasVALUPartialForwardingHazard() const {
|
||||
return getGeneration() >= GFX11;
|
||||
}
|
||||
|
||||
bool hasVALUTransUseHazard() const { return getGeneration() >= GFX11; }
|
||||
|
||||
/// Return if operations acting on VGPR tuples require even alignment.
|
||||
bool needsAlignedVGPRs() const { return GFX90AInsts; }
|
||||
|
||||
/// Return true if the target has the S_PACK_HL_B32_B16 instruction.
|
||||
bool hasSPackHL() const { return GFX11Insts; }
|
||||
|
||||
/// Return true if the target's EXP instruction has the COMPR flag, which
|
||||
/// affects the meaning of the EN (enable) bits.
|
||||
bool hasCompressedExport() const { return !GFX11Insts; }
|
||||
|
||||
/// Return true if the target's EXP instruction supports the NULL export
|
||||
/// target.
|
||||
bool hasNullExportTarget() const { return !GFX11Insts; }
|
||||
|
||||
bool hasVOPDInsts() const { return HasVOPDInsts; }
|
||||
|
||||
/// Return true if the target has the S_DELAY_ALU instruction.
|
||||
bool hasDelayAlu() const { return GFX11Insts; }
|
||||
|
||||
bool hasPackedTID() const { return HasPackedTID; }
|
||||
|
||||
// GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
|
||||
|
@ -1041,6 +1077,9 @@ public:
|
|||
return getGeneration() >= GFX9;
|
||||
}
|
||||
|
||||
// \returns true if the target supports the pre-NGG legacy geometry path.
|
||||
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
|
||||
|
||||
/// \returns SGPR allocation granularity supported by the subtarget.
|
||||
unsigned getSGPRAllocGranule() const {
|
||||
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
|
||||
|
@ -1221,6 +1260,10 @@ public:
|
|||
|
||||
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
|
||||
SDep &Dep) const override;
|
||||
|
||||
// \returns true if it's beneficial on this subtarget for the scheduler to
|
||||
// cluster stores as well as loads.
|
||||
bool shouldClusterStores() const { return getGeneration() >= GFX11; }
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -7835,6 +7835,7 @@ SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
|
|||
}
|
||||
|
||||
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
|
||||
// and the columns of the getMCOpcodeGen table.
|
||||
enum SIEncodingFamily {
|
||||
SI = 0,
|
||||
VI = 1,
|
||||
|
@ -7845,7 +7846,8 @@ enum SIEncodingFamily {
|
|||
GFX10 = 6,
|
||||
SDWA10 = 7,
|
||||
GFX90A = 8,
|
||||
GFX940 = 9
|
||||
GFX940 = 9,
|
||||
GFX11 = 10,
|
||||
};
|
||||
|
||||
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
|
||||
|
@ -7860,6 +7862,8 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
|
|||
return SIEncodingFamily::VI;
|
||||
case AMDGPUSubtarget::GFX10:
|
||||
return SIEncodingFamily::GFX10;
|
||||
case AMDGPUSubtarget::GFX11:
|
||||
return SIEncodingFamily::GFX11;
|
||||
}
|
||||
llvm_unreachable("Unknown subtarget generation!");
|
||||
}
|
||||
|
|
|
@ -17,7 +17,8 @@ class GCNPredicateControl : PredicateControl {
|
|||
}
|
||||
|
||||
// Except for the NONE field, this must be kept in sync with the
|
||||
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
|
||||
// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
|
||||
// getMCOpcodeGen table.
|
||||
def SIEncodingFamily {
|
||||
int NONE = -1;
|
||||
int SI = 0;
|
||||
|
@ -30,6 +31,7 @@ def SIEncodingFamily {
|
|||
int SDWA10 = 7;
|
||||
int GFX90A = 8;
|
||||
int GFX940 = 9;
|
||||
int GFX11 = 10;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -2579,6 +2581,7 @@ def getMCOpcodeGen : InstrMapping {
|
|||
let RowFields = ["PseudoInstr"];
|
||||
let ColFields = ["Subtarget"];
|
||||
let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
|
||||
// These columns must be kept in sync with the SIEncodingFamily enumeration.
|
||||
let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
|
||||
[!cast<string>(SIEncodingFamily.VI)],
|
||||
[!cast<string>(SIEncodingFamily.SDWA)],
|
||||
|
@ -2592,7 +2595,8 @@ def getMCOpcodeGen : InstrMapping {
|
|||
[!cast<string>(SIEncodingFamily.GFX10)],
|
||||
[!cast<string>(SIEncodingFamily.SDWA10)],
|
||||
[!cast<string>(SIEncodingFamily.GFX90A)],
|
||||
[!cast<string>(SIEncodingFamily.GFX940)]];
|
||||
[!cast<string>(SIEncodingFamily.GFX940)],
|
||||
[!cast<string>(SIEncodingFamily.GFX11)]];
|
||||
}
|
||||
|
||||
// Get equivalent SOPK instruction.
|
||||
|
|
|
@ -89,6 +89,7 @@ def SIQuarterSpeedModel : SISchedMachineModel;
|
|||
def SIDPFullSpeedModel : SISchedMachineModel;
|
||||
def SIDPGFX940FullSpeedModel : SISchedMachineModel;
|
||||
def GFX10SpeedModel : SISchedMachineModel;
|
||||
def GFX11SpeedModel : SISchedMachineModel;
|
||||
|
||||
// XXX: Are the resource counts correct?
|
||||
def HWBranch : ProcResource<1> {
|
||||
|
@ -311,3 +312,29 @@ def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
|
|||
def : InstRW<[WriteCopy], (instrs COPY)>;
|
||||
|
||||
} // End SchedModel = GFX10SpeedModel
|
||||
|
||||
let SchedModel = GFX11SpeedModel in {
|
||||
|
||||
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
|
||||
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
|
||||
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
|
||||
def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
|
||||
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
|
||||
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
|
||||
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 38>;
|
||||
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 38>;
|
||||
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 38>;
|
||||
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
|
||||
def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 40>;
|
||||
|
||||
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
|
||||
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
|
||||
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
|
||||
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
|
||||
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
|
||||
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
|
||||
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
|
||||
|
||||
def : InstRW<[WriteCopy], (instrs COPY)>;
|
||||
|
||||
} // End SchedModel = GFX11SpeedModel
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: smax_neg_abs_pattern_s32_ss
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: add_s32
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
# Note: 16-bit instructions generally produce a 0 result in the high 16-bits on GFX8 and GFX9 and preserve high 16 bits on GFX10+
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: exp0
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
name: amdgpu_atomic_cmpxchg_s32_flat
|
||||
|
@ -40,6 +41,15 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
|
||||
; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = COPY $vgpr3
|
||||
|
@ -105,6 +115,15 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
|
||||
; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = COPY $vgpr3
|
||||
|
@ -152,6 +171,15 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
|
||||
; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = COPY $vgpr4_vgpr5
|
||||
|
@ -217,6 +245,15 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
|
||||
; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = COPY $vgpr4_vgpr5
|
||||
|
@ -294,6 +331,25 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
|
||||
; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
|
||||
; GFX11-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = COPY $vgpr3
|
||||
|
@ -338,6 +394,14 @@ body: |
|
|||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = COPY $vgpr3
|
||||
|
@ -379,6 +443,14 @@ body: |
|
|||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = COPY $vgpr4_vgpr5
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: amdgpu_atomic_cmpxchg_s32_global
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: ashr_s32_ss
|
||||
|
|
|
@ -9,6 +9,9 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: FileCheck --check-prefix=ERR %s < %t
|
||||
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: FileCheck --check-prefix=ERR %s < %t
|
||||
|
||||
# ERR-NOT: remark
|
||||
# ERR: remark: <unknown>:0:0: cannot select: %4:sgpr(s16) = G_ASHR %2:sgpr, %3:sgpr(s16) (in function: ashr_s16_s16_ss)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_s32_vv)
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
|
||||
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
|
||||
|
||||
# ERR-NOT: remark
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
name: flat_atomicrmw_add_s32
|
||||
|
@ -34,6 +35,13 @@ body: |
|
|||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0)
|
||||
|
@ -68,6 +76,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0)
|
||||
|
@ -124,6 +138,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2047
|
||||
|
@ -180,6 +201,12 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2047
|
||||
|
@ -238,6 +265,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2048
|
||||
|
@ -294,6 +328,12 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2048
|
||||
|
@ -352,6 +392,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
@ -408,6 +455,12 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
@ -476,6 +529,23 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
|
||||
; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4097
|
||||
|
@ -542,6 +612,22 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
|
||||
; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4097
|
||||
|
@ -580,6 +666,13 @@ body: |
|
|||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0)
|
||||
|
@ -614,6 +707,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s64_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0)
|
||||
|
@ -670,6 +769,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
@ -726,6 +832,12 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
name: global_atomicrmw_add_s32
|
||||
|
@ -47,6 +48,13 @@ body: |
|
|||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
|
||||
|
@ -92,6 +100,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
|
||||
|
@ -150,6 +164,13 @@ body: |
|
|||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2047
|
||||
|
@ -207,6 +228,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2047
|
||||
|
@ -277,6 +304,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2048
|
||||
|
@ -344,6 +378,12 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2048
|
||||
|
@ -414,6 +454,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
@ -481,6 +528,12 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
@ -562,6 +615,23 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
|
||||
; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4097
|
||||
|
@ -640,6 +710,22 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
|
||||
; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4097
|
||||
|
@ -690,6 +776,13 @@ body: |
|
|||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1)
|
||||
|
@ -735,6 +828,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s64_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1)
|
||||
|
@ -803,6 +902,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
@ -870,6 +976,12 @@ body: |
|
|||
; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
|
||||
; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
# GFX6/7 selection should fail.
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
# GFX6/7 selection should fail.
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
|
||||
---
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
|
||||
---
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE64
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE32
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE32
|
||||
|
||||
---
|
||||
name: constant_v_s32
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-vgpr-index-mode -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
|
||||
|
||||
---
|
||||
name: extract_vector_elt_s_s32_v2s32
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: fabs_s32_ss
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
name: fcmp_false_s32_vv
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
name: fcmp_false_s16_vv
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx906 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9-DL %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: fmaxnum_ieee_f16_vv
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: fmaxnum_ieee_v2f16_vv
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: fmaxnum_f16_vv
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
# FIXME: Ideally this would fail to select with ieee mode enabled.
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: fminnum_ieee_f16_vv
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: fminnum_ieee_v2f16_vv
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: fminnum_f16_vv
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: fminnum_v2f16_vv
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: fmul_v2f16_vv
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: fneg_s32_ss
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: test_freeze_s1_vgpr_to_vgpr
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_64_xexec %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_64_xexec %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_32_xm0_xexec %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_32_xm0_xexec %s
|
||||
|
||||
---
|
||||
name: icmp_s32_s_mix
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
|
||||
|
||||
---
|
||||
name: insert_vector_elt_s_s32_v2s32
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -32,6 +33,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -67,6 +74,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
; GFX11-LABEL: name: load_atomic_flat_v2s16_seq_cst
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -102,6 +115,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3)
|
||||
; GFX11-LABEL: name: load_atomic_flat_p3_seq_cst
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -137,6 +156,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_atomic_flat_s64_seq_cst
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -172,6 +197,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -207,6 +238,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
; GFX11-LABEL: name: load_atomic_flat_v4s16_seq_cst
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -242,6 +279,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; GFX11-LABEL: name: load_atomic_flat_p1_seq_cst
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -277,6 +320,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
; GFX11-LABEL: name: load_atomic_flat_p0_seq_cst
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -342,6 +391,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -2048
|
||||
%2:vgpr(p0) = G_PTR_ADD %0, %1
|
||||
|
@ -399,6 +464,12 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 4095
|
||||
%2:vgpr(p0) = G_PTR_ADD %0, %1
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-FLAT %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
|
||||
---
|
||||
|
@ -40,6 +41,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_4
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -81,6 +88,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_2
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -122,6 +135,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -155,6 +174,10 @@ body: |
|
|||
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_flat_v2s32
|
||||
; GFX11: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -196,6 +219,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
|
||||
; GFX11-LABEL: name: load_flat_v3s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0)
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
|
@ -237,6 +266,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
|
||||
; GFX11-LABEL: name: load_flat_v4s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -278,6 +313,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_flat_s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -319,6 +360,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
|
||||
; GFX11-LABEL: name: load_flat_v2s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -360,6 +407,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX11-LABEL: name: load_flat_v2p1
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -401,6 +454,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
|
||||
; GFX11-LABEL: name: load_flat_s96
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0)
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
|
@ -442,6 +501,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX11-LABEL: name: load_flat_s128
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -483,6 +548,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_flat_p3_from_4
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -524,6 +595,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_flat_p1_from_8
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -565,6 +642,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
|
||||
; GFX11-LABEL: name: load_flat_p999_from_8
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -598,6 +681,10 @@ body: |
|
|||
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
|
||||
; GFX11-LABEL: name: load_flat_v2p3
|
||||
; GFX11: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -639,6 +726,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_flat_v2s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 0)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -680,6 +773,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>))
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_flat_v4s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>))
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -721,6 +820,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
|
||||
; GFX11-LABEL: name: load_flat_v6s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 0)
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
|
@ -762,6 +867,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
|
||||
; GFX11-LABEL: name: load_flat_v8s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 0)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -837,6 +948,12 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_2047
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 2047
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -910,6 +1027,12 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_2048
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 2048
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -993,6 +1116,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2047
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -2047
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1076,6 +1215,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2048
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -2048
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1149,6 +1304,12 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 4095
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1232,6 +1393,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_4096
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 4096
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1315,6 +1492,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -4095
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1398,6 +1591,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4096
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -4096
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1481,6 +1690,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_8191
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 8191
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1564,6 +1789,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_8192
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 8192
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1647,6 +1888,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8191
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -8191
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1730,6 +1987,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8192
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -8192
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
name: load_global_s32_from_sgpr
|
||||
|
@ -26,6 +27,13 @@ body: |
|
|||
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(p1) = COPY %0
|
||||
%2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1)
|
||||
|
@ -59,6 +67,13 @@ body: |
|
|||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
|
||||
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr0
|
||||
%2:vgpr(p1) = COPY %0
|
||||
|
@ -95,6 +110,13 @@ body: |
|
|||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
|
||||
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr0
|
||||
%2:vgpr(p1) = COPY %0
|
||||
|
@ -151,6 +173,23 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr
|
||||
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
|
||||
; GFX11-NEXT: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1
|
||||
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
|
||||
; GFX11-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr0
|
||||
%2:vgpr(p1) = COPY %0
|
||||
|
@ -207,6 +246,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
|
||||
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr0
|
||||
%2:vgpr(p1) = COPY %0
|
||||
|
@ -265,6 +311,13 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
|
||||
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr0
|
||||
%2:vgpr(p1) = COPY %0
|
||||
|
@ -301,6 +354,13 @@ body: |
|
|||
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 4096
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -334,6 +394,13 @@ body: |
|
|||
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 4097
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -387,6 +454,23 @@ body: |
|
|||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 -4097
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -420,6 +504,13 @@ body: |
|
|||
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 2049
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -463,6 +554,13 @@ body: |
|
|||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 -2049
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -495,6 +593,13 @@ body: |
|
|||
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 4294967295
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -547,6 +652,23 @@ body: |
|
|||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 4294967296
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -600,6 +722,23 @@ body: |
|
|||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 4294971390
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -653,6 +792,23 @@ body: |
|
|||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 -4294967295
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -705,6 +861,23 @@ body: |
|
|||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 -4294967296
|
||||
%2:sgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -732,6 +905,11 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_copy_undef_sgpr
|
||||
; GFX11: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = G_IMPLICIT_DEF
|
||||
%1:vgpr(p1) = COPY %0
|
||||
%2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1)
|
||||
|
@ -755,6 +933,10 @@ body: |
|
|||
; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_undef_vgpr
|
||||
; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = G_IMPLICIT_DEF
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -65,6 +66,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_4
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -128,6 +135,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_2
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -191,6 +204,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -254,6 +273,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_global_v2s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -317,6 +342,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
|
||||
; GFX11-LABEL: name: load_global_v4s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -370,6 +401,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_global_s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -423,6 +460,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
|
||||
; GFX11-LABEL: name: load_global_v2s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -476,6 +519,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX11-LABEL: name: load_global_v2p1
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -529,6 +578,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX11-LABEL: name: load_global_s128
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -582,6 +637,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_p3_from_4
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -635,6 +696,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_global_p1_from_8
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -688,6 +755,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
|
||||
; GFX11-LABEL: name: load_global_p999_from_8
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -741,6 +814,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
|
||||
; GFX11-LABEL: name: load_global_v2p3
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -794,6 +873,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
; GFX11-LABEL: name: load_global_v2s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -847,6 +932,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
; GFX11-LABEL: name: load_global_v4s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@ -900,6 +991,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
|
||||
; GFX11-LABEL: name: load_global_v8s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -987,6 +1084,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_2047
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 2047
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1082,6 +1185,12 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_2048
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 2048
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1187,6 +1296,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_m2047
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -2047
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1292,6 +1407,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_m2048
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -2048
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1387,6 +1508,12 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 4095
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1494,6 +1621,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_4096
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 4096
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1609,6 +1752,12 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -4095
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1724,6 +1873,12 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -4096
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1831,6 +1986,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_8191
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 8191
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -1938,6 +2109,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_8192
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 8192
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -2063,6 +2250,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -8191
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
@ -2188,6 +2391,22 @@ body: |
|
|||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
; GFX11-LABEL: name: load_global_s32_from_1_gep_m8192
|
||||
; GFX11: liveins: $vgpr0_vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -8192
|
||||
%2:vgpr(p1) = G_PTR_ADD %0, %1
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -33,6 +34,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
|
||||
; GFX11-LABEL: name: load_local_v4s32_align16
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -69,6 +76,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
; GFX11-LABEL: name: load_local_v4s32_align_8
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -105,6 +118,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
; GFX11-LABEL: name: load_local_v4s32_align_8_offset_160
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 400
|
||||
%2:vgpr(p3) = G_PTR_ADD %0, %1
|
||||
|
@ -149,6 +168,14 @@ body: |
|
|||
; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
; GFX11-LABEL: name: load_local_v4s32_align_8_offset_320
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec
|
||||
; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4000
|
||||
%2:vgpr(p3) = G_PTR_ADD %0, %1
|
||||
|
@ -187,6 +214,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
; GFX11-LABEL: name: load_local_v2s64
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -223,6 +256,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX11-LABEL: name: load_local_v2p1
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -259,6 +298,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX11-LABEL: name: load_local_s128
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
@ -295,6 +340,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
; GFX11-LABEL: name: load_local_v8s16
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -O0 -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -30,6 +31,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_4
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -62,6 +69,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_2
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_USHORT]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -94,6 +107,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -126,6 +145,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_p3_from_4
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -158,6 +183,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_p5_from_4
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p5), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -191,6 +222,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_v2s16
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -229,6 +266,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_2047
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 2047
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -267,6 +310,14 @@ body: |
|
|||
; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_2047_known_bits
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
|
||||
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_AND_B32_e64_]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 2147483647
|
||||
%2:vgpr(s32) = G_AND %0, %1
|
||||
|
@ -306,6 +357,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_2048
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 2048
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -344,6 +401,12 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_m2047
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -2047
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -382,6 +445,12 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_m2048
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -2048
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -418,6 +487,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_4095
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4095
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -456,6 +531,14 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_4096
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4096
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -494,6 +577,12 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_m4095
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -4095
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -532,6 +621,12 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_m4096
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -4096
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -570,6 +665,14 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_8191
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
|
||||
; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 8191
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -608,6 +711,14 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_8192
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
|
||||
; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 8192
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -646,6 +757,14 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_m8191
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
|
||||
; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -8191
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -684,6 +803,14 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_gep_m8192
|
||||
; GFX11: liveins: $vgpr0
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
|
||||
; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -8192
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -711,6 +838,10 @@ body: |
|
|||
; GFX9-LABEL: name: load_private_s32_from_4_constant_0
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_4_constant_0
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]]
|
||||
%0:vgpr(p5) = G_CONSTANT i32 0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -736,6 +867,10 @@ body: |
|
|||
; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_4_constant_sgpr_16
|
||||
; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]]
|
||||
%0:sgpr(p5) = G_CONSTANT i32 16
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -761,6 +896,10 @@ body: |
|
|||
; GFX9-LABEL: name: load_private_s32_from_1_constant_4095
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_constant_4095
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4095
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -788,6 +927,10 @@ body: |
|
|||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_constant_4096
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]]
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4096
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -815,6 +958,9 @@ body: |
|
|||
; GFX9-LABEL: name: load_private_s32_from_fi
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_fi
|
||||
; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]]
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -841,6 +987,9 @@ body: |
|
|||
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4095
|
||||
; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]]
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4095
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -870,6 +1019,9 @@ body: |
|
|||
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
|
||||
; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]]
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:sgpr(s32) = G_CONSTANT i32 4095
|
||||
%2:vgpr(s32) = COPY %1
|
||||
|
@ -906,6 +1058,10 @@ body: |
|
|||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4096
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SVS]]
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4096
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -936,6 +1092,10 @@ body: |
|
|||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GFX11-LABEL: name: load_private_s32_from_neg1
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]]
|
||||
%0:vgpr(p5) = G_CONSTANT i32 -1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5)
|
||||
$vgpr0 = COPY %1
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: lshr_s32_ss
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
# RUN: FileCheck --check-prefix=ERR %s < %t
|
||||
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: FileCheck --check-prefix=ERR %s < %t
|
||||
|
||||
# ERR-NOT: remark
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
|
||||
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
|
||||
|
||||
# ERR-NOT: remark
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: smed3_s16_vvv
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: umed3_s16_vvv
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE32 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE32 %s
|
||||
|
||||
---
|
||||
name: gep_p0_sgpr_sgpr
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: shl_s32_ss
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
# RUN: FileCheck --check-prefix=ERR %s < %t
|
||||
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: FileCheck --check-prefix=ERR %s < %t
|
||||
|
||||
# ERR-NOT: remark
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
|
||||
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
|
||||
|
||||
# ERR-NOT: remark
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: smax_s32_ss
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: smin_s32_ss
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: FileCheck -check-prefix=ERR %s < %t
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
# ERR-NOT: remark:
|
||||
# ERR: remark: <unknown>:0:0: cannot select: %2:sgpr(s32) = G_SMULH %0:sgpr, %1:sgpr (in function: smulh_s32_ss)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -39,6 +40,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
|
||||
; GFX11-LABEL: name: store_flat_s32_to_4
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
G_STORE %1, %0 :: (store (s32), align 4, addrspace 0)
|
||||
|
@ -79,6 +86,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16))
|
||||
; GFX11-LABEL: name: store_flat_s32_to_2
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
G_STORE %1, %0 :: (store (s16), align 2, addrspace 0)
|
||||
|
@ -119,6 +132,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8))
|
||||
; GFX11-LABEL: name: store_flat_s32_to_1
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
G_STORE %1, %0 :: (store (s8), align 1, addrspace 0)
|
||||
|
@ -160,6 +179,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
||||
; GFX11-LABEL: name: store_flat_s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
G_STORE %1, %0 :: (store (s64), align 8, addrspace 0)
|
||||
|
@ -200,6 +225,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
|
||||
; GFX10-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16)
|
||||
; GFX11-LABEL: name: store_flat_s96
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
|
||||
; GFX11-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
|
||||
G_STORE %1, %0 :: (store (s96), align 16, addrspace 0)
|
||||
|
@ -240,6 +271,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX10-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128))
|
||||
; GFX11-LABEL: name: store_flat_s128
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
G_STORE %1, %0 :: (store (s128), align 16, addrspace 0)
|
||||
|
@ -281,6 +318,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>))
|
||||
; GFX11-LABEL: name: store_flat_v2s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 0)
|
||||
|
@ -321,6 +364,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16)
|
||||
; GFX11-LABEL: name: store_flat_v3s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
|
||||
G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 0)
|
||||
|
@ -361,6 +410,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>))
|
||||
; GFX11-LABEL: name: store_flat_v4s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 0)
|
||||
|
@ -402,6 +457,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>))
|
||||
; GFX11-LABEL: name: store_flat_v2s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 0)
|
||||
|
@ -443,6 +504,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>))
|
||||
; GFX11-LABEL: name: store_flat_v4s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 0)
|
||||
|
@ -484,6 +551,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4
|
||||
; GFX10-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16)
|
||||
; GFX11-LABEL: name: store_flat_v6s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4
|
||||
; GFX11-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4
|
||||
G_STORE %1, %0 :: (store (<6 x s16>), align 16, addrspace 0)
|
||||
|
@ -524,6 +597,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
|
||||
; GFX11-LABEL: name: store_flat_v8s16
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 0)
|
||||
|
@ -565,6 +644,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>))
|
||||
; GFX11-LABEL: name: store_flat_v2s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 0)
|
||||
|
@ -606,6 +691,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1))
|
||||
; GFX11-LABEL: name: store_flat_p1
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p1) = COPY $vgpr2_vgpr3
|
||||
G_STORE %1, %0 :: (store (p1), align 8, addrspace 0)
|
||||
|
@ -647,6 +738,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX10-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>))
|
||||
; GFX11-LABEL: name: store_flat_v2p1
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GFX11-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
G_STORE %1, %0 :: (store (<2 x p1>), align 16, addrspace 0)
|
||||
|
@ -688,6 +785,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3))
|
||||
; GFX11-LABEL: name: store_flat_p3
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(p3) = COPY $vgpr2
|
||||
G_STORE %1, %0 :: (store (p3), align 4, addrspace 0)
|
||||
|
@ -729,6 +832,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>))
|
||||
; GFX11-LABEL: name: store_flat_v2p3
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3
|
||||
G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 0)
|
||||
|
@ -769,6 +878,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32))
|
||||
; GFX11-LABEL: name: store_atomic_flat_s32
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 0)
|
||||
|
@ -810,6 +925,12 @@ body: |
|
|||
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64))
|
||||
; GFX11-LABEL: name: store_atomic_flat_s64
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 0)
|
||||
|
@ -881,6 +1002,12 @@ body: |
|
|||
; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
|
||||
; GFX11-LABEL: name: store_flat_s32_gep_2047
|
||||
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32))
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 2047
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
# 96-bit load/store is only legal for SI, so split the test out.
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -O0 -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -31,6 +32,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_4
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (s32), align 4, addrspace 5)
|
||||
|
@ -64,6 +71,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_2
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (s16), align 2, addrspace 5)
|
||||
|
@ -97,6 +110,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_1
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (s8), align 1, addrspace 5)
|
||||
|
@ -130,6 +149,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_v2s16
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5)
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5)
|
||||
|
@ -163,6 +188,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_p3
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (p3), align 4, addrspace 5)
|
||||
|
@ -196,6 +227,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_p5
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (p5), align 4, addrspace 5)
|
||||
|
@ -224,6 +261,9 @@ body: |
|
|||
; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5)
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4095
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -254,6 +294,10 @@ body: |
|
|||
; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4095
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_1_constant_4095
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5)
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4095
|
||||
%1:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %1, %0 :: (store (s8), align 1, addrspace 5)
|
||||
|
@ -284,6 +328,10 @@ body: |
|
|||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_1_constant_4096
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5)
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4096
|
||||
%1:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %1, %0 :: (store (s8), align 1, addrspace 5)
|
||||
|
@ -316,6 +364,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_s32_to_4
|
||||
; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (s32), align 4, addrspace 5)
|
||||
|
@ -348,6 +402,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_s32_to_2
|
||||
; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (s16), align 2, addrspace 5)
|
||||
|
@ -380,6 +440,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_s32_to_1
|
||||
; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (s8), align 1, addrspace 5)
|
||||
|
@ -400,6 +466,7 @@ body: |
|
|||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
|
||||
; GFX6-LABEL: name: kernel_store_private_v2s16
|
||||
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX6-NEXT: {{ $}}
|
||||
|
@ -412,6 +479,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_v2s16
|
||||
; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5)
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5)
|
||||
|
@ -444,6 +517,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_p3
|
||||
; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (p3), align 4, addrspace 5)
|
||||
|
@ -476,6 +555,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_p5
|
||||
; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store (p5), align 4, addrspace 5)
|
||||
|
@ -508,6 +593,11 @@ body: |
|
|||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
|
||||
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5)
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4095
|
||||
%2:vgpr(p5) = G_PTR_ADD %0, %1
|
||||
|
@ -542,6 +632,12 @@ body: |
|
|||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_s32_to_1_constant_4095
|
||||
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5)
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4095
|
||||
%1:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %1, %0 :: (store (s8), align 1, addrspace 5)
|
||||
|
@ -576,6 +672,12 @@ body: |
|
|||
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5)
|
||||
; GFX11-LABEL: name: kernel_store_private_s32_to_1_constant_4096
|
||||
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5)
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4096
|
||||
%1:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %1, %0 :: (store (s8), align 1, addrspace 5)
|
||||
|
@ -606,6 +708,12 @@ body: |
|
|||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
|
||||
G_STORE %0, %1 :: (store (s32), align 4, addrspace 5)
|
||||
|
@ -637,6 +745,12 @@ body: |
|
|||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
|
||||
%2:sgpr(s32) = G_CONSTANT i32 4095
|
||||
|
@ -674,6 +788,12 @@ body: |
|
|||
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
|
||||
%2:vgpr(s32) = G_CONSTANT i32 4095
|
||||
|
@ -714,6 +834,15 @@ body: |
|
|||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec
|
||||
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
|
||||
; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096
|
||||
; GFX11: liveins: $vgpr0, $vgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec
|
||||
; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
|
||||
%2:sgpr(s32) = G_CONSTANT i32 4096
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: sub_s32
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -31,6 +32,15 @@ body: |
|
|||
; GFX8: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; GFX8: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc
|
||||
; GFX8: S_ENDPGM 0, implicit [[S_OR_B32_]]
|
||||
; GFX11-LABEL: name: trunc_sgpr_v2s32_to_v2s16
|
||||
; GFX11: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc
|
||||
; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
||||
; GFX11: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; GFX11: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc
|
||||
; GFX11: S_ENDPGM 0, implicit [[S_OR_B32_]]
|
||||
%0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(<2 x s16>) = G_TRUNC %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
@ -60,6 +70,15 @@ body: |
|
|||
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX8: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY2]], 0, 5, 2, 4, implicit $exec, implicit [[COPY1]](tied-def 0)
|
||||
; GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_sdwa]]
|
||||
; GFX11-LABEL: name: trunc_vgpr_v2s32_to_v2s16
|
||||
; GFX11: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX11: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GFX11: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; GFX11: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec
|
||||
; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
|
||||
; GFX11: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GFX11: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_LSHLREV_B32_e64_]], [[V_AND_B32_e64_]], implicit $exec
|
||||
; GFX11: S_ENDPGM 0, implicit [[V_OR_B32_e64_]]
|
||||
%0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(<2 x s16>) = G_TRUNC %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
# These violate the constant bus restriction pre-gfx10
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
name: uadde_s32_s1_sss
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: uaddo_s32_s1_sss
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
name: uitofp_s32_to_s32_vv
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: umax_s32_ss
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: umin_s32_ss
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: FileCheck -check-prefix=ERR %s < %t
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
# ERR-NOT: remark:
|
||||
# ERR: remark: <unknown>:0:0: cannot select: %2:sgpr(s32) = G_UMULH %0:sgpr, %1:sgpr (in function: umulh_s32_ss)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
# These violate the constant bus restriction pre-gfx10
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
name: usube_s32_s1_sss
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: usubo_s32_s1_sss
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: test_add_s32
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: test_addrspacecast_p0_to_p1
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
name: test_wavefrontsize
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s
|
||||
|
||||
---
|
||||
name: test_ashr_s32_s32
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
name: legal_brcond_vcc
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue