[SDAG] Use UnknownSize for masked load/store MMO size

A masked load or store will load/store a potentially unknown number of
bytes from a memory location, and how many bytes are actually touched is
not generally known at compile time. They do not necessarily load/store
the entire vector width, and treating them as such can lead to incorrect
aliasing information (for example, if the underlying object is smaller
than the size of the vector).

This makes sure that the MMO is given an unknown size to represent this,
which is less accurate than "may load/store up to 16 bytes" but less
incorrect than "will load/store exactly 16 bytes".
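
For illustration (a hypothetical scenario, not from the patch, but compare
the incorrectmmo MVE test below, where a 16-byte masked store targets a
12-byte stack slot): an MMO sized to the full vector claims bytes the
masked-off lanes never touch, letting alias analysis reorder neighbouring
accesses. A minimal sketch of how an MMO for a masked access can be built
with an unknown size, assuming the usual SelectionDAG lowering context
(the helper and its parameters are placeholders):

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

// Sketch: size the MMO of a masked store as "unknown" rather than the
// full vector width, since the mask may leave any lanes untouched.
static MachineMemOperand *
buildMaskedStoreMMO(MachineFunction &MF, const MachinePointerInfo &PtrInfo,
                    Align Alignment, const AAMDNodes &AAInfo) {
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 MemoryLocation::UnknownSize, Alignment,
                                 AAInfo);
}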

Differential Revision: https://reviews.llvm.org/D113888
David Green 2021-11-23 09:47:56 +00:00
parent 59f4b3d308
commit 32b6c17b29
7 changed files with 37 additions and 46 deletions


@@ -938,7 +938,8 @@ public:
                                           int64_t Offset, LLT Ty);
   MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
                                           int64_t Offset, uint64_t Size) {
-    return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size));
+    return getMachineMemOperand(
+        MMO, Offset, Size == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * Size));
   }

   /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
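
For reference, MemoryLocation::UnknownSize is defined as ~UINT64_C(0), so
the comparison above recognises the unknown-size sentinel and maps it to a
default-constructed (invalid) LLT, which is how the LLT-based overload
represents an access of unknown size. A small sketch of that mapping (the
helper name is hypothetical):

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

// Sketch: translate a byte size into the LLT used by the MMO overload;
// the all-ones sentinel becomes an invalid LLT meaning "unknown size".
static LLT memSizeToLLT(uint64_t Size) {
  return Size == MemoryLocation::UnknownSize ? LLT()
                                             : LLT::scalar(8 * Size);
}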


@@ -1820,10 +1820,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
   else
     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);

-  unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment,
-      MLD->getAAInfo(), MLD->getRanges());
+      MLD->getPointerInfo(), MachineMemOperand::MOLoad,
+      MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(),
+      MLD->getRanges());

   Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
                          MMO, MLD->getAddressingMode(), ExtType,
@@ -1837,7 +1837,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
     // Generate hi masked load.
     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
                                      MLD->isExpandingLoad());
-    unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());

     MachinePointerInfo MPI;
     if (LoMemVT.isScalableVector())
@@ -1847,8 +1846,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
                                       LoMemVT.getStoreSize().getFixedSize());

     MMO = DAG.getMachineFunction().getMachineMemOperand(
-        MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(),
-        MLD->getRanges());
+        MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+        MLD->getAAInfo(), MLD->getRanges());

     Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
                            HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
@@ -2662,10 +2661,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
       DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);

   SDValue Lo, Hi, Res;
-  unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment,
-      N->getAAInfo(), N->getRanges());
+      N->getPointerInfo(), MachineMemOperand::MOStore,
+      MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());

   Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
                           N->getAddressingMode(), N->isTruncatingStore(),
@@ -2689,10 +2687,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
     MPI = N->getPointerInfo().getWithOffset(
         LoMemVT.getStoreSize().getFixedSize());

-  unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
   MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(),
-      N->getRanges());
+      MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+      N->getAAInfo(), N->getRanges());

   Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
                           N->getAddressingMode(), N->isTruncatingStore(),
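
After splitting, neither half can claim a precise size: the split mask may
disable any lanes in either half, so only the hi half's pointer offset (the
lo half's store size) stays precise. A hedged sketch of the pattern the
hunks above follow for a fixed-width masked store (variable names mirror
the surrounding code; the wrapper function is hypothetical):

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

// Sketch: both halves of a split masked store get unknown-size MMOs;
// the hi half is distinguished only by its known pointer offset.
static void buildSplitMaskedStoreMMOs(MachineFunction &MF,
                                      const MachinePointerInfo &PtrInfo,
                                      EVT LoMemVT, Align Alignment,
                                      const AAMDNodes &AAInfo,
                                      MachineMemOperand *&LoMMO,
                                      MachineMemOperand *&HiMMO) {
  LoMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                  MemoryLocation::UnknownSize, Alignment,
                                  AAInfo);
  HiMMO = MF.getMachineMemOperand(
      PtrInfo.getWithOffset(LoMemVT.getStoreSize().getFixedSize()),
      MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
      AAInfo);
}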


@@ -4336,9 +4336,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
-      // TODO: Make MachineMemOperands aware of scalable
-      // vectors.
-      VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
+      MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
   SDValue StoreNode =
       DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
                          ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4496,22 +4494,14 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

   // Do not serialize masked loads of constant memory with anything.
-  MemoryLocation ML;
-  if (VT.isScalableVector())
-    ML = MemoryLocation::getAfter(PtrOperand);
-  else
-    ML = MemoryLocation(PtrOperand, LocationSize::precise(
-                            DAG.getDataLayout().getTypeStoreSize(I.getType())),
-                        AAInfo);
+  MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
   bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);

   SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();

   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
-      // TODO: Make MachineMemOperands aware of scalable
-      // vectors.
-      VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+      MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
   SDValue Load =
       DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
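
MemoryLocation::getAfter(Ptr, AAInfo) produces a location with
LocationSize::afterPointer(), i.e. "some unknown number of bytes at or
after Ptr", which now covers the fixed and scalable vector cases alike
when asking whether the load only reads constant memory. A usage sketch
(the wrapper function is hypothetical):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"

using namespace llvm;

// Sketch: a masked load of constant memory needs no chain serialization;
// the unknown-size "after Ptr" location keeps the AA query conservative.
static bool mustChainMaskedLoad(AAResults *AA, const Value *Ptr,
                                const AAMDNodes &AAInfo) {
  MemoryLocation ML = MemoryLocation::getAfter(Ptr, AAInfo);
  return !AA || !AA->pointsToConstantMemory(ML);
}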


@@ -9,6 +9,7 @@
 #include "HexagonISelLowering.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/IR/IntrinsicsHexagon.h"
 #include "llvm/Support/CommandLine.h"
@@ -1846,16 +1847,18 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = MemN->getChain();
   SDValue Base0 = MemN->getBasePtr();
   SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
+  unsigned MemOpc = MemN->getOpcode();

   MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
   if (MachineMemOperand *MMO = MemN->getMemOperand()) {
     MachineFunction &MF = DAG.getMachineFunction();
-    MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen);
-    MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen);
+    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
+                           ? (uint64_t)MemoryLocation::UnknownSize
+                           : HwLen;
+    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
+    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
   }

-  unsigned MemOpc = MemN->getOpcode();
   if (MemOpc == ISD::LOAD) {
     assert(cast<LoadSDNode>(Op)->isUnindexed());
     SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
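
Note that SplitHvxMemOp keeps a precise HwLen-byte MMO per half for plain
LOAD/STORE and only falls back to an unknown size for the masked opcodes.
A compact restatement of that rule (the helper name is hypothetical):

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/ISDOpcodes.h"

using namespace llvm;

// Sketch: only the masked opcodes lose their precise per-half size.
static uint64_t splitHvxMemOpSize(unsigned MemOpc, unsigned HwLen) {
  return (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
             ? (uint64_t)MemoryLocation::UnknownSize
             : (uint64_t)HwLen;
}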


@@ -11,15 +11,15 @@ define i32 @incorrectmmo() {
 ; CHECK-NEXT:    adr r0, .LCPI0_0
 ; CHECK-NEXT:    vldrw.u32 q0, [r0]
 ; CHECK-NEXT:    add.w r0, sp, #2
-; CHECK-NEXT:    ldrb.w r1, [sp, #3]
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrbt.8 q0, [r0]
-; CHECK-NEXT:    ldrb.w r2, [sp, #4]
 ; CHECK-NEXT:    ldrb.w r0, [sp, #2]
-; CHECK-NEXT:    ldrb.w r3, [sp, #10]
+; CHECK-NEXT:    ldrb.w r1, [sp, #3]
+; CHECK-NEXT:    ldrb.w r2, [sp, #4]
 ; CHECK-NEXT:    add r0, r1
-; CHECK-NEXT:    ldrb.w r1, [sp, #11]
+; CHECK-NEXT:    ldrb.w r3, [sp, #10]
 ; CHECK-NEXT:    add r0, r2
+; CHECK-NEXT:    ldrb.w r1, [sp, #11]
 ; CHECK-NEXT:    add r0, r3
 ; CHECK-NEXT:    add r0, r1
 ; CHECK-NEXT:    add sp, #12


@@ -516,8 +516,6 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; AVX512F-NEXT:    vpslld $31, %zmm2, %zmm2
 ; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k1
-; AVX512F-NEXT:    kshiftrw $8, %k1, %k2
-; AVX512F-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    movzbl %al, %eax
 ; AVX512F-NEXT:    movl %eax, %ecx
@@ -535,7 +533,9 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; AVX512F-NEXT:    andl $252645135, %ecx ## imm = 0xF0F0F0F
 ; AVX512F-NEXT:    imull $16843009, %ecx, %eax ## imm = 0x1010101
 ; AVX512F-NEXT:    shrl $24, %eax
+; AVX512F-NEXT:    kshiftrw $8, %k1, %k2
 ; AVX512F-NEXT:    vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
+; AVX512F-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
@@ -570,8 +570,6 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; AVX512VLBW:       ## %bb.0:
 ; AVX512VLBW-NEXT:    vpsllw $7, %xmm2, %xmm2
 ; AVX512VLBW-NEXT:    vpmovb2m %xmm2, %k1
-; AVX512VLBW-NEXT:    kshiftrw $8, %k1, %k2
-; AVX512VLBW-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; AVX512VLBW-NEXT:    kmovd %k1, %eax
 ; AVX512VLBW-NEXT:    movzbl %al, %eax
 ; AVX512VLBW-NEXT:    movl %eax, %ecx
@@ -589,7 +587,9 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; AVX512VLBW-NEXT:    andl $252645135, %ecx ## imm = 0xF0F0F0F
 ; AVX512VLBW-NEXT:    imull $16843009, %ecx, %eax ## imm = 0x1010101
 ; AVX512VLBW-NEXT:    shrl $24, %eax
+; AVX512VLBW-NEXT:    kshiftrw $8, %k1, %k2
 ; AVX512VLBW-NEXT:    vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
+; AVX512VLBW-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; AVX512VLBW-NEXT:    vzeroupper
 ; AVX512VLBW-NEXT:    retq
   call void @llvm.masked.compressstore.v16f64(<16 x double> %V, double* %base, <16 x i1> %mask)


@@ -14,10 +14,10 @@ define void @test_v16f(<16 x i32> %x) {
 ; CHECK-NEXT:   [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
 ; CHECK-NEXT:   [[VPCMPEQDYrr:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY]], [[AVX_SET0_]]
 ; CHECK-NEXT:   [[VPCMPEQDYrr1:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY1]], [[AVX_SET0_]]
-; CHECK-NEXT:   [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load (s256) from %ir.stack_input_vec, align 4)
-; CHECK-NEXT:   [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load (s256) from %ir.stack_input_vec + 32, align 4)
-; CHECK-NEXT:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store (s256) into %ir.stack_output_vec + 32, align 4)
-; CHECK-NEXT:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store (s256) into %ir.stack_output_vec, align 4)
+; CHECK-NEXT:   [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load unknown-size from %ir.stack_input_vec, align 4)
+; CHECK-NEXT:   [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load unknown-size from %ir.stack_input_vec + 32, align 4)
+; CHECK-NEXT:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store unknown-size into %ir.stack_output_vec + 32, align 4)
+; CHECK-NEXT:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store unknown-size into %ir.stack_output_vec, align 4)
 ; CHECK-NEXT:   RET 0
 bb:
   %stack_input_vec = alloca <16 x float>, align 64
@@ -41,10 +41,10 @@ define void @test_v8d(<8 x i64> %x) {
 ; CHECK-NEXT:   [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
 ; CHECK-NEXT:   [[VPCMPEQQYrr:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY]], [[AVX_SET0_]]
 ; CHECK-NEXT:   [[VPCMPEQQYrr1:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY1]], [[AVX_SET0_]]
-; CHECK-NEXT:   [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load (s256) from %ir.stack_input_vec, align 4)
-; CHECK-NEXT:   [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load (s256) from %ir.stack_input_vec + 32, align 4)
-; CHECK-NEXT:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store (s256) into %ir.stack_output_vec + 32, align 4)
-; CHECK-NEXT:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store (s256) into %ir.stack_output_vec, align 4)
+; CHECK-NEXT:   [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load unknown-size from %ir.stack_input_vec, align 4)
+; CHECK-NEXT:   [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load unknown-size from %ir.stack_input_vec + 32, align 4)
+; CHECK-NEXT:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store unknown-size into %ir.stack_output_vec + 32, align 4)
+; CHECK-NEXT:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store unknown-size into %ir.stack_output_vec, align 4)
 ; CHECK-NEXT:   RET 0
 bb:
   %stack_input_vec = alloca <8 x double>, align 64