From 7c10541e568f7453915d5d207c55f0fb7fb4c43c Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 7 Apr 2020 14:05:29 -0700 Subject: [PATCH] [SelectionDAG] Fix usage of Align when constructing MachineMemOperands. The "Align" passed into getMachineMemOperand etc. is the alignment of the MachinePointerInfo, not the alignment of the memory operation. (getAlign() on a MachineMemOperand automatically reduces the alignment to account for this.) We were passing on the wrong (overconservative) alignment in a bunch of places. Fix a bunch of these, mostly in legalization. And while I'm here, switch to the new Align APIs. The test changes are all scheduling changes: the biggest effect of preserving large alignments is that it improves alias analysis, so the scheduler has more freedom. (I was originally just trying to do a minor cleanup in SelectionDAGBuilder, but I accidentally went deeper down the rabbit hole.) Differential Revision: https://reviews.llvm.org/D77687 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 69 +++++++++---------- .../SelectionDAG/LegalizeFloatTypes.cpp | 20 +++--- .../SelectionDAG/LegalizeIntegerTypes.cpp | 33 +++++---- .../SelectionDAG/LegalizeTypesGeneric.cpp | 35 +++++----- .../SelectionDAG/LegalizeVectorOps.cpp | 11 ++- .../SelectionDAG/LegalizeVectorTypes.cpp | 49 +++++++------ .../SelectionDAG/SelectionDAGBuilder.cpp | 8 +-- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 6 +- llvm/test/CodeGen/RISCV/vararg.ll | 6 +- .../test/CodeGen/X86/avx512-insert-extract.ll | 24 +++---- llvm/test/CodeGen/X86/nosse-vector.ll | 2 +- llvm/test/CodeGen/X86/sadd_sat_vec.ll | 4 +- llvm/test/CodeGen/X86/ssub_sat_vec.ll | 4 +- llvm/test/CodeGen/X86/uadd_sat_vec.ll | 4 +- llvm/test/CodeGen/X86/usub_sat_vec.ll | 4 +- llvm/test/CodeGen/X86/widen_cast-2.ll | 4 +- llvm/test/CodeGen/X86/widen_load-2.ll | 2 +- 17 files changed, 134 insertions(+), 151 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 
112a06385931..b1a06653b3cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -434,7 +434,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // We generally can't do this one for long doubles. SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); @@ -444,8 +443,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), SDLoc(CFP), MVT::i32); - return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, - MMOFlags, AAInfo); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -454,7 +453,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -467,12 +466,12 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, - MMOFlags, AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - MinAlign(Alignment, 4U), MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -487,7 +486,6 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Ptr = ST->getBasePtr(); SDLoc dl(Node); - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); @@ -528,9 +526,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote stores to same size type"); Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -553,7 +550,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & 
(StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -575,7 +572,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); + RoundVT, ST->getOriginalAlign(), MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -584,10 +581,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore( - Chain, dl, Hi, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -596,18 +592,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, + ST->getOriginalAlign(), MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getTruncStore( - Chain, dl, Value, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo); } // The order of the stores doesn't matter. @@ -643,15 +638,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { if (TLI.isTypeLegal(StVT)) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); } else { // The in-memory type isn't legal. Truncate to the type it would promote // to, and then do a truncstore. Value = DAG.getNode(ISD::TRUNCATE, dl, TLI.getTypeToTransformTo(*DAG.getContext(), StVT), Value); - Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - StVT, Alignment, MMOFlags, AAInfo); + Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), StVT, + ST->getOriginalAlign(), MMOFlags, AAInfo); } ReplaceNode(SDValue(Node, 0), Result); @@ -721,7 +717,6 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); - unsigned Alignment = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); @@ -748,9 +743,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; - SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); + SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), NVT, + LD->getOriginalAlign(), MMOFlags, AAInfo); Ch = Result.getValue(1); // The chain. @@ -788,16 +783,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, - AAInfo); + LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, - AAInfo); + ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -817,16 +811,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, - AAInfo); + LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, - AAInfo); + ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 994c491da691..a67b37fc827e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -665,8 +665,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags, - L->getAAInfo()); + L->getPointerInfo(), NVT, L->getOriginalAlign(), + MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -676,8 +676,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Do a non-extending load followed by FP_EXTEND. NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(), - MMOFlags, L->getAAInfo()); + L->getPointerInfo(), L->getMemoryVT(), + L->getOriginalAlign(), MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -2335,12 +2335,10 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { // Load the value as an integer value with the same number of bits. 
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, - SDLoc(N), L->getChain(), L->getBasePtr(), - L->getOffset(), L->getPointerInfo(), IVT, - L->getAlignment(), - L->getMemOperand()->getFlags(), - L->getAAInfo()); + SDValue newL = DAG.getLoad( + L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N), + L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT, + L->getOriginalAlign(), L->getMemOperand()->getFlags(), L->getAAInfo()); // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); @@ -2620,7 +2618,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) { SDValue NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), MVT::i16, SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), MVT::i16, L->getAlignment(), + L->getPointerInfo(), MVT::i16, L->getOriginalAlign(), L->getMemOperand()->getFlags(), L->getAAInfo()); // Legalize the chain result by replacing uses of the old value chain with the // new one diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 716fe9ddd60c..453500aa9e51 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2926,7 +2926,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); - unsigned Alignment = N->getAlignment(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); @@ -2937,7 +2936,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, - Alignment, 
MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -2959,8 +2958,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } else if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. - Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags, - AAInfo); + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), + N->getOriginalAlign(), MMOFlags, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2971,7 +2970,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2989,7 +2988,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - Alignment, MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); @@ -2997,7 +2996,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. 
@@ -4204,7 +4203,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - unsigned Alignment = N->getAlignment(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); @@ -4215,15 +4213,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - N->getMemoryVT(), Alignment, MMOFlags, AAInfo); + N->getMemoryVT(), N->getOriginalAlign(), MMOFlags, + AAInfo); } if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, - AAInfo); + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), + N->getOriginalAlign(), MMOFlags, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -4232,9 +4231,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Hi = DAG.getTruncStore( - Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + NEVT, N->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -4262,8 +4261,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { } // Store both the high bits and maybe some of the low bits. 
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment, - MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, + N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); @@ -4271,7 +4270,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 8231a320b4f3..3c1f8e61b531 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -154,9 +154,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment( + Align Alignment = DAG.getDataLayout().getPrefTypeAlign( NOutVT.getTypeForEVT(*DAG.getContext())); - SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment.value()); int SPFI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); @@ -165,7 +165,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo); // Load the first half from the stack slot. 
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -173,8 +173,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize)); + PtrInfo.getWithOffset(IncrementSize), Alignment); // Handle endianness of the load. if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) @@ -251,21 +250,20 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - unsigned Alignment = LD->getAlignment(); AAMDNodes AAInfo = LD->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); - Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment, - LD->getMemOperand()->getFlags(), AAInfo); + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), + AAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); - Hi = DAG.getLoad(NVT, dl, Chain, Ptr, - LD->getPointerInfo().getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), AAInfo); + Hi = DAG.getLoad( + NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo); // Build a factor node to remember that this load is independent of the // other one. 
@@ -462,7 +460,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); - unsigned Alignment = St->getAlignment(); AAMDNodes AAInfo = St->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -474,14 +471,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment, - St->getMemOperand()->getFlags(), AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), + St->getOriginalAlign(), St->getMemOperand()->getFlags(), + AAInfo); Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Hi = DAG.getStore(Chain, dl, Hi, Ptr, - St->getPointerInfo().getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize), - St->getMemOperand()->getFlags(), AAInfo); + Hi = DAG.getStore( + Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), + St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 4d923a3c84dc..ef994b3f10ba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -742,11 +742,10 @@ std::pair VectorLegalizer::ExpandLoad(SDNode *N) { unsigned LoadBytes = WideBytes; if (RemainingBytes >= LoadBytes) { - ScalarLoad = - DAG.getLoad(WideVT, dl, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), - MinAlign(LD->getAlignment(), Offset), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + ScalarLoad = DAG.getLoad( + WideVT, dl, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), LD->getOriginalAlign(), + 
LD->getMemOperand()->getFlags(), LD->getAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { @@ -756,7 +755,7 @@ std::pair VectorLegalizer::ExpandLoad(SDNode *N) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, - MinAlign(LD->getAlignment(), Offset), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c761d4b8b526..38e0da1c4646 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -749,7 +749,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ return DAG.getTruncStore( N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), - N->getMemoryVT().getVectorElementType(), N->getAlignment(), + N->getMemoryVT().getVectorElementType(), N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), @@ -1163,7 +1163,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Store the new subvector into the specified index. SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); + Align Alignment = DAG.getDataLayout().getPrefTypeAlign(VecType); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); // Load the Lo part from the stack slot. @@ -1175,8 +1175,9 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the Hi part from the stack slot. 
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - MinAlign(Alignment, IncrementSize)); + Hi = + DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, + MachinePointerInfo().getWithOffset(IncrementSize), Alignment); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -1458,7 +1459,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // so use a truncating store. SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); + Align Alignment = DAG.getDataLayout().getPrefTypeAlign(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT); @@ -1476,8 +1477,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize)); + PtrInfo.getWithOffset(IncrementSize), Alignment); // If we adjusted the original type, we need to truncate the results. 
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); @@ -1508,7 +1508,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Ptr = LD->getBasePtr(); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT MemoryVT = LD->getMemoryVT(); - Align Alignment = LD->getOriginalAlign(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); @@ -1524,13 +1523,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, } Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); + LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, - Alignment, MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -4855,19 +4855,19 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; - unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads. + // Allow wider loads. + unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment(); // Find the vector type that can load from. 
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - Align, MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. @@ -4910,7 +4910,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Increment), MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { // Later code assumes the vector loads produced will be mergeable, so we @@ -4928,7 +4928,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Increment), MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); } @@ -5005,7 +5005,6 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); @@ -5019,14 +5018,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, Align, MMOFlags, AAInfo); + LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); Ops[i] = 
DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - Align, MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -5045,7 +5044,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, // element type or scalar stores. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - unsigned Align = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); @@ -5073,7 +5071,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, DAG.getVectorIdxConstant(Idx, dl)); StChain.push_back(DAG.getStore( Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Offset), MMOFlags, AAInfo)); + ST->getOriginalAlign(), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; @@ -5092,7 +5090,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, DAG.getVectorIdxConstant(Idx++, dl)); StChain.push_back(DAG.getStore( Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Offset), MMOFlags, AAInfo)); + ST->getOriginalAlign(), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); @@ -5110,7 +5108,6 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, // and then store it. Instead, we extract each element and then store it. 
SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - unsigned Align = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); @@ -5132,9 +5129,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getVectorIdxConstant(0, dl)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo(), StEltVT, Align, - MMOFlags, AAInfo)); + StChain.push_back( + DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, + ST->getOriginalAlign(), MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); @@ -5142,7 +5139,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, DAG.getVectorIdxConstant(0, dl)); StChain.push_back(DAG.getTruncStore( Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), - StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo)); + StEltVT, ST->getOriginalAlign(), MMOFlags, AAInfo)); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2202a8aac80f..a6af33e17c17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3955,7 +3955,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Ptr = getValue(SV); Type *Ty = I.getType(); - unsigned Alignment = I.getAlignment(); + Align Alignment = DL->getValueOrABITypeAlignment(I.getAlign(), Ty); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4148,7 +4148,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Root = I.isVolatile() ? 
getRoot() : getMemoryRoot(); SmallVector Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); - unsigned Alignment = I.getAlignment(); + Align Alignment = + DL->getValueOrABITypeAlignment(I.getAlign(), SrcV->getType()); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4580,8 +4581,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), - I.getAlign().getValueOr(DAG.getEVTAlign(MemVT)), AAMDNodes(), nullptr, - SSID, Order); + *I.getAlign(), AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 922b2aaa30e0..97303d25ea0c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -992,7 +992,7 @@ declare void @test_stackarg_int(i32, i32, i32, i32, i32, i32, i32, i32, i8 zeroe ; 32BIT-DAG: renamable $r[[REGLLIADDR:[0-9]+]] = LWZtoc @lli, $r2 :: (load 4 from got) ; 32BIT-DAG: renamable $r[[REGLLI1:[0-9]+]] = LWZ 0, renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli, align 8) ; 32BIT-DAG: STW killed renamable $r[[REGLLI1]], 68, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGLLI2:[0-9]+]] = LWZ 4, killed renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli + 4) +; 32BIT-DAG: renamable $r[[REGLLI2:[0-9]+]] = LWZ 4, killed renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REGLLI2]], 72, $r1 :: (store 4) ; 32BIT-DAG: STW renamable $r[[REGI]], 76, $r1 :: (store 4) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 @@ -1484,7 +1484,7 @@ entry: ; 32BIT-DAG: $r10 = LI 8 ; 
32BIT-DAG: renamable $r[[REGLL1ADDR:[0-9]+]] = LWZtoc @ll1, $r2 :: (load 4 from got) ; 32BIT-DAG: renamable $r[[REGLL1A:[0-9]+]] = LWZ 0, renamable $r[[REGLL1ADDR]] :: (dereferenceable load 4 from @ll1, align 8) -; 32BIT-DAG: renamable $r[[REGLL1B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL1ADDR]] :: (dereferenceable load 4 from @ll1 + 4) +; 32BIT-DAG: renamable $r[[REGLL1B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL1ADDR]] :: (dereferenceable load 4 from @ll1 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REGLL1A]], 56, $r1 :: (store 4) ; 32BIT-DAG: STW killed renamable $r[[REGLL1B]], 60, $r1 :: (store 4) ; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si1, $r2 :: (load 4 from got) @@ -1501,7 +1501,7 @@ entry: ; 32BIT-DAG: STW killed renamable $r[[REGSI]], 76, $r1 :: (store 4) ; 32BIT-DAG: renamable $r[[REGLL2ADDR:[0-9]+]] = LWZtoc @ll2, $r2 :: (load 4 from got) ; 32BIT-DAG: renamable $r[[REGLL2A:[0-9]+]] = LWZ 0, renamable $r[[REGLL2ADDR]] :: (dereferenceable load 4 from @ll2, align 8) -; 32BIT-DAG: renamable $r[[REGLL2B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL2ADDR]] :: (dereferenceable load 4 from @ll2 + 4) +; 32BIT-DAG: renamable $r[[REGLL2B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL2ADDR]] :: (dereferenceable load 4 from @ll2 + 4, align 8) ; 32BIT-DAG: STW killed renamable $r[[REGLL2A]], 80, $r1 :: (store 4) ; 32BIT-DAG: STW killed renamable $r[[REGLL2B]], 84, $r1 :: (store 4) ; 32BIT-DAG: renamable $r[[REGUCADDR:[0-9]+]] = LWZtoc @uc1, $r2 :: (load 4 from got) diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll index f989e9a8ee69..49e98bfe9ee9 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -828,10 +828,10 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -32 ; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 19 ; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -8 ; ILP32-ILP32F-FPELIM-NEXT: addi a3, sp, 27 @@ -853,10 +853,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 16(sp) ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 24 ; ILP32-ILP32F-WITHFP-NEXT: sw a7, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 ; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -8 ; ILP32-ILP32F-WITHFP-NEXT: addi a3, s0, 19 @@ -877,10 +877,10 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 28(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 20(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 19 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, sp, 27 diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index bef1d1d94b76..7a99fcc3be33 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -2215,22 +2215,22 @@ define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* % ; KNL-NEXT: korw %k1, %k2, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k1 -; KNL-NEXT: kmovw %k1, %edi -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: testb $1, %cl ; KNL-NEXT: movl $0, %ecx ; KNL-NEXT: je LBB85_2 ; KNL-NEXT: ## %bb.1: -; KNL-NEXT: movzwl (%rsi), %ecx +; KNL-NEXT: movzwl 2(%rsi), %ecx ; KNL-NEXT: LBB85_2: +; KNL-NEXT: kmovw %k0, %edi ; KNL-NEXT: testb $1, %dil ; KNL-NEXT: je LBB85_4 ; KNL-NEXT: ## %bb.3: -; KNL-NEXT: movzwl 2(%rsi), %eax +; KNL-NEXT: movzwl (%rsi), %eax ; KNL-NEXT: LBB85_4: -; KNL-NEXT: movw %ax, 2(%rdx) -; KNL-NEXT: movw %cx, (%rdx) +; KNL-NEXT: movw %ax, (%rdx) +; KNL-NEXT: movw %cx, 2(%rdx) ; KNL-NEXT: retq ; ; SKX-LABEL: test_concat_v2i1: @@ -2265,22 +2265,22 @@ define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* % ; SKX-NEXT: korw %k1, %k2, %k1 ; SKX-NEXT: kandw %k1, %k0, %k0 ; SKX-NEXT: kshiftrb $1, %k0, %k1 -; SKX-NEXT: kmovd %k1, %edi -; SKX-NEXT: kmovd %k0, %ecx +; 
SKX-NEXT: kmovd %k1, %ecx ; SKX-NEXT: xorl %eax, %eax ; SKX-NEXT: testb $1, %cl ; SKX-NEXT: movl $0, %ecx ; SKX-NEXT: je LBB85_2 ; SKX-NEXT: ## %bb.1: -; SKX-NEXT: movzwl (%rsi), %ecx +; SKX-NEXT: movzwl 2(%rsi), %ecx ; SKX-NEXT: LBB85_2: +; SKX-NEXT: kmovd %k0, %edi ; SKX-NEXT: testb $1, %dil ; SKX-NEXT: je LBB85_4 ; SKX-NEXT: ## %bb.3: -; SKX-NEXT: movzwl 2(%rsi), %eax +; SKX-NEXT: movzwl (%rsi), %eax ; SKX-NEXT: LBB85_4: -; SKX-NEXT: movw %ax, 2(%rdx) -; SKX-NEXT: movw %cx, (%rdx) +; SKX-NEXT: movw %ax, (%rdx) +; SKX-NEXT: movw %cx, 2(%rdx) ; SKX-NEXT: retq %tmp = load <2 x half>, <2 x half>* %arg, align 8 %tmp3 = fcmp fast olt <2 x half> %tmp, diff --git a/llvm/test/CodeGen/X86/nosse-vector.ll b/llvm/test/CodeGen/X86/nosse-vector.ll index ef2b40a8741b..1203e2921f62 100644 --- a/llvm/test/CodeGen/X86/nosse-vector.ll +++ b/llvm/test/CodeGen/X86/nosse-vector.ll @@ -278,8 +278,8 @@ define void @add_2i64_mem(<2 x i64>* %p0, <2 x i64>* %p1, <2 x i64>* %p2) nounwi ; X32-NEXT: addl 8(%ecx), %edi ; X32-NEXT: adcl 12(%ecx), %esi ; X32-NEXT: movl %edi, 8(%eax) -; X32-NEXT: movl %esi, 12(%eax) ; X32-NEXT: movl %ebx, (%eax) +; X32-NEXT: movl %esi, 12(%eax) ; X32-NEXT: movl %edx, 4(%eax) ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll index 2961129dfb17..3096d4fb9472 100644 --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -380,10 +380,10 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind ; SSE: # %bb.0: ; SSE-NEXT: movdqa (%rdi), %xmm0 ; SSE-NEXT: movdqa 16(%rdi), %xmm1 -; SSE-NEXT: paddsw (%rsi), %xmm0 ; SSE-NEXT: paddsw 16(%rsi), %xmm1 -; SSE-NEXT: movq %xmm1, 16(%rdx) +; SSE-NEXT: paddsw (%rsi), %xmm0 ; SSE-NEXT: movdqa %xmm0, (%rdx) +; SSE-NEXT: movq %xmm1, 16(%rdx) ; SSE-NEXT: retq ; ; AVX1-LABEL: v12i16: diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll index 
784fa0e21ec5..59a18bfa85f6 100644 --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -380,10 +380,10 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind ; SSE: # %bb.0: ; SSE-NEXT: movdqa (%rdi), %xmm0 ; SSE-NEXT: movdqa 16(%rdi), %xmm1 -; SSE-NEXT: psubsw (%rsi), %xmm0 ; SSE-NEXT: psubsw 16(%rsi), %xmm1 -; SSE-NEXT: movq %xmm1, 16(%rdx) +; SSE-NEXT: psubsw (%rsi), %xmm0 ; SSE-NEXT: movdqa %xmm0, (%rdx) +; SSE-NEXT: movq %xmm1, 16(%rdx) ; SSE-NEXT: retq ; ; AVX1-LABEL: v12i16: diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll index 3a4e59742891..1a50b30d14eb 100644 --- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll @@ -380,10 +380,10 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind ; SSE: # %bb.0: ; SSE-NEXT: movdqa (%rdi), %xmm0 ; SSE-NEXT: movdqa 16(%rdi), %xmm1 -; SSE-NEXT: paddusw (%rsi), %xmm0 ; SSE-NEXT: paddusw 16(%rsi), %xmm1 -; SSE-NEXT: movq %xmm1, 16(%rdx) +; SSE-NEXT: paddusw (%rsi), %xmm0 ; SSE-NEXT: movdqa %xmm0, (%rdx) +; SSE-NEXT: movq %xmm1, 16(%rdx) ; SSE-NEXT: retq ; ; AVX1-LABEL: v12i16: diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll index d455a034f0c6..9d3a519d6b67 100644 --- a/llvm/test/CodeGen/X86/usub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll @@ -380,10 +380,10 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind ; SSE: # %bb.0: ; SSE-NEXT: movdqa (%rdi), %xmm0 ; SSE-NEXT: movdqa 16(%rdi), %xmm1 -; SSE-NEXT: psubusw (%rsi), %xmm0 ; SSE-NEXT: psubusw 16(%rsi), %xmm1 -; SSE-NEXT: movq %xmm1, 16(%rdx) +; SSE-NEXT: psubusw (%rsi), %xmm0 ; SSE-NEXT: movdqa %xmm0, (%rdx) +; SSE-NEXT: movq %xmm1, 16(%rdx) ; SSE-NEXT: retq ; ; AVX1-LABEL: v12i16: diff --git a/llvm/test/CodeGen/X86/widen_cast-2.ll b/llvm/test/CodeGen/X86/widen_cast-2.ll index e7780912cd96..94820406f49b 100644 --- 
a/llvm/test/CodeGen/X86/widen_cast-2.ll +++ b/llvm/test/CodeGen/X86/widen_cast-2.ll @@ -19,12 +19,12 @@ define void @convert(<7 x i32>* %dst, <14 x i16>* %src) nounwind { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movdqa (%edx,%eax), %xmm1 ; CHECK-NEXT: movdqa 16(%edx,%eax), %xmm2 -; CHECK-NEXT: psubw %xmm0, %xmm1 ; CHECK-NEXT: psubw %xmm0, %xmm2 +; CHECK-NEXT: psubw %xmm0, %xmm1 +; CHECK-NEXT: movdqa %xmm1, (%ecx,%eax) ; CHECK-NEXT: movd %xmm2, 16(%ecx,%eax) ; CHECK-NEXT: pextrd $1, %xmm2, 20(%ecx,%eax) ; CHECK-NEXT: pextrd $2, %xmm2, 24(%ecx,%eax) -; CHECK-NEXT: movdqa %xmm1, (%ecx,%eax) ; CHECK-NEXT: incl (%esp) ; CHECK-NEXT: cmpl $3, (%esp) ; CHECK-NEXT: jle .LBB0_2 diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll index a816dd0d9b91..4ca71ceb2e0f 100644 --- a/llvm/test/CodeGen/X86/widen_load-2.ll +++ b/llvm/test/CodeGen/X86/widen_load-2.ll @@ -47,9 +47,9 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { ; X86-NEXT: pinsrd $1, 4(%ecx), %xmm1 ; X86-NEXT: pinsrd $2, 8(%ecx), %xmm1 ; X86-NEXT: paddd %xmm0, %xmm1 +; X86-NEXT: movd %xmm1, (%eax) ; X86-NEXT: pextrd $1, %xmm1, 4(%eax) ; X86-NEXT: pextrd $2, %xmm1, 8(%eax) -; X86-NEXT: movd %xmm1, (%eax) ; X86-NEXT: retl $4 ; ; X64-LABEL: add3i32_2: