[InstCombine] Revert D125845
Reverting D125845 `[InstCombine] Canonicalize GEP of GEP by swapping constant-indexed GEP to the back` because multiple users reported performance regressions. Reviewed By: davidxl. Differential Revision: https://reviews.llvm.org/D138950
This commit is contained in:
parent
3a37c112b1
commit
be4b1dd35b
|
@ -1961,14 +1961,6 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
|
|||
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
|
||||
return nullptr;
|
||||
|
||||
// LICM moves a GEP with constant indices to the front, while canonicalization
|
||||
// swaps it to the back of a non-constant GEP. If both transformations can be
|
||||
// applied, LICM takes priority because it generally provides greater
|
||||
// optimization by reducing instruction count in the loop body, but performing
|
||||
// canonicalization swapping first negates the LICM opportunity while it does
|
||||
// not necessarily reduce instruction count.
|
||||
bool ShouldCanonicalizeSwap = true;
|
||||
|
||||
if (Src->getResultElementType() == GEP.getSourceElementType() &&
|
||||
Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
|
||||
Src->hasOneUse()) {
|
||||
|
@ -1978,12 +1970,6 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
|
|||
if (LI) {
|
||||
// Try to reassociate loop invariant GEP chains to enable LICM.
|
||||
if (Loop *L = LI->getLoopFor(GEP.getParent())) {
|
||||
// If SO1 is invariant and GO1 is variant, they should not be swapped by
|
||||
// canonicalization even if it can be applied, otherwise it triggers
|
||||
// LICM swapping in the next iteration, causing an infinite loop.
|
||||
if (!L->isLoopInvariant(GO1) && L->isLoopInvariant(SO1))
|
||||
ShouldCanonicalizeSwap = false;
|
||||
|
||||
// Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
|
||||
// invariant: this breaks the dependence between GEPs and allows LICM
|
||||
// to hoist the invariant part out of the loop.
|
||||
|
@ -2008,32 +1994,12 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
|
|||
}
|
||||
}
|
||||
|
||||
// Canonicalize swapping. Swap GEP with constant index suffix to the back if
|
||||
// it doesn't violate def-use relations or contradict with loop invariant
|
||||
// swap above. This allows more potential applications of constant-indexed GEP
|
||||
// optimizations below.
|
||||
if (ShouldCanonicalizeSwap && Src->hasOneUse() &&
|
||||
Src->getPointerOperandType() == GEP.getPointerOperandType() &&
|
||||
Src->getPointerOperandType() == GEP.getType() &&
|
||||
Src->getType()->isVectorTy() == GEP.getType()->isVectorTy() &&
|
||||
!isa<GlobalValue>(Src->getPointerOperand())) {
|
||||
// When swapping, GEP with all constant indices are more prioritized than
|
||||
// GEP with only the last few indices (but not all) being constant because
|
||||
// it may be merged with GEP with all constant indices.
|
||||
if ((isa<ConstantInt>(*(Src->indices().end() - 1)) &&
|
||||
!isa<ConstantInt>(*(GEP.indices().end() - 1))) ||
|
||||
(Src->hasAllConstantIndices() && !GEP.hasAllConstantIndices())) {
|
||||
// Cannot guarantee inbounds after swapping because the non-const GEP can
|
||||
// have arbitrary sign.
|
||||
Value *NewSrc = Builder.CreateGEP(
|
||||
GEP.getSourceElementType(), Src->getOperand(0),
|
||||
SmallVector<Value *>(GEP.indices()), Src->getName());
|
||||
GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
|
||||
Src->getSourceElementType(), NewSrc,
|
||||
SmallVector<Value *>(Src->indices()), GEP.getName());
|
||||
return NewGEP;
|
||||
}
|
||||
}
|
||||
// Note that if our source is a gep chain itself then we wait for that
|
||||
// chain to be resolved before we perform this transformation. This
|
||||
// avoids us creating a TON of code in some cases.
|
||||
if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
|
||||
if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
|
||||
return nullptr; // Wait until our source is folded to completion.
|
||||
|
||||
// For constant GEPs, use a more general offset-based folding approach.
|
||||
// Only do this for opaque pointers, as the result element type may change.
|
||||
|
|
|
@ -1,19 +1,21 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -passes='require<loops>,instcombine' -opaque-pointers -S | FileCheck %s
|
||||
; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s
|
||||
|
||||
; Constant-indexed GEP instructions in a chain of GEP instructions should be
|
||||
; swapped to the end whenever such transformation is valid. This allows them to
|
||||
; be merged.
|
||||
|
||||
declare void @use(i1)
|
||||
|
||||
|
||||
; The constant-indexed GEP instruction should be swapped to the end, even
|
||||
; without merging.
|
||||
; result = (((i32*) p + a) + b) + 1
|
||||
; result = (((ptr) p + a) + b) + 1
|
||||
define ptr @basic(ptr %p, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @basic(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[B:%.*]]
|
||||
; CHECK-NEXT: ret ptr [[TMP3]]
|
||||
;
|
||||
%1 = getelementptr inbounds i32, ptr %p, i64 1
|
||||
|
@ -25,34 +27,33 @@ define ptr @basic(ptr %p, i64 %a, i64 %b) {
|
|||
; GEP with the last index being a constant should also be swapped.
|
||||
define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @partialConstant1(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 1
|
||||
; CHECK-NEXT: ret ptr [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
|
||||
; CHECK-NEXT: ret ptr [[TMP1]]
|
||||
;
|
||||
%1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1
|
||||
%2 = getelementptr inbounds i32, ptr %1, i64 %b
|
||||
%2 = getelementptr inbounds i32, ptr %p, i64 %b
|
||||
ret ptr %2
|
||||
}
|
||||
|
||||
; Negative test. GEP should not be swapped if the last index is not a constant.
|
||||
define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @partialConstant2(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 1, i64 [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]]
|
||||
; CHECK-NEXT: ret ptr [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
|
||||
; CHECK-NEXT: ret ptr [[TMP1]]
|
||||
;
|
||||
%1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a
|
||||
%2 = getelementptr inbounds i32, ptr %1, i64 %b
|
||||
%2 = getelementptr inbounds i32, ptr %p, i64 %b
|
||||
ret ptr %2
|
||||
}
|
||||
|
||||
; Constant-indexed GEP are merged after swapping.
|
||||
; result = ((i32*) p + a) + 3
|
||||
; Constant-indexed GEP are merged after swapping.
|
||||
; result = ((ptr) p + a) + 3
|
||||
define ptr @merge(ptr %p, i64 %a) {
|
||||
; CHECK-LABEL: @merge(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 3
|
||||
; CHECK-NEXT: ret ptr [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2
|
||||
; CHECK-NEXT: ret ptr [[TMP3]]
|
||||
;
|
||||
%1 = getelementptr inbounds i32, ptr %p, i64 1
|
||||
%2 = getelementptr inbounds i32, ptr %1, i64 %a
|
||||
|
@ -63,14 +64,16 @@ define ptr @merge(ptr %p, i64 %a) {
|
|||
; Multiple constant-indexed GEP. Note that the first two cannot be merged at
|
||||
; first, but after the second and third are merged, the result can be merged
|
||||
; with the first one on the next pass.
|
||||
; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 9
|
||||
; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
|
||||
define ptr @nested(ptr %p, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @nested(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <3 x i32>, ptr [[TMP3]], i64 10
|
||||
; CHECK-NEXT: ret ptr [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1
|
||||
; CHECK-NEXT: ret ptr [[TMP6]]
|
||||
;
|
||||
%1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
|
||||
%2 = getelementptr inbounds i8, ptr %1, i64 %a
|
||||
|
@ -84,9 +87,9 @@ define ptr @nested(ptr %p, i64 %a, i64 %b) {
|
|||
; It is valid to swap if the source operand of the first GEP has multiple uses.
|
||||
define ptr @multipleUses1(ptr %p) {
|
||||
; CHECK-LABEL: @multipleUses1(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: ret ptr [[TMP3]]
|
||||
;
|
||||
%1 = getelementptr inbounds i32, ptr %p, i64 1
|
||||
|
@ -95,10 +98,24 @@ define ptr @multipleUses1(ptr %p) {
|
|||
ret ptr %3
|
||||
}
|
||||
|
||||
; Negative test. It is not valid to swap if the first GEP has multiple uses.
|
||||
define ptr @multipleUses2(ptr %p) {
|
||||
; It is valid to swap if the second GEP has multiple uses.
|
||||
define ptr @multipleUses2(ptr %p, i64 %a) {
|
||||
; CHECK-LABEL: @multipleUses2(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
|
||||
; CHECK-NEXT: call void @use(ptr nonnull [[TMP2]])
|
||||
; CHECK-NEXT: ret ptr [[TMP2]]
|
||||
;
|
||||
%1 = getelementptr inbounds i32, ptr %p, i64 1
|
||||
%2 = getelementptr inbounds i32, ptr %1, i64 %a
|
||||
call void @use(ptr %2)
|
||||
ret ptr %2
|
||||
}
|
||||
|
||||
; Negative test. It is not valid to swap if the first GEP has multiple uses.
|
||||
define ptr @multipleUses3(ptr %p) {
|
||||
; CHECK-LABEL: @multipleUses3(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: ret ptr [[TMP3]]
|
||||
|
@ -108,40 +125,3 @@ define ptr @multipleUses2(ptr %p) {
|
|||
%3 = getelementptr inbounds i32, ptr %1, i64 %2
|
||||
ret ptr %3
|
||||
}
|
||||
|
||||
; Negative test. LICM should take priority over canonicalization, so the first
|
||||
; GEP should not be swapped, even if it contains a constant index.
|
||||
define i64 @licm(ptr %p) {
|
||||
; CHECK-LABEL: @licm(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INEXT:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 4
|
||||
; CHECK-NEXT: [[P2:%.*]] = getelementptr i64, ptr [[P1]], i64 [[I]]
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[P2]], align 4
|
||||
; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[LOAD]]
|
||||
; CHECK-NEXT: [[INEXT]] = add nuw nsw i64 [[I]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[I]], 1000000
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret i64 [[ADD]]
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
|
||||
%sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
||||
%p1 = getelementptr i64, ptr %p, i64 4
|
||||
%p2 = getelementptr i64, ptr %p1, i64 %i
|
||||
%load = load i64, ptr %p2
|
||||
%add = add nsw i64 %sum, %load
|
||||
%inext = add nuw nsw i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i, 1000000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i64 %add
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@ target datalayout = "i24:8:8"
|
|||
%struct.B = type { i8, [3 x i16], %struct.A, float }
|
||||
%struct.C = type { i8, i32, i32 }
|
||||
|
||||
; result = (i32*) p + 3
|
||||
; result = (ptr) p + 3
|
||||
define ptr @mergeBasic(ptr %p) {
|
||||
; CHECK-LABEL: @mergeBasic(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3
|
||||
|
@ -20,8 +20,8 @@ define ptr @mergeBasic(ptr %p) {
|
|||
ret ptr %2
|
||||
}
|
||||
|
||||
; Converted to i8* and merged.
|
||||
; result = (i8*) p + 10
|
||||
; Converted to ptr and merged.
|
||||
; result = (ptr) p + 10
|
||||
define ptr @mergeDifferentTypes(ptr %p) {
|
||||
; CHECK-LABEL: @mergeDifferentTypes(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
|
||||
|
@ -32,8 +32,8 @@ define ptr @mergeDifferentTypes(ptr %p) {
|
|||
ret ptr %2
|
||||
}
|
||||
|
||||
; Converted to i8* and merged.
|
||||
; result = (i8*) p + 10
|
||||
; Converted to ptr and merged.
|
||||
; result = (ptr) p + 10
|
||||
define ptr @mergeReverse(ptr %p) {
|
||||
; CHECK-LABEL: @mergeReverse(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
|
||||
|
@ -55,7 +55,7 @@ define ptr @zeroSum(ptr %p) {
|
|||
ret ptr %2
|
||||
}
|
||||
|
||||
; result = (i8*) (([20 x i8]*) p + 1) + 17
|
||||
; result = (ptr) ((ptr) p + 1) + 17
|
||||
define ptr @array1(ptr %p) {
|
||||
; CHECK-LABEL: @array1(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 17
|
||||
|
@ -66,8 +66,8 @@ define ptr @array1(ptr %p) {
|
|||
ret ptr %2
|
||||
}
|
||||
|
||||
; Converted to i8* and merged.
|
||||
; result = (i8*) p + 20
|
||||
; Converted to ptr and merged.
|
||||
; result = (ptr) p + 20
|
||||
define ptr @array2(ptr %p) {
|
||||
; CHECK-LABEL: @array2(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 20
|
||||
|
@ -78,8 +78,8 @@ define ptr @array2(ptr %p) {
|
|||
ret ptr %2
|
||||
}
|
||||
|
||||
; Converted to i8* and merged.
|
||||
; result = (i8*) p + 36
|
||||
; Converted to ptr and merged.
|
||||
; result = (ptr) p + 36
|
||||
define ptr @struct1(ptr %p) {
|
||||
; CHECK-LABEL: @struct1(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 36
|
||||
|
@ -101,7 +101,7 @@ define ptr @struct2(ptr %p) {
|
|||
ret ptr %2
|
||||
}
|
||||
|
||||
; result = (i8*) &((struct.B) p)[0].member2.member0 + 7
|
||||
; result = (ptr) &((struct.B) p)[0].member2.member0 + 7
|
||||
define ptr @structStruct(ptr %p) {
|
||||
; CHECK-LABEL: @structStruct(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 7
|
||||
|
@ -115,7 +115,7 @@ define ptr @structStruct(ptr %p) {
|
|||
; First GEP offset is not divisible by last GEP's source element size, but first
|
||||
; GEP points to an array such that the last GEP offset is divisible by the
|
||||
; array's element size, so the first GEP can be rewritten with an extra index.
|
||||
; result = (i16*) &((struct.B*) p)[i].member1 + 2
|
||||
; result = (ptr) &((struct.B*) p)[i].member1 + 2
|
||||
define ptr @appendIndex(ptr %p, i64 %i) {
|
||||
; CHECK-LABEL: @appendIndex(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 1, i64 2
|
||||
|
@ -126,23 +126,10 @@ define ptr @appendIndex(ptr %p, i64 %i) {
|
|||
ret ptr %2
|
||||
}
|
||||
|
||||
; After canonicalizing, the second GEP is moved to the front, and then merged
|
||||
; with the first one with rewritten indices.
|
||||
; result = (i8*) &((struct.A*) &((struct.B*) p)[i].member2).member0 + 2
|
||||
define ptr @appendIndexReverse(ptr %p, i64 %i) {
|
||||
; CHECK-LABEL: @appendIndexReverse(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 2, i32 0, i64 2
|
||||
; CHECK-NEXT: ret ptr [[TMP1]]
|
||||
;
|
||||
%1 = getelementptr inbounds i64, ptr %p, i64 1
|
||||
%2 = getelementptr inbounds %struct.B, ptr %1, i64 %i, i32 1
|
||||
ret ptr %2
|
||||
}
|
||||
|
||||
; Offset of either GEP is not divisible by the other's size, converted to i8*
|
||||
; Offset of either GEP is not divisible by the other's size, converted to ptr
|
||||
; and merged.
|
||||
; Here i24 is 8-bit aligned.
|
||||
; result = (i8*) p + 7
|
||||
; result = (ptr) p + 7
|
||||
define ptr @notDivisible(ptr %p) {
|
||||
; CHECK-LABEL: @notDivisible(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7
|
||||
|
@ -157,8 +144,8 @@ define ptr @notDivisible(ptr %p) {
|
|||
; or divisible by the other's size.
|
||||
define ptr @partialConstant2(ptr %p, i64 %a) {
|
||||
; CHECK-LABEL: @partialConstant2(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [4 x i64], ptr [[P:%.*]], i64 [[A:%.*]], i64 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[A:%.*]], i64 2
|
||||
; CHECK-NEXT: ret ptr [[TMP2]]
|
||||
;
|
||||
%1 = getelementptr inbounds i32, ptr %p, i64 1
|
||||
|
|
|
@ -1743,10 +1743,10 @@ define void @ashr_out_of_range(ptr %A) {
|
|||
define void @ashr_out_of_range_1(ptr %A) {
|
||||
; CHECK-LABEL: @ashr_out_of_range_1(
|
||||
; CHECK-NEXT: [[L:%.*]] = load i177, ptr [[A:%.*]], align 4
|
||||
; CHECK-NEXT: [[G11:%.*]] = getelementptr i177, ptr [[A]], i64 -1
|
||||
; CHECK-NEXT: [[B24_LOBIT:%.*]] = ashr i177 [[L]], 175
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i177 [[B24_LOBIT]] to i64
|
||||
; CHECK-NEXT: [[G111:%.*]] = getelementptr i177, ptr [[A]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, ptr [[G111]], i64 -1
|
||||
; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, ptr [[G11]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i177 0, ptr [[G62]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
|
|
@ -38,7 +38,8 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond,
|
|||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>*
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8
|
||||
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
|
||||
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8
|
||||
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
|
@ -49,7 +50,8 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond,
|
|||
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP10]], i64 -7
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3
|
||||
; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
|
||||
|
|
|
@ -762,9 +762,11 @@ define void @mixed_load3_store3(i32* nocapture %A) {
|
|||
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
|
||||
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>*
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
|
||||
|
|
|
@ -11,19 +11,19 @@ define i16 @helper(i16 %0, i64 %x) {
|
|||
; CHECK-NEXT: start:
|
||||
; CHECK-NEXT: [[DATA:%.*]] = alloca [2 x i8], align 2
|
||||
; CHECK-NEXT: store i16 [[TMP0:%.*]], ptr [[DATA]], align 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
|
||||
; CHECK-NEXT: br label [[BB6_I_I:%.*]]
|
||||
; CHECK: bb6.i.i:
|
||||
; CHECK-NEXT: [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP1:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
|
||||
; CHECK-NEXT: [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
|
||||
; CHECK-NEXT: [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]]
|
||||
; CHECK-NEXT: [[TMP1]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
|
||||
; CHECK-NEXT: [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
|
||||
; CHECK-NEXT: [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [0 x i8], ptr [[DATA]], i64 0, i64 [[_40_I_I]]
|
||||
; CHECK-NEXT: [[_39_I_I:%.*]] = getelementptr i8, ptr [[TMP2]], i64 1
|
||||
; CHECK-NEXT: [[_39_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[TMP1]], i64 0, i64 [[_40_I_I]]
|
||||
; CHECK-NEXT: [[TMP_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_34_I_I]], align 1
|
||||
; CHECK-NEXT: [[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1
|
||||
; CHECK-NEXT: store i8 [[TMP2_0_COPYLOAD_I_I_I_I]], ptr [[_34_I_I]], align 1
|
||||
; CHECK-NEXT: store i8 [[TMP_0_COPYLOAD_I_I_I_I]], ptr [[_39_I_I]], align 1
|
||||
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP1]], [[X:%.*]]
|
||||
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP2]], [[X:%.*]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label [[EXIT:%.*]], label [[BB6_I_I]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[DOTSROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[DATA]], align 2
|
||||
|
|
Loading…
Reference in New Issue