[IR] Require intrinsic struct return type to be anonymous

This is an alternative to D122376. Rather than working around the
problem, this patch requires that struct return types in intrinsics
are anonymous/literal and adds auto-upgrade code to convert
existing uses of intrinsics with named struct types.

This ensures that the mapping between intrinsic name and
intrinsic function type is actually bijective, as it is supposed
to be.

This also fixes https://github.com/llvm/llvm-project/issues/37891.

Differential Revision: https://reviews.llvm.org/D122471
This commit is contained in:
Nikita Popov 2022-03-25 10:50:13 +01:00
parent f54f448525
commit 8a72391f60
7 changed files with 88 additions and 38 deletions

View File

@ -5,6 +5,9 @@
// RUN: echo "GPU binary would be here" > %t
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
// Accessing nvvm intrinsics in this way no longer works.
// XFAIL: *
struct textureReference {
int desc;
};

View File

@ -576,19 +576,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
F->arg_begin()->getType());
return true;
}
static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vldRegex.match(Name)) {
auto fArgs = F->getFunctionType()->params();
SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
// Can't use Intrinsic::getDeclaration here as the return types might
// then only be structurally equal.
FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
StringRef Suffix =
F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
"llvm." + Name + "." + Suffix, F->getParent());
return true;
}
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
@ -1017,6 +1004,25 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
}
if (auto *ST = dyn_cast<StructType>(F->getReturnType())) {
if (!ST->isLiteral() || ST->isPacked()) {
// Replace return type with literal non-packed struct.
auto *FT = F->getFunctionType();
auto *NewST = StructType::get(ST->getContext(), ST->elements());
auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
std::string Name = F->getName().str();
rename(F);
NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
Name, F->getParent());
// The new function may also need remangling.
if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
NewFn = *Result;
return true;
}
}
// Remangle our intrinsic since we upgrade the mangling
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
if (Result != None) {
@ -3784,12 +3790,33 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
return;
}
const auto &DefaultCase = [&NewFn, &CI]() -> void {
// Handle generic mangling change, but nothing else
assert(
(CI->getCalledFunction()->getName() != NewFn->getName()) &&
"Unknown function for CallBase upgrade and isn't just a name change");
CI->setCalledFunction(NewFn);
const auto &DefaultCase = [&]() -> void {
if (CI->getFunctionType() == NewFn->getFunctionType()) {
// Handle generic mangling change.
assert(
(CI->getCalledFunction()->getName() != NewFn->getName()) &&
"Unknown function for CallBase upgrade and isn't just a name change");
CI->setCalledFunction(NewFn);
return;
}
// This must be an upgrade from a named to a literal struct.
auto *OldST = cast<StructType>(CI->getType());
auto *NewST = cast<StructType>(NewFn->getReturnType());
assert(OldST != NewST && "Return type must have changed");
assert(OldST->getNumElements() == NewST->getNumElements() &&
"Must have same number of elements");
SmallVector<Value *> Args(CI->args());
Value *NewCI = Builder.CreateCall(NewFn, Args);
Value *Res = PoisonValue::get(OldST);
for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
Res = Builder.CreateInsertValue(Res, Elem, Idx);
}
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
return;
};
CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
@ -3797,13 +3824,6 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
DefaultCase();
return;
}
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:

View File

@ -1475,9 +1475,19 @@ static bool matchIntrinsicType(
PointerType *PT = dyn_cast<PointerType>(Ty);
if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace)
return true;
if (!PT->isOpaque())
if (!PT->isOpaque()) {
/* Manually consume a pointer to empty struct descriptor, which is
* used for externref. We don't want to enforce that the struct is
* anonymous in this case. (This renders externref intrinsics
* non-unique, but this will go away with opaque pointers anyway.) */
if (Infos.front().Kind == IITDescriptor::Struct &&
Infos.front().Struct_NumElements == 0) {
Infos = Infos.slice(1);
return false;
}
return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos,
ArgTys, DeferredChecks, IsDeferredCheck);
}
// Consume IIT descriptors relating to the pointer element type.
// FIXME: Intrinsic type matching of nested single value types or even
// aggregates doesn't work properly with opaque pointers but hopefully
@ -1491,7 +1501,8 @@ static bool matchIntrinsicType(
case IITDescriptor::Struct: {
StructType *ST = dyn_cast<StructType>(Ty);
if (!ST || ST->getNumElements() != D.Struct_NumElements)
if (!ST || !ST->isLiteral() || ST->isPacked() ||
ST->getNumElements() != D.Struct_NumElements)
return true;
for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)

View File

@ -0,0 +1,18 @@
; RUN: llvm-dis < %s.bc | FileCheck %s
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
; CHECK-LABEL: define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) {
; CHECK: %1 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
; CHECK: %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0
; CHECK: %3 = insertvalue %struct.__neon_int8x8x2_t poison, <8 x i8> %2, 0
; CHECK: %4 = extractvalue { <8 x i8>, <8 x i8> } %1, 1
; CHECK: %5 = insertvalue %struct.__neon_int8x8x2_t %3, <8 x i8> %4, 1
; CHECK: ret %struct.__neon_int8x8x2_t %5
define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) {
%val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
ret %struct.__neon_int8x8x2_t %val
}

Binary file not shown.

View File

@ -4,9 +4,8 @@
; RUN: llc < %s -O0 -mcpu=x86-64 -mattr=+avx512f | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10"
%0 = type { i32, i1 } ; type %0
declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
define fastcc i32 @test() nounwind {
entry:
@ -16,12 +15,12 @@ entry:
; CHECK-NEXT: addl $0, [[REG]]
; CHECK-NEXT: seto {{%[a-z]+l}}
; CHECK: jo LBB0_2
%tmp1 = call %0 @llvm.sadd.with.overflow.i32(i32 1, i32 0)
%tmp2 = extractvalue %0 %tmp1, 1
%tmp1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 1, i32 0)
%tmp2 = extractvalue { i32, i1 } %tmp1, 1
br i1 %tmp2, label %.backedge, label %BB3
BB3:
%tmp4 = extractvalue %0 %tmp1, 0
%tmp4 = extractvalue { i32, i1 } %tmp1, 0
br label %.backedge
.backedge:

View File

@ -1,7 +1,6 @@
; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort=1 | FileCheck %s
%struct.x = type { i64, i64 }
%addovf = type { i32, i1 }
declare %struct.x @f()
define void @test1(i64*) nounwind ssp {
@ -28,13 +27,13 @@ define void @test2(i64*) nounwind ssp {
; CHECK: addq $10, %rdx
}
declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
define void @test3(i32 %x, i32 %y, i32* %z) {
%r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
%sum = extractvalue %addovf %r, 0
%r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
%sum = extractvalue { i32, i1 } %r, 0
%sum3 = mul i32 %sum, 3
%bit = extractvalue %addovf %r, 1
%bit = extractvalue { i32, i1 } %r, 1
br i1 %bit, label %then, label %end
then: