[Polly] Insert !dbg metadata for emitted CallInsts.

The IR Verifier requires that every call instruction to an inlineable
function (among other things, its implementation must be visible in the
translation unit) also has !dbg metadata attached to it. When
parallelizing, Polly emits calls to OpenMP runtime functions out of thin
air, or at least not directly derived from a bounded list of previous
instructions. While we could search for instructions in the SCoP that have
some debug info attached to them, there is no guarantee that we find any.
Our solution is to generate a new DILocation that points to line 0 to
represent optimized code.

The OpenMP function implementation is usually not available in the
user's translation unit, but can become visible in an LTO build. For
the bug to appear, libomp must also be built with debug symbols.

IMHO, the IR verifier rule is too strict. Runtime functions can
also be inserted by other optimization passes, such as
LoopIdiomRecognize. When inserting a call to e.g. memset, it uses the
DebugLoc from a StoreInst from the unoptimized code. That StoreInst is not
required to have !dbg metadata attached either.

Fixes #56692
This commit is contained in:
Michael Kruse 2022-07-26 19:02:21 -05:00
parent 9981afdd42
commit fe0e5b3e43
5 changed files with 220 additions and 15 deletions

View File

@ -78,6 +78,13 @@ Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
ScopAnnotator *Annotator = nullptr, bool Parallel = false,
bool UseGuard = true, bool LoopVectDisabled = false);
/// Create a DebugLoc representing generated instructions.
///
/// The IR verifier requires !dbg metadata to be set in some situations. For
/// instance, if an (inlinable) function has debug info, all its call sites
/// must have debug info as well.
///
/// @param F The function whose debug-info subprogram is used as the scope of
///          the created location; may be null.
///
/// @return A location at line 0, column 0 in the scope of @p F's
///         DISubprogram, or an empty DebugLoc if @p F is null or has no
///         DISubprogram attached.
llvm::DebugLoc createDebugLocForGeneratedCode(Function *F);
/// The ParallelLoopGenerator allows to create parallelized loops
///
/// To parallelize a loop, we perform the following steps:
@ -126,7 +133,9 @@ public:
: Builder(Builder), LI(LI), DT(DT),
LongType(
Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
M(Builder.GetInsertBlock()->getParent()->getParent()) {}
M(Builder.GetInsertBlock()->getParent()->getParent()),
DLGenerated(createDebugLocForGeneratedCode(
Builder.GetInsertBlock()->getParent())) {}
virtual ~ParallelLoopGenerator() {}
@ -167,6 +176,13 @@ protected:
/// The current module
Module *M;
/// Debug location for generated code without direct link to any specific
/// line.
///
/// We only set the DebugLoc where the IR Verifier requires us to. Otherwise,
/// absent debug location for optimized code should be fine.
llvm::DebugLoc DLGenerated;
public:
/// Create a struct for all @p Values and store them in there.
///

View File

@ -16,6 +16,7 @@
#include "polly/ScopDetection.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
@ -251,3 +252,15 @@ void ParallelLoopGenerator::extractValuesFromStruct(
Map[OldValues[i]] = NewValue;
}
}
DebugLoc polly::createDebugLocForGeneratedCode(Function *F) {
  // Without a function there is no scope a location could be attached to.
  if (F == nullptr)
    return DebugLoc();

  // The function's !dbg attachment serves as the scope, provided it is a
  // DISubprogram. A function without one yields an empty DebugLoc.
  auto *Subprogram =
      dyn_cast_or_null<DISubprogram>(F->getMetadata(LLVMContext::MD_dbg));
  if (Subprogram == nullptr)
    return DebugLoc();

  // Line 0 marks code that has no corresponding source line.
  return DILocation::get(F->getContext(), /*Line=*/0, /*Column=*/0,
                         Subprogram);
}

View File

@ -44,7 +44,8 @@ void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn,
Value *Args[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads),
LB, UB, Stride};
Builder.CreateCall(F, Args);
CallInst *Call = Builder.CreateCall(F, Args);
Call->setDebugLoc(DLGenerated);
}
void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn,
@ -53,7 +54,8 @@ void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn,
Value *Stride) {
// Tell the runtime we start a parallel loop
createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
Builder.CreateCall(SubFn, SubFnParam);
CallInst *Call = Builder.CreateCall(SubFn, SubFnParam);
Call->setDebugLoc(DLGenerated);
createCallJoinThreads();
}
@ -184,9 +186,10 @@ Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr,
}
Value *Args[] = {LBPtr, UBPtr};
Value *Return = Builder.CreateCall(F, Args);
Return = Builder.CreateICmpNE(
Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
CallInst *Call = Builder.CreateCall(F, Args);
Call->setDebugLoc(DLGenerated);
Value *Return = Builder.CreateICmpNE(
Call, Builder.CreateZExt(Builder.getFalse(), Call->getType()));
return Return;
}
@ -203,7 +206,8 @@ void ParallelLoopGeneratorGOMP::createCallJoinThreads() {
F = Function::Create(Ty, Linkage, Name, M);
}
Builder.CreateCall(F, {});
CallInst *Call = Builder.CreateCall(F, {});
Call->setDebugLoc(DLGenerated);
}
void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
@ -219,5 +223,6 @@ void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
F = Function::Create(Ty, Linkage, Name, M);
}
Builder.CreateCall(F, {});
CallInst *Call = Builder.CreateCall(F, {});
Call->setDebugLoc(DLGenerated);
}

View File

@ -57,7 +57,8 @@ void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn,
Stride,
SubFnParam};
Builder.CreateCall(F, Args);
CallInst *Call = Builder.CreateCall(F, Args);
Call->setDebugLoc(DLGenerated);
}
void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn,
@ -329,7 +330,9 @@ Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
F = Function::Create(Ty, Linkage, Name, M);
}
return Builder.CreateCall(F, {SourceLocationInfo});
CallInst *Call = Builder.CreateCall(F, {SourceLocationInfo});
Call->setDebugLoc(DLGenerated);
return Call;
}
void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
@ -352,7 +355,8 @@ void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads};
Builder.CreateCall(F, Args);
CallInst *Call = Builder.CreateCall(F, Args);
Call->setDebugLoc(DLGenerated);
}
void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
@ -397,7 +401,8 @@ void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
ConstantInt::get(LongType, 1),
ChunkSize};
Builder.CreateCall(F, Args);
CallInst *Call = Builder.CreateCall(F, Args);
Call->setDebugLoc(DLGenerated);
}
void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
@ -416,7 +421,8 @@ void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
Value *Args[] = {SourceLocationInfo, GlobalThreadID};
Builder.CreateCall(F, Args);
CallInst *Call = Builder.CreateCall(F, Args);
Call->setDebugLoc(DLGenerated);
}
void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
@ -456,7 +462,8 @@ void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
Inc,
ChunkSize};
Builder.CreateCall(F, Args);
CallInst *Call = Builder.CreateCall(F, Args);
Call->setDebugLoc(DLGenerated);
}
Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
@ -488,7 +495,9 @@ Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr,
StridePtr};
return Builder.CreateCall(F, Args);
CallInst *Call = Builder.CreateCall(F, Args);
Call->setDebugLoc(DLGenerated);
return Call;
}
// TODO: This function currently creates a source location dummy. It might be

View File

@ -0,0 +1,162 @@
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-omp-backend=LLVM -polly-codegen-verify -polly-codegen -S < %s | FileCheck %s
; https://github.com/llvm/llvm-project/issues/56692
;
; CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call({{.*}}), !dbg ![[OPTLOC:[0-9]+]]
; CHECK: call void @__kmpc_dispatch_init_8({{.*}}), !dbg ![[OPTLOC]]
;
; CHECK: ![[OPTLOC]] = !DILocation(line: 0, scope: !{{[0-9]+}})
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
; Test kernel with debug info (!dbg !9): when %n > 0, every element
; A[0 .. n-1] is loaded, multiplied by 42.0 and stored back in place.
; The expected output of this test contains OpenMP runtime calls that carry
; a line-0 !dbg location.
define dso_local void @foo(i32 noundef %n, ptr noalias noundef nonnull align 8 %A) #0 !dbg !9 {
entry:
call void @llvm.dbg.value(metadata i32 %n, metadata !18, metadata !DIExpression()), !dbg !22
call void @llvm.dbg.value(metadata ptr %A, metadata !19, metadata !DIExpression()), !dbg !22
call void @llvm.dbg.value(metadata i32 0, metadata !20, metadata !DIExpression()), !dbg !23
; Guard: the loop is skipped entirely unless %n > 0.
%cmp3 = icmp sgt i32 %n, 0, !dbg !24
br i1 %cmp3, label %for.body.lr.ph, label %for.end, !dbg !26
for.body.lr.ph: ; preds = %entry
; Trip count is %n widened to 64 bits.
%wide.trip.count = zext i32 %n to i64, !dbg !24
br label %for.body, !dbg !26
for.body: ; preds = %for.body.lr.ph, %for.body
; Induction variable counts from 0 up to (but excluding) %wide.trip.count.
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !20, metadata !DIExpression()), !dbg !23
; A[i] = A[i] * 42.0
%arrayidx = getelementptr inbounds double, ptr %A, i64 %indvars.iv, !dbg !27
%0 = load double, ptr %arrayidx, align 8, !dbg !27, !tbaa !29
%mul = fmul double %0, 4.200000e+01, !dbg !33
store double %mul, ptr %arrayidx, align 8, !dbg !34, !tbaa !29
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !35
call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !20, metadata !DIExpression()), !dbg !23
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count, !dbg !24
br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !26, !llvm.loop !36
for.cond.for.end_crit_edge: ; preds = %for.body
br label %for.end, !dbg !26
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
ret void, !dbg !41
}
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
; Function Attrs: nounwind uwtable
; Stub definition of @__kmpc_fork_call with debug info attached (!dbg !42).
; It only describes its three named parameters via llvm.dbg.value and then
; returns. NOTE(review): presumably defined here (rather than merely
; declared) so that the callee is visible with a DISubprogram in this module,
; which is the situation in which the IR verifier demands !dbg on call sites.
define internal void @__kmpc_fork_call(ptr noundef %q, i32 noundef %nargs, ptr noundef %microtask, ...) #0 !dbg !42 {
entry:
call void @llvm.dbg.value(metadata ptr %q, metadata !52, metadata !DIExpression()), !dbg !55
call void @llvm.dbg.value(metadata i32 %nargs, metadata !53, metadata !DIExpression()), !dbg !55
call void @llvm.dbg.value(metadata ptr %microtask, metadata !54, metadata !DIExpression()), !dbg !55
ret void, !dbg !56
}
; Function Attrs: nounwind uwtable
; Stub definition of @__kmpc_dispatch_next_8 with debug info attached
; (!dbg !57). It describes its six parameters via llvm.dbg.value and
; unconditionally returns 0; none of the pointer arguments is written.
define internal i32 @__kmpc_dispatch_next_8(ptr noundef %loc, i32 noundef %gtid, ptr noundef %p_last, ptr noundef %p_lb, ptr noundef %p_ub, ptr noundef %p_st) #0 !dbg !57 {
entry:
call void @llvm.dbg.value(metadata ptr %loc, metadata !70, metadata !DIExpression()), !dbg !76
call void @llvm.dbg.value(metadata i32 %gtid, metadata !71, metadata !DIExpression()), !dbg !76
call void @llvm.dbg.value(metadata ptr %p_last, metadata !72, metadata !DIExpression()), !dbg !76
call void @llvm.dbg.value(metadata ptr %p_lb, metadata !73, metadata !DIExpression()), !dbg !76
call void @llvm.dbg.value(metadata ptr %p_ub, metadata !74, metadata !DIExpression()), !dbg !76
call void @llvm.dbg.value(metadata ptr %p_st, metadata !75, metadata !DIExpression()), !dbg !76
ret i32 0, !dbg !77
}
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
attributes #2 = { argmemonly nocallback nofree nosync nounwind willreturn }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
!llvm.ident = !{!8}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 15.0.0 (/home/meinersbur/src/llvm-project/clang 4e94f6653150511de434fa7e29b684ae7f0e52b6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "parallel.c", directory: "/home/meinersbur/build/llvm-project/release_clang", checksumkind: CSK_MD5, checksum: "f66d96502f5555302321720f0cab6b0d")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"PIC Level", i32 2}
!6 = !{i32 7, !"PIE Level", i32 2}
!7 = !{i32 7, !"uwtable", i32 2}
!8 = !{!"clang version 15.0.0 (/home/meinersbur/src/llvm-project/clang 4e94f6653150511de434fa7e29b684ae7f0e52b6)"}
!9 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 18, type: !10, scopeLine: 18, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !17)
!10 = !DISubroutineType(types: !11)
!11 = !{null, !12, !13}
!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14)
!14 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !15)
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64)
!16 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
!17 = !{!18, !19, !20}
!18 = !DILocalVariable(name: "n", arg: 1, scope: !9, file: !1, line: 18, type: !12)
!19 = !DILocalVariable(name: "A", arg: 2, scope: !9, file: !1, line: 18, type: !13)
!20 = !DILocalVariable(name: "i", scope: !21, file: !1, line: 19, type: !12)
!21 = distinct !DILexicalBlock(scope: !9, file: !1, line: 19, column: 5)
!22 = !DILocation(line: 0, scope: !9)
!23 = !DILocation(line: 0, scope: !21)
!24 = !DILocation(line: 19, column: 23, scope: !25)
!25 = distinct !DILexicalBlock(scope: !21, file: !1, line: 19, column: 5)
!26 = !DILocation(line: 19, column: 5, scope: !21)
!27 = !DILocation(line: 20, column: 21, scope: !28)
!28 = distinct !DILexicalBlock(scope: !25, file: !1, line: 19, column: 33)
!29 = !{!30, !30, i64 0}
!30 = !{!"double", !31, i64 0}
!31 = !{!"omnipotent char", !32, i64 0}
!32 = !{!"Simple C/C++ TBAA"}
!33 = !DILocation(line: 20, column: 19, scope: !28)
!34 = !DILocation(line: 20, column: 14, scope: !28)
!35 = !DILocation(line: 19, column: 28, scope: !25)
!36 = distinct !{!36, !26, !37, !38}
!37 = !DILocation(line: 21, column: 5, scope: !21)
!38 = !{!"llvm.loop.mustprogress"}
!39 = !DILocation(line: 23, column: 5, scope: !9)
!40 = !DILocation(line: 24, column: 5, scope: !9)
!41 = !DILocation(line: 25, column: 1, scope: !9)
!42 = distinct !DISubprogram(name: "__kmpc_fork_call", scope: !1, file: !1, line: 9, type: !43, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !51)
!43 = !DISubroutineType(types: !44)
!44 = !{null, !45, !47, !45, null}
!45 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !46, size: 64)
!46 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
!47 = !DIDerivedType(tag: DW_TAG_typedef, name: "int32_t", file: !48, line: 26, baseType: !49)
!48 = !DIFile(filename: "/usr/include/x86_64-linux-gnu/bits/stdint-intn.h", directory: "", checksumkind: CSK_MD5, checksum: "55bcbdc3159515ebd91d351a70d505f4")
!49 = !DIDerivedType(tag: DW_TAG_typedef, name: "__int32_t", file: !50, line: 41, baseType: !12)
!50 = !DIFile(filename: "/usr/include/x86_64-linux-gnu/bits/types.h", directory: "", checksumkind: CSK_MD5, checksum: "d108b5f93a74c50510d7d9bc0ab36df9")
!51 = !{!52, !53, !54}
!52 = !DILocalVariable(name: "q", arg: 1, scope: !42, file: !1, line: 9, type: !45)
!53 = !DILocalVariable(name: "nargs", arg: 2, scope: !42, file: !1, line: 9, type: !47)
!54 = !DILocalVariable(name: "microtask", arg: 3, scope: !42, file: !1, line: 9, type: !45)
!55 = !DILocation(line: 0, scope: !42)
!56 = !DILocation(line: 10, column: 1, scope: !42)
!57 = distinct !DISubprogram(name: "__kmpc_dispatch_next_8", scope: !1, file: !1, line: 12, type: !58, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !69)
!58 = !DISubroutineType(types: !59)
!59 = !{!12, !60, !62, !63, !64, !64, !64}
!60 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !61, size: 64)
!61 = !DICompositeType(tag: DW_TAG_structure_type, name: "ident_t", file: !1, line: 5, flags: DIFlagFwdDecl)
!62 = !DIDerivedType(tag: DW_TAG_typedef, name: "kmp_int32", file: !1, line: 6, baseType: !47)
!63 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !62, size: 64)
!64 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !65, size: 64)
!65 = !DIDerivedType(tag: DW_TAG_typedef, name: "kmp_int64", file: !1, line: 7, baseType: !66)
!66 = !DIDerivedType(tag: DW_TAG_typedef, name: "int64_t", file: !48, line: 27, baseType: !67)
!67 = !DIDerivedType(tag: DW_TAG_typedef, name: "__int64_t", file: !50, line: 44, baseType: !68)
!68 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed)
!69 = !{!70, !71, !72, !73, !74, !75}
!70 = !DILocalVariable(name: "loc", arg: 1, scope: !57, file: !1, line: 12, type: !60)
!71 = !DILocalVariable(name: "gtid", arg: 2, scope: !57, file: !1, line: 12, type: !62)
!72 = !DILocalVariable(name: "p_last", arg: 3, scope: !57, file: !1, line: 13, type: !63)
!73 = !DILocalVariable(name: "p_lb", arg: 4, scope: !57, file: !1, line: 13, type: !64)
!74 = !DILocalVariable(name: "p_ub", arg: 5, scope: !57, file: !1, line: 14, type: !64)
!75 = !DILocalVariable(name: "p_st", arg: 6, scope: !57, file: !1, line: 14, type: !64)
!76 = !DILocation(line: 0, scope: !57)
!77 = !DILocation(line: 15, column: 37, scope: !57)