[Attributor][OpenMP] Add assumption for non-call assembly instructions

Inline assembly is scary but we need to support it for the OpenMP GPU
device runtime. The new assumption expresses the fact that it may not
have call semantics, that is, it will not call another function but
simply perform an operation or side-effect. This is important for
reachability in the presence of inline assembly.

Differential Revision: https://reviews.llvm.org/D109986
This commit is contained in:
Johannes Doerfert 2021-09-11 18:34:47 -05:00
parent bb0b23174e
commit 7df2eba7fa
5 changed files with 67 additions and 1 deletions

View File

@ -34,6 +34,10 @@ extern StringSet<> KnownAssumptionStrings;
/// Helper that allows inserting a new assumption string into the known
/// assumption set by creating a (static) object.
struct KnownAssumptionString {
KnownAssumptionString(const char *AssumptionStr)
: AssumptionStr(AssumptionStr) {
KnownAssumptionStrings.insert(AssumptionStr);
}
KnownAssumptionString(StringRef AssumptionStr)
: AssumptionStr(AssumptionStr) {
KnownAssumptionStrings.insert(AssumptionStr);

View File

@ -107,4 +107,5 @@ StringSet<> llvm::KnownAssumptionStrings({
"omp_no_openmp_routines", // OpenMP 5.1
"omp_no_parallelism", // OpenMP 5.1
"ompx_spmd_amenable", // OpenMPOpt extension
"ompx_no_call_asm", // OpenMPOpt extension
});

View File

@ -32,6 +32,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@ -9540,7 +9541,9 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
CallBase *CB = cast<CallBase>(getCtxI());
if (CB->isInlineAsm()) {
setHasUnknownCallee(false, Change);
if (!hasAssumption(*CB->getCaller(), "ompx_no_call_asm") &&
!hasAssumption(*CB, "ompx_no_call_asm"))
setHasUnknownCallee(false, Change);
return Change;
}

View File

@ -0,0 +1,49 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
; The enclosing function carries attribute group #0, which this file defines as
; "llvm.assume"="ompx_no_call_asm". The expected output below requires the
; function to be annotated `norecurse` even though its body is an inline
; assembly call.
define void @non_recursive_asm_fn() #0 {
; CHECK: Function Attrs: norecurse
; CHECK-LABEL: define {{[^@]+}}@non_recursive_asm_fn
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void asm sideeffect "barrier.sync $0
; CHECK-NEXT: ret void
;
entry:
call void asm sideeffect "barrier.sync $0;", "r,~{memory},~{dirflag},~{fpsr},~{flags}"(i32 1)
ret void
}
; Here the function itself has no attribute group; instead the inline assembly
; call site carries #0 ("llvm.assume"="ompx_no_call_asm"). The expected output
; below still requires the function to be annotated `norecurse`.
define void @non_recursive_asm_cs() {
; CHECK: Function Attrs: norecurse
; CHECK-LABEL: define {{[^@]+}}@non_recursive_asm_cs
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void asm sideeffect "barrier.sync $0
; CHECK-NEXT: ret void
;
entry:
call void asm sideeffect "barrier.sync $0;", "r,~{memory},~{dirflag},~{fpsr},~{flags}"(i32 1) #0
ret void
}
; Negative case: neither the function nor the inline assembly call site carries
; the "ompx_no_call_asm" assumption, and the expected output below shows the
; function definition without any attribute group.
define void @recursive_asm() {
; CHECK-LABEL: define {{[^@]+}}@recursive_asm() {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void asm sideeffect "barrier.sync $0
; CHECK-NEXT: ret void
;
entry:
call void asm sideeffect "barrier.sync $0;", "r,~{memory},~{dirflag},~{fpsr},~{flags}"(i32 1)
ret void
}
attributes #0 = { "llvm.assume"="ompx_no_call_asm" }
;.
; CHECK: attributes #[[ATTR0]] = { norecurse "llvm.assume"="ompx_no_call_asm" }
; CHECK: attributes #[[ATTR1]] = { norecurse }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { "llvm.assume"="ompx_no_call_asm" }
;.

View File

@ -12,6 +12,15 @@
#ifndef OMPTARGET_TYPES_H
#define OMPTARGET_TYPES_H
// Tell the compiler that we do not have any "call-like" inline assembly in the
// device runtime. That means we cannot have inline assembly which will call
// another function but only inline assembly that performs some operation or
// side-effect and then continues execution with something on the existing call
// stack.
//
// TODO: Find a good place for this
#pragma omp assumes ext_no_call_asm
/// Base type declarations for freestanding mode
///
///{