mirror of https://github.com/microsoft/clang.git
Introduce __builtin_nontemporal_store and __builtin_nontemporal_load.
Summary: Currently clang provides no general way to generate nontemporal loads/stores. There are some architecture-specific builtins for doing so (e.g. on x86), but there is no way to generate a non-temporal store on, for example, AArch64. This patch adds generic builtins which are expanded to a simple load or store carrying the '!nontemporal' metadata in IR.

Differential Revision: http://reviews.llvm.org/D12313

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@247104 91177308-0d34-0410-b5e6-96231b3b80d8
parent 17c95f6490
commit 2db8497948
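For orientation, a minimal usage sketch of the new builtins (illustrative only, not part of the patch): __builtin_nontemporal_store takes the value first and the destination pointer last, __builtin_nontemporal_load takes only a source pointer, and both are overloaded on the pointee type of that pointer. Whether the backend turns the resulting !nontemporal metadata into streaming/non-temporal instructions is target-dependent.

// Illustrative sketch, not from this commit: stream float data while
// hinting that it should not displace existing cache contents.
void copy_stream(float *dst, const float *src, int n) {
  for (int i = 0; i < n; ++i) {
    float v = __builtin_nontemporal_load(&src[i]);  // lowers to: load float, ... !nontemporal
    __builtin_nontemporal_store(v, &dst[i]);        // lowers to: store float ..., !nontemporal
  }
}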
@@ -1245,6 +1245,10 @@ BUILTIN(__builtin_operator_delete, "vv*", "n")
 BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn")
 BUILTIN(__builtin___get_unsafe_stack_ptr, "v*", "Fn")
 
+// Nontemporal loads/stores builtins
+BUILTIN(__builtin_nontemporal_store, "v.", "t")
+BUILTIN(__builtin_nontemporal_load, "v.", "t")
+
 #undef BUILTIN
 #undef LIBBUILTIN
 #undef LANGBUILTIN
@@ -6200,6 +6200,12 @@ def err_atomic_load_store_uses_lib : Error<
   "atomic %select{load|store}0 requires runtime support that is not "
   "available for this target">;
 
+def err_nontemporal_builtin_must_be_pointer : Error<
+  "address argument to nontemporal builtin must be a pointer (%0 invalid)">;
+def err_nontemporal_builtin_must_be_pointer_intfltptr_or_vector : Error<
+  "address argument to nontemporal builtin must be a pointer to integer, float, "
+  "pointer, or a vector of such types (%0 invalid)">;
+
 def err_deleted_function_use : Error<"attempt to use a deleted function">;
 
 def err_kern_type_not_void_return : Error<
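To illustrate when the two diagnostics above fire (a hypothetical example, not part of the patch): a non-pointer address argument triggers the first error, and a pointer to an unsupported pointee type such as a struct triggers the second.

// Hypothetical invalid uses rejected by the new Sema checks.
struct S { int x; };
void bad(struct S *ps, int i) {
  __builtin_nontemporal_store(1, i);  // error: address argument to nontemporal
                                      // builtin must be a pointer
  __builtin_nontemporal_load(ps);     // error: address argument to nontemporal
                                      // builtin must be a pointer to integer,
                                      // float, pointer, or a vector of such types
}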
@@ -8851,6 +8851,7 @@ private:
   bool SemaBuiltinLongjmp(CallExpr *TheCall);
   bool SemaBuiltinSetjmp(CallExpr *TheCall);
   ExprResult SemaBuiltinAtomicOverloaded(ExprResult TheCallResult);
+  ExprResult SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult);
   ExprResult SemaAtomicOpsOverloaded(ExprResult TheCallResult,
                                      AtomicExpr::AtomicOp Op);
   bool SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum,
@@ -111,6 +111,28 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
   return EmitFromInt(CGF, Result, T, ValueType);
 }
 
+static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
+  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
+
+  // Convert the type of the pointer to a pointer to the stored type.
+  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
+  Value *BC = CGF.Builder.CreateBitCast(
+      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
+  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
+  LV.setNontemporal(true);
+  CGF.EmitStoreOfScalar(Val, LV, false);
+  return nullptr;
+}
+
+static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
+
+  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
+  LV.setNontemporal(true);
+  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
+}
+
 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                                llvm::AtomicRMWInst::BinOp Kind,
                                const CallExpr *E) {
@@ -1143,6 +1165,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     return RValue::get(nullptr);
   }
 
+  case Builtin::BI__builtin_nontemporal_load:
+    return RValue::get(EmitNontemporalLoad(*this, E));
+  case Builtin::BI__builtin_nontemporal_store:
+    return RValue::get(EmitNontemporalStore(*this, E));
   case Builtin::BI__c11_atomic_is_lock_free:
   case Builtin::BI__atomic_is_lock_free: {
     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
@@ -1160,7 +1160,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue,
   return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
                           lvalue.getType(), Loc, lvalue.getAlignmentSource(),
                           lvalue.getTBAAInfo(),
-                          lvalue.getTBAABaseType(), lvalue.getTBAAOffset());
+                          lvalue.getTBAABaseType(), lvalue.getTBAAOffset(),
+                          lvalue.isNontemporal());
 }
 
 static bool hasBooleanRepresentation(QualType Ty) {
@@ -1226,7 +1227,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
                                                AlignmentSource AlignSource,
                                                llvm::MDNode *TBAAInfo,
                                                QualType TBAABaseType,
-                                               uint64_t TBAAOffset) {
+                                               uint64_t TBAAOffset,
+                                               bool isNontemporal) {
   // For better performance, handle vector loads differently.
   if (Ty->isVectorType()) {
     const llvm::Type *EltTy = Addr.getElementType();
@@ -1258,6 +1260,11 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
   }
 
   llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
+  if (isNontemporal) {
+    llvm::MDNode *Node = llvm::MDNode::get(
+        Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+    Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+  }
   if (TBAAInfo) {
     llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
                                                       TBAAOffset);
@@ -1330,7 +1337,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
                                         AlignmentSource AlignSource,
                                         llvm::MDNode *TBAAInfo,
                                         bool isInit, QualType TBAABaseType,
-                                        uint64_t TBAAOffset) {
+                                        uint64_t TBAAOffset,
+                                        bool isNontemporal) {
 
   // Handle vectors differently to get better performance.
   if (Ty->isVectorType()) {
@@ -1365,6 +1373,12 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
   }
 
   llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
+  if (isNontemporal) {
+    llvm::MDNode *Node =
+        llvm::MDNode::get(Store->getContext(),
+                          llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+    Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+  }
   if (TBAAInfo) {
     llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
                                                       TBAAOffset);
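For readers unfamiliar with the metadata being attached in the nontemporal-metadata hunks above, here is a standalone sketch of the same mechanism against the LLVM C++ API (an illustration only, not code from this patch; the helper name is hypothetical): the !nontemporal kind carries a single i32 1 operand.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Emit a store and tag it as nontemporal, mirroring the codegen change above.
static void emitNontemporalStore(llvm::IRBuilder<> &Builder, llvm::Module &M,
                                 llvm::Value *Val, llvm::Value *Ptr) {
  llvm::StoreInst *Store = Builder.CreateStore(Val, Ptr);
  llvm::MDNode *Node = llvm::MDNode::get(
      Store->getContext(),
      llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
  Store->setMetadata(M.getMDKindID("nontemporal"), Node);
}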
@@ -1378,7 +1392,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue,
   EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
                     lvalue.getType(), lvalue.getAlignmentSource(),
                     lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(),
-                    lvalue.getTBAAOffset());
+                    lvalue.getTBAAOffset(), lvalue.isNontemporal());
 }
 
 /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this
@@ -202,6 +202,10 @@ class LValue {
 
   unsigned AlignSource : 2;
 
+  // This flag shows if a nontemporal load/stores should be used when accessing
+  // this lvalue.
+  bool Nontemporal : 1;
+
   Expr *BaseIvarExp;
 
   /// Used by struct-path-aware TBAA.
@@ -228,6 +232,7 @@ private:
     // Initialize Objective-C flags.
     this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false;
     this->ImpreciseLifetime = false;
+    this->Nontemporal = false;
     this->ThreadLocalRef = false;
     this->BaseIvarExp = nullptr;
@@ -277,6 +282,8 @@ public:
   void setARCPreciseLifetime(ARCPreciseLifetime_t value) {
     ImpreciseLifetime = (value == ARCImpreciseLifetime);
   }
+  bool isNontemporal() const { return Nontemporal; }
+  void setNontemporal(bool Value) { Nontemporal = Value; }
+
   bool isObjCWeak() const {
     return Quals.getObjCGCAttr() == Qualifiers::Weak;
@@ -2449,7 +2449,8 @@ public:
                                     AlignmentSource::Type,
                                 llvm::MDNode *TBAAInfo = nullptr,
                                 QualType TBAABaseTy = QualType(),
-                                uint64_t TBAAOffset = 0);
+                                uint64_t TBAAOffset = 0,
+                                bool isNontemporal = false);
 
   /// EmitLoadOfScalar - Load a scalar value from an address, taking
   /// care to appropriately convert from the memory representation to
@@ -2465,7 +2466,7 @@ public:
                          AlignmentSource AlignSource = AlignmentSource::Type,
                          llvm::MDNode *TBAAInfo = nullptr, bool isInit = false,
                          QualType TBAABaseTy = QualType(),
-                         uint64_t TBAAOffset = 0);
+                         uint64_t TBAAOffset = 0, bool isNontemporal = false);
 
   /// EmitStoreOfScalar - Store a scalar value to an address, taking
   /// care to appropriately convert from the memory representation to
@@ -441,6 +441,9 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__sync_swap_8:
   case Builtin::BI__sync_swap_16:
     return SemaBuiltinAtomicOverloaded(TheCallResult);
+  case Builtin::BI__builtin_nontemporal_load:
+  case Builtin::BI__builtin_nontemporal_store:
+    return SemaBuiltinNontemporalOverloaded(TheCallResult);
 #define BUILTIN(ID, TYPE, ATTRS)
 #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
   case Builtin::BI##ID: \
@@ -2210,6 +2213,78 @@ Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) {
   return TheCallResult;
 }
 
+/// SemaBuiltinNontemporalOverloaded - We have a call to
+/// __builtin_nontemporal_store or __builtin_nontemporal_load, which is an
+/// overloaded function based on the pointer type of its last argument.
+///
+/// This function goes through and does final semantic checking for these
+/// builtins.
+ExprResult Sema::SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult) {
+  CallExpr *TheCall = (CallExpr *)TheCallResult.get();
+  DeclRefExpr *DRE =
+      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
+  FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl());
+  unsigned BuiltinID = FDecl->getBuiltinID();
+  assert((BuiltinID == Builtin::BI__builtin_nontemporal_store ||
+          BuiltinID == Builtin::BI__builtin_nontemporal_load) &&
+         "Unexpected nontemporal load/store builtin!");
+  bool isStore = BuiltinID == Builtin::BI__builtin_nontemporal_store;
+  unsigned numArgs = isStore ? 2 : 1;
+
+  // Ensure that we have the proper number of arguments.
+  if (checkArgCount(*this, TheCall, numArgs))
+    return ExprError();
+
+  // Inspect the last argument of the nontemporal builtin. This should always
+  // be a pointer type, from which we imply the type of the memory access.
+  // Because it is a pointer type, we don't have to worry about any implicit
+  // casts here.
+  Expr *PointerArg = TheCall->getArg(numArgs - 1);
+  ExprResult PointerArgResult =
+      DefaultFunctionArrayLvalueConversion(PointerArg);
+
+  if (PointerArgResult.isInvalid())
+    return ExprError();
+  PointerArg = PointerArgResult.get();
+  TheCall->setArg(numArgs - 1, PointerArg);
+
+  const PointerType *pointerType = PointerArg->getType()->getAs<PointerType>();
+  if (!pointerType) {
+    Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_must_be_pointer)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return ExprError();
+  }
+
+  QualType ValType = pointerType->getPointeeType();
+
+  // Strip any qualifiers off ValType.
+  ValType = ValType.getUnqualifiedType();
+  if (!ValType->isIntegerType() && !ValType->isAnyPointerType() &&
+      !ValType->isBlockPointerType() && !ValType->isFloatingType() &&
+      !ValType->isVectorType()) {
+    Diag(DRE->getLocStart(),
+         diag::err_nontemporal_builtin_must_be_pointer_intfltptr_or_vector)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return ExprError();
+  }
+
+  if (!isStore) {
+    TheCall->setType(ValType);
+    return TheCallResult;
+  }
+
+  ExprResult ValArg = TheCall->getArg(0);
+  InitializedEntity Entity = InitializedEntity::InitializeParameter(
+      Context, ValType, /*consume*/ false);
+  ValArg = PerformCopyInitialization(Entity, SourceLocation(), ValArg);
+  if (ValArg.isInvalid())
+    return ExprError();
+
+  TheCall->setArg(0, ValArg.get());
+  TheCall->setType(Context.VoidTy);
+  return TheCallResult;
+}
+
 /// CheckObjCString - Checks that the argument to the builtin
 /// CFString constructor is correct
 /// Note: It might also make sense to do the UTF-16 conversion here (would
@@ -0,0 +1,48 @@
+// Test frontend handling of nontemporal builtins.
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s
+
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long long sll;
+unsigned long long ull;
+float f1, f2;
+double d1, d2;
+float __attribute__((vector_size(16))) vf1, vf2;
+char __attribute__((vector_size(8))) vc1, vc2;
+bool b1, b2;
+
+void test_all_sizes(void)                 // CHECK-LABEL: test_all_sizes
+{
+  __builtin_nontemporal_store(true, &b1); // CHECK: store i8 1, i8* @b1, align 1, !nontemporal
+  __builtin_nontemporal_store(b1, &b2);   // CHECK: store i8{{.*}}, align 1, !nontemporal
+  __builtin_nontemporal_store(1, &uc);    // CHECK: store i8{{.*}}align 1, !nontemporal
+  __builtin_nontemporal_store(1, &sc);    // CHECK: store i8{{.*}}align 1, !nontemporal
+  __builtin_nontemporal_store(1, &us);    // CHECK: store i16{{.*}}align 2, !nontemporal
+  __builtin_nontemporal_store(1, &ss);    // CHECK: store i16{{.*}}align 2, !nontemporal
+  __builtin_nontemporal_store(1, &ui);    // CHECK: store i32{{.*}}align 4, !nontemporal
+  __builtin_nontemporal_store(1, &si);    // CHECK: store i32{{.*}}align 4, !nontemporal
+  __builtin_nontemporal_store(1, &ull);   // CHECK: store i64{{.*}}align 8, !nontemporal
+  __builtin_nontemporal_store(1, &sll);   // CHECK: store i64{{.*}}align 8, !nontemporal
+  __builtin_nontemporal_store(1.0, &f1);  // CHECK: store float{{.*}}align 4, !nontemporal
+  __builtin_nontemporal_store(1.0, &d1);  // CHECK: store double{{.*}}align 8, !nontemporal
+  __builtin_nontemporal_store(vf1, &vf2); // CHECK: store <4 x float>{{.*}}align 16, !nontemporal
+  __builtin_nontemporal_store(vc1, &vc2); // CHECK: store <8 x i8>{{.*}}align 8, !nontemporal
+
+  b1 = __builtin_nontemporal_load(&b2);   // CHECK: load i8{{.*}}align 1, !nontemporal
+  uc = __builtin_nontemporal_load(&sc);   // CHECK: load i8{{.*}}align 1, !nontemporal
+  sc = __builtin_nontemporal_load(&uc);   // CHECK: load i8{{.*}}align 1, !nontemporal
+  us = __builtin_nontemporal_load(&ss);   // CHECK: load i16{{.*}}align 2, !nontemporal
+  ss = __builtin_nontemporal_load(&us);   // CHECK: load i16{{.*}}align 2, !nontemporal
+  ui = __builtin_nontemporal_load(&si);   // CHECK: load i32{{.*}}align 4, !nontemporal
+  si = __builtin_nontemporal_load(&ui);   // CHECK: load i32{{.*}}align 4, !nontemporal
+  ull = __builtin_nontemporal_load(&sll); // CHECK: load i64{{.*}}align 8, !nontemporal
+  sll = __builtin_nontemporal_load(&ull); // CHECK: load i64{{.*}}align 8, !nontemporal
+  f1 = __builtin_nontemporal_load(&f2);   // CHECK: load float{{.*}}align 4, !nontemporal
+  d1 = __builtin_nontemporal_load(&d2);   // CHECK: load double{{.*}}align 8, !nontemporal
+  vf2 = __builtin_nontemporal_load(&vf1); // CHECK: load <4 x float>{{.*}}align 16, !nontemporal
+  vc2 = __builtin_nontemporal_load(&vc1); // CHECK: load <8 x i8>{{.*}}align 8, !nontemporal
+}