[X86] Adding avx512_vpopcntdq feature set and its intrinsics

AVX512_VPOPCNTDQ is a new feature set that was published by Intel.
The patch represents the Clang side of the addition of six intrinsics for two new machine instructions (vpopcntd and vpopcntq).
It also includes the addition of the new feature set.

Differential Revision: https://reviews.llvm.org/D33170



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@303857 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Oren Ben Simhon 2017-05-25 13:44:11 +00:00
parent 5e606f16ee
commit 0197dcf96b
9 changed files with 170 additions and 36 deletions

View File

@ -1107,6 +1107,9 @@ TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "", "avx5
TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", "avx512cd")
TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", "avx512cd")
TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")

View File

@ -1741,6 +1741,7 @@ def mno_avx : Flag<["-"], "mno-avx">, Group<m_x86_Features_Group>;
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
@ -1941,6 +1942,7 @@ def mavx : Flag<["-"], "mavx">, Group<m_x86_Features_Group>;
def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;

View File

@ -2624,6 +2624,7 @@ class X86TargetInfo : public TargetInfo {
bool HasFMA = false;
bool HasF16C = false;
bool HasAVX512CD = false;
bool HasAVX512VPOPCNTDQ = false;
bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;
@ -3504,9 +3505,9 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
LLVM_FALLTHROUGH;
case AVX512F:
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
Features["avx512vl"] = Features["avx512vbmi"] =
Features["avx512ifma"] = false;
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
Features["avx512vl"] = Features["avx512vbmi"] =
Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
}
}
@ -3616,7 +3617,8 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
setSSELevel(Features, AVX512F, Enabled);
} else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
Name == "avx512vbmi" || Name == "avx512ifma") {
Name == "avx512vbmi" || Name == "avx512ifma" ||
Name == "avx512vpopcntdq") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
// Enable BWI instruction if VBMI is being enabled.
@ -3700,6 +3702,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasF16C = true;
} else if (Feature == "+avx512cd") {
HasAVX512CD = true;
} else if (Feature == "+avx512vpopcntdq") {
HasAVX512VPOPCNTDQ = true;
} else if (Feature == "+avx512er") {
HasAVX512ER = true;
} else if (Feature == "+avx512pf") {
@ -4037,6 +4041,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasAVX512CD)
Builder.defineMacro("__AVX512CD__");
if (HasAVX512VPOPCNTDQ)
Builder.defineMacro("__AVX512VPOPCNTDQ__");
if (HasAVX512ER)
Builder.defineMacro("__AVX512ER__");
if (HasAVX512PF)
@ -4168,6 +4174,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx2", SSELevel >= AVX2)
.Case("avx512f", SSELevel >= AVX512F)
.Case("avx512cd", HasAVX512CD)
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
@ -4253,6 +4260,7 @@ bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
.Case("avx512bw", true)
.Case("avx512dq", true)
.Case("avx512cd", true)
.Case("avx512vpopcntdq", true)
.Case("avx512er", true)
.Case("avx512pf", true)
.Case("avx512vbmi", true)

View File

@ -7332,39 +7332,42 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
AVX512PF,
AVX512VBMI,
AVX512IFMA,
AVX512VPOPCNTDQ,
MAX
};
X86Features Feature = StringSwitch<X86Features>(FeatureStr)
.Case("cmov", X86Features::CMOV)
.Case("mmx", X86Features::MMX)
.Case("popcnt", X86Features::POPCNT)
.Case("sse", X86Features::SSE)
.Case("sse2", X86Features::SSE2)
.Case("sse3", X86Features::SSE3)
.Case("ssse3", X86Features::SSSE3)
.Case("sse4.1", X86Features::SSE4_1)
.Case("sse4.2", X86Features::SSE4_2)
.Case("avx", X86Features::AVX)
.Case("avx2", X86Features::AVX2)
.Case("sse4a", X86Features::SSE4_A)
.Case("fma4", X86Features::FMA4)
.Case("xop", X86Features::XOP)
.Case("fma", X86Features::FMA)
.Case("avx512f", X86Features::AVX512F)
.Case("bmi", X86Features::BMI)
.Case("bmi2", X86Features::BMI2)
.Case("aes", X86Features::AES)
.Case("pclmul", X86Features::PCLMUL)
.Case("avx512vl", X86Features::AVX512VL)
.Case("avx512bw", X86Features::AVX512BW)
.Case("avx512dq", X86Features::AVX512DQ)
.Case("avx512cd", X86Features::AVX512CD)
.Case("avx512er", X86Features::AVX512ER)
.Case("avx512pf", X86Features::AVX512PF)
.Case("avx512vbmi", X86Features::AVX512VBMI)
.Case("avx512ifma", X86Features::AVX512IFMA)
.Default(X86Features::MAX);
X86Features Feature =
StringSwitch<X86Features>(FeatureStr)
.Case("cmov", X86Features::CMOV)
.Case("mmx", X86Features::MMX)
.Case("popcnt", X86Features::POPCNT)
.Case("sse", X86Features::SSE)
.Case("sse2", X86Features::SSE2)
.Case("sse3", X86Features::SSE3)
.Case("ssse3", X86Features::SSSE3)
.Case("sse4.1", X86Features::SSE4_1)
.Case("sse4.2", X86Features::SSE4_2)
.Case("avx", X86Features::AVX)
.Case("avx2", X86Features::AVX2)
.Case("sse4a", X86Features::SSE4_A)
.Case("fma4", X86Features::FMA4)
.Case("xop", X86Features::XOP)
.Case("fma", X86Features::FMA)
.Case("avx512f", X86Features::AVX512F)
.Case("bmi", X86Features::BMI)
.Case("bmi2", X86Features::BMI2)
.Case("aes", X86Features::AES)
.Case("pclmul", X86Features::PCLMUL)
.Case("avx512vl", X86Features::AVX512VL)
.Case("avx512bw", X86Features::AVX512BW)
.Case("avx512dq", X86Features::AVX512DQ)
.Case("avx512cd", X86Features::AVX512CD)
.Case("avx512er", X86Features::AVX512ER)
.Case("avx512pf", X86Features::AVX512PF)
.Case("avx512vbmi", X86Features::AVX512VBMI)
.Case("avx512ifma", X86Features::AVX512IFMA)
.Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
.Default(X86Features::MAX);
assert(Feature != X86Features::MAX && "Invalid feature!");
// Matching the struct layout from the compiler-rt/libgcc structure that is
@ -7517,7 +7520,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask: {
return EmitX86MaskedStore(*this, Ops, 16);
}
case X86::BI__builtin_ia32_vpopcntd_512:
case X86::BI__builtin_ia32_vpopcntq_512: {
llvm::Type *ResultType = ConvertType(E->getType());
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
return Builder.CreateCall(F, Ops);
}
case X86::BI__builtin_ia32_cvtmask2b128:
case X86::BI__builtin_ia32_cvtmask2b256:
case X86::BI__builtin_ia32_cvtmask2b512:

View File

@ -7,6 +7,7 @@ set(files
avx2intrin.h
avx512bwintrin.h
avx512cdintrin.h
avx512vpopcntdqintrin.h
avx512dqintrin.h
avx512erintrin.h
avx512fintrin.h

View File

@ -0,0 +1,70 @@
/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
*------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error \
"Never use <avx512vpopcntdqintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VPOPCNTDQINTRIN_H
#define __AVX512VPOPCNTDQINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \
"q")))
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_selectq_512(
(__mmask8)__U, (__v8di)_mm512_popcnt_epi64(__A), (__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
return (__m512i)__builtin_ia32_selectd_512(
(__mmask16)__U, (__v16si)_mm512_popcnt_epi32(__A), (__v16si)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
return _mm512_mask_popcnt_epi32((__m512i)_mm512_setzero_si512(), __U, __A);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -146,6 +146,10 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512cdintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

View File

@ -36,7 +36,7 @@ int __attribute__((target("arch=lakemont"))) lake(int a) { return 4; }
// CHECK: lake{{.*}} #6
// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
// CHECK: #3 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
// CHECK: #4 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes"
// CHECK: #5 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+sse,+sse2,+x87,-3dnow,-3dnowa,-mmx"

View File

@ -0,0 +1,38 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
__m512i test_mm512_popcnt_epi64(__m512i __A) {
// CHECK-LABEL: @test_mm512_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
return _mm512_popcnt_epi64(__A);
}
__m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}}, <8 x i64> {{.*}}
return _mm512_mask_popcnt_epi64(__W, __U, __A);
}
__m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}}, <8 x i64> {{.*}}
return _mm512_maskz_popcnt_epi64(__U, __A);
}
__m512i test_mm512_popcnt_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
return _mm512_popcnt_epi32(__A);
}
__m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <16 x i32> {{.*}}
return _mm512_mask_popcnt_epi32(__W, __U, __A);
}
__m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <16 x i32> {{.*}}
return _mm512_maskz_popcnt_epi32(__U, __A);
}