Revert "[X86] Don't imply -mprfchw when -m3dnow is specified. Enable prefetchw in the backend with 3dnow feature."

This is failing on the bots. This reverts commit 636d31a5c3.
2020-06-25 11:43:02 -07:00 · 2020-06-25 11:43:02 -07:00 · 01c18f9199
parent 0bfb4c2506
commit 01c18f9199
6 changed files with 27 additions and 18 deletions
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@ -335,7 +335,6 @@ SkylakeCommon:
    setFeatureEnabledImpl(Features, "lzcnt", true);
    setFeatureEnabledImpl(Features, "popcnt", true);
    setFeatureEnabledImpl(Features, "sahf", true);
-    setFeatureEnabledImpl(Features, "prfchw", true);
    LLVM_FALLTHROUGH;
  case CK_K8SSE3:
    setFeatureEnabledImpl(Features, "sse3", true);
@ -451,6 +450,12 @@ SkylakeCommon:
      llvm::find(FeaturesVec, "-popcnt") == FeaturesVec.end())
    Features["popcnt"] = true;

+  // Enable prfchw if 3DNow! is enabled and prfchw is not explicitly disabled.
+  I = Features.find("3dnow");
+  if (I != Features.end() && I->getValue() &&
+      llvm::find(FeaturesVec, "-prfchw") == FeaturesVec.end())
+    Features["prfchw"] = true;
+
  // Additionally, if SSE is enabled and mmx is not explicitly disabled,
  // then enable MMX.
  I = Features.find("sse");
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@ -2379,7 +2379,6 @@
 // CHECK_AMDFAM10_M32: #define __LZCNT__ 1
 // CHECK_AMDFAM10_M32: #define __MMX__ 1
 // CHECK_AMDFAM10_M32: #define __POPCNT__ 1
-// CHECK_AMDFAM10_M32: #define __PRFCHW__ 1
 // CHECK_AMDFAM10_M32: #define __SSE2_MATH__ 1
 // CHECK_AMDFAM10_M32: #define __SSE2__ 1
 // CHECK_AMDFAM10_M32: #define __SSE3__ 1
@ -2400,7 +2399,6 @@
 // CHECK_AMDFAM10_M64: #define __LZCNT__ 1
 // CHECK_AMDFAM10_M64: #define __MMX__ 1
 // CHECK_AMDFAM10_M64: #define __POPCNT__ 1
-// CHECK_AMDFAM10_M64: #define __PRFCHW__ 1
 // CHECK_AMDFAM10_M64: #define __SSE2_MATH__ 1
 // CHECK_AMDFAM10_M64: #define __SSE2__ 1
 // CHECK_AMDFAM10_M64: #define __SSE3__ 1
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@ -847,7 +847,6 @@ def ProcessorFeatures {
                                                         FeatureFXSR,
                                                         FeatureNOPL,
                                                         FeatureCMPXCHG16B,
-                                                         FeaturePRFCHW,
                                                         FeatureLZCNT,
                                                         FeaturePOPCNT,
                                                         FeatureSlowSHLD,
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@ -928,10 +928,11 @@ def HasRTM       : Predicate<"Subtarget->hasRTM()">;
 def HasADX       : Predicate<"Subtarget->hasADX()">;
 def HasSHA       : Predicate<"Subtarget->hasSHA()">;
 def HasSGX       : Predicate<"Subtarget->hasSGX()">;
+def HasPRFCHW    : Predicate<"Subtarget->hasPRFCHW()">;
 def HasRDSEED    : Predicate<"Subtarget->hasRDSEED()">;
 def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
 def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
-def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
+def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
 def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
 def HasLAHFSAHF  : Predicate<"Subtarget->hasLAHFSAHF()">;
 def HasMWAITX    : Predicate<"Subtarget->hasMWAITX()">;
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@ -647,15 +647,8 @@ public:
  bool hasRTM() const { return HasRTM; }
  bool hasADX() const { return HasADX; }
  bool hasSHA() const { return HasSHA; }
-  bool hasPRFCHW() const { return HasPRFCHW; }
+  bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
  bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
-  bool hasPrefetchW() const {
-    // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
-    // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
-    // it and KNL has another that prefetches to L2 cache. We assume the
-    // L1 version exists if the L2 version does.
-    return has3DNow() || hasPRFCHW() || hasPREFETCHWT1();
-  }
  bool hasSSEPrefetch() const {
    // We implicitly enable these when we have a write prefix supporting cache
    // level OR if we have prfchw, but don't already have a read prefetch from
--- a/llvm/test/CodeGen/X86/prefetch.ll
+++ b/llvm/test/CodeGen/X86/prefetch.ll
@ -10,7 +10,7 @@
 ; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
 ; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
 ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW
-; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=3DNOW
+; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=PRFCHW3DNOW

 ; Rules:
 ; 3dnow by itself get you just the single prefetch instruction with no hints
@ -68,11 +68,24 @@ define void @t(i8* %ptr) nounwind  {
 ; 3DNOW-NEXT:    prefetch (%eax)
 ; 3DNOW-NEXT:    prefetch (%eax)
 ; 3DNOW-NEXT:    prefetch (%eax)
-; 3DNOW-NEXT:    prefetchw (%eax)
-; 3DNOW-NEXT:    prefetchw (%eax)
-; 3DNOW-NEXT:    prefetchw (%eax)
-; 3DNOW-NEXT:    prefetchw (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
 ; 3DNOW-NEXT:    retl
+;
+; PRFCHW3DNOW-LABEL: t:
+; PRFCHW3DNOW:       # %bb.0: # %entry
+; PRFCHW3DNOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; PRFCHW3DNOW-NEXT:    prefetch (%eax)
+; PRFCHW3DNOW-NEXT:    prefetch (%eax)
+; PRFCHW3DNOW-NEXT:    prefetch (%eax)
+; PRFCHW3DNOW-NEXT:    prefetch (%eax)
+; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
+; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
+; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
+; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
+; PRFCHW3DNOW-NEXT:    retl
 entry:
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )