[SimpleLoopUnswitch] Skip non-trivial unswitching of cold functions

On the current main branch, non-trivial unswitching is skipped for all cold loops. As reported in D129599, skipping these cold loops causes a regression in the SPEC benchmark.
Thus, instead of skipping all cold loops, only skip loops in cold functions.

Reviewed By: alexgatea, aeubanks

Differential Revision: https://reviews.llvm.org/D133275
This commit is contained in:
Ruobing Han 2022-09-04 12:56:25 -04:00
parent bb6d12b5ce
commit fb45f3c948
3 changed files with 51 additions and 102 deletions

View File

@ -3086,7 +3086,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// Skip cold loops, as unswitching them brings little benefit // Skip cold loops, as unswitching them brings little benefit
// but increases the code size // but increases the code size
if (PSI && PSI->hasProfileSummary() && BFI && if (PSI && PSI->hasProfileSummary() && BFI &&
PSI->isColdBlock(L.getHeader(), BFI)) { PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n"); LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
return false; return false;
} }

View File

@ -6,89 +6,27 @@
declare i32 @a() declare i32 @a()
declare i32 @b() declare i32 @b()
; Check loops in cold functions will not be applied non-trivial loop unswitch
define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 { define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 {
; CHECK-LABEL: @f1( ; CHECK-LABEL: @f1(
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[ENTRY_HOT_LOOP:%.*]]
; CHECK: entry_hot_loop:
; CHECK-NEXT: br i1 [[HOT_COND:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER:%.*]], label [[HOT_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]]
; CHECK: hot_loop_begin.preheader:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
; CHECK: hot_loop_begin.preheader.split.us:
; CHECK-NEXT: br label [[HOT_LOOP_BEGIN_US:%.*]]
; CHECK: hot_loop_begin.us:
; CHECK-NEXT: br label [[HOT_LOOP_A_US:%.*]]
; CHECK: hot_loop_a.us:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[HOT_LOOP_LATCH_US:%.*]]
; CHECK: hot_loop_latch.us:
; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1
; CHECK-NEXT: br i1 [[V1_US]], label [[HOT_LOOP_BEGIN_US]], label [[HOT_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
; CHECK: hot_loop_exit.loopexit.split.us:
; CHECK-NEXT: br label [[HOT_LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: hot_loop_begin.preheader.split:
; CHECK-NEXT: br label [[HOT_LOOP_BEGIN:%.*]]
; CHECK: hot_loop_begin:
; CHECK-NEXT: br label [[HOT_LOOP_B:%.*]]
; CHECK: hot_loop_b:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
; CHECK-NEXT: br label [[HOT_LOOP_LATCH:%.*]]
; CHECK: hot_loop_latch:
; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1
; CHECK-NEXT: br i1 [[V1]], label [[HOT_LOOP_BEGIN]], label [[HOT_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
; CHECK: hot_loop_exit.loopexit.split:
; CHECK-NEXT: br label [[HOT_LOOP_EXIT_LOOPEXIT]]
; CHECK: hot_loop_exit.loopexit:
; CHECK-NEXT: br label [[HOT_LOOP_EXIT]]
; CHECK: hot_loop_exit:
; CHECK-NEXT: br label [[ENTRY_COLD_LOOP:%.*]]
; CHECK: entry_cold_loop:
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]]
; CHECK: cold_loop_begin.preheader:
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]] ; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
; CHECK: cold_loop_begin: ; CHECK: cold_loop_begin:
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]] ; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
; CHECK: cold_loop_a: ; CHECK: cold_loop_a:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a() ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]] ; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
; CHECK: cold_loop_b: ; CHECK: cold_loop_b:
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @b() ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]] ; CHECK-NEXT: br label [[COLD_LOOP_LATCH]]
; CHECK: cold_loop_latch: ; CHECK: cold_loop_latch:
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 ; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR:%.*]], align 1
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT:%.*]]
; CHECK: cold_loop_exit.loopexit:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
; CHECK: cold_loop_exit: ; CHECK: cold_loop_exit:
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
entry: entry:
br label %entry_hot_loop br label %cold_loop_begin
entry_hot_loop:
br i1 %hot_cond, label %hot_loop_begin, label %hot_loop_exit, !prof !15
hot_loop_begin:
br i1 %cond, label %hot_loop_a, label %hot_loop_b
hot_loop_a:
call i32 @a()
br label %hot_loop_latch
hot_loop_b:
call i32 @b()
br label %hot_loop_latch
hot_loop_latch:
%v1 = load i1, i1* %ptr
br i1 %v1, label %hot_loop_begin, label %hot_loop_exit
hot_loop_exit:
br label %entry_cold_loop
entry_cold_loop:
br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !16
cold_loop_begin: cold_loop_begin:
br i1 %cond, label %cold_loop_a, label %cold_loop_b br i1 %cond, label %cold_loop_a, label %cold_loop_b
@ -110,7 +48,7 @@ cold_loop_exit:
} }
!llvm.module.flags = !{!1} !llvm.module.flags = !{!1}
!0 = !{!"function_entry_count", i64 400} !0 = !{!"function_entry_count", i64 0}
!1 = !{i32 1, !"ProfileSummary", !2} !1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"} !3 = !{!"ProfileFormat", !"InstrProf"}
@ -125,5 +63,3 @@ cold_loop_exit:
!12 = !{i32 10000, i64 100, i32 1} !12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1} !13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2} !14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"branch_weights", i32 100, i32 0}
!16 = !{!"branch_weights", i32 0, i32 100}

View File

@ -8,25 +8,38 @@ declare i32 @b()
; Check loops will be applied non-trivial loop unswitch in a non-cold function, ; Check loops will be applied non-trivial loop unswitch in a non-cold function,
; even loop headers are cold ; even loop headers are cold
define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 { define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !14 {
; CHECK-LABEL: @f1( ; CHECK-LABEL: @f1(
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[ENTRY_COLD_LOOP:%.*]] ; CHECK-NEXT: br label [[ENTRY_COLD_LOOP:%.*]]
; CHECK: entry_cold_loop: ; CHECK: entry_cold_loop:
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]] ; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]]
; CHECK: cold_loop_begin.preheader: ; CHECK: cold_loop_begin.preheader:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
; CHECK: cold_loop_begin.preheader.split.us:
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN_US:%.*]]
; CHECK: cold_loop_begin.us:
; CHECK-NEXT: br label [[COLD_LOOP_A_US:%.*]]
; CHECK: cold_loop_a.us:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH_US:%.*]]
; CHECK: cold_loop_latch.us:
; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1
; CHECK-NEXT: br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
; CHECK: cold_loop_exit.loopexit.split.us:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: cold_loop_begin.preheader.split:
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]] ; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
; CHECK: cold_loop_begin: ; CHECK: cold_loop_begin:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]] ; CHECK-NEXT: br label [[COLD_LOOP_B:%.*]]
; CHECK: cold_loop_a:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
; CHECK: cold_loop_b: ; CHECK: cold_loop_b:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b() ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]] ; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
; CHECK: cold_loop_latch: ; CHECK: cold_loop_latch:
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR:%.*]], align 1 ; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
; CHECK: cold_loop_exit.loopexit.split:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT]]
; CHECK: cold_loop_exit.loopexit: ; CHECK: cold_loop_exit.loopexit:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]] ; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
; CHECK: cold_loop_exit: ; CHECK: cold_loop_exit:
@ -36,17 +49,17 @@ entry:
br label %entry_cold_loop br label %entry_cold_loop
entry_cold_loop: entry_cold_loop:
br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !16 br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !15
cold_loop_begin: cold_loop_begin:
br i1 %cond, label %cold_loop_a, label %cold_loop_b br i1 %cond, label %cold_loop_a, label %cold_loop_b
cold_loop_a: cold_loop_a:
call i32 @a() %0 = call i32 @a()
br label %cold_loop_latch br label %cold_loop_latch
cold_loop_b: cold_loop_b:
call i32 @b() %1 = call i32 @b()
br label %cold_loop_latch br label %cold_loop_latch
cold_loop_latch: cold_loop_latch:
@ -57,21 +70,21 @@ cold_loop_exit:
ret void ret void
} }
!llvm.module.flags = !{!1} !llvm.module.flags = !{!0}
!0 = !{!"function_entry_count", i64 400}
!1 = !{i32 1, !"ProfileSummary", !2} !0 = !{i32 1, !"ProfileSummary", !1}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!3 = !{!"ProfileFormat", !"InstrProf"} !2 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000} !3 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 10} !4 = !{!"MaxCount", i64 10}
!6 = !{!"MaxInternalCount", i64 1} !5 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000} !6 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3} !7 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3} !8 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11} !9 = !{!"DetailedSummary", !10}
!11 = !{!12, !13, !14} !10 = !{!11, !12, !13}
!12 = !{i32 10000, i64 100, i32 1} !11 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1} !12 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2} !13 = !{i32 999999, i64 1, i32 2}
!15 = !{!"branch_weights", i32 100, i32 0} !14 = !{!"function_entry_count", i64 400}
!16 = !{!"branch_weights", i32 0, i32 100} !15 = !{!"branch_weights", i32 0, i32 100}