forked from OSchip/llvm-project
[SimpleLoopUnswitch] Skip non-trivial unswitching of cold functions
On the current main branch, non-trivial unswitching is skipped for all cold loops. As reported in D129599, skipping these cold loops causes a regression in the SPEC benchmarks. Thus, instead of skipping every cold loop, we now skip only loops in cold functions. Reviewed By: alexgatea, aeubanks Differential Revision: https://reviews.llvm.org/D133275
This commit is contained in:
parent
bb6d12b5ce
commit
fb45f3c948
|
@ -3086,7 +3086,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
|
|||
// Skip cold loops, as unswitching them brings little benefit
|
||||
// but increases the code size
|
||||
if (PSI && PSI->hasProfileSummary() && BFI &&
|
||||
PSI->isColdBlock(L.getHeader(), BFI)) {
|
||||
PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
|
||||
LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -6,89 +6,27 @@
|
|||
|
||||
declare i32 @a()
|
||||
declare i32 @b()
|
||||
|
||||
; Check that non-trivial loop unswitching is not applied to loops in cold functions
|
||||
define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 {
|
||||
; CHECK-LABEL: @f1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[ENTRY_HOT_LOOP:%.*]]
|
||||
; CHECK: entry_hot_loop:
|
||||
; CHECK-NEXT: br i1 [[HOT_COND:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER:%.*]], label [[HOT_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]]
|
||||
; CHECK: hot_loop_begin.preheader:
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
|
||||
; CHECK: hot_loop_begin.preheader.split.us:
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_BEGIN_US:%.*]]
|
||||
; CHECK: hot_loop_begin.us:
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_A_US:%.*]]
|
||||
; CHECK: hot_loop_a.us:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_LATCH_US:%.*]]
|
||||
; CHECK: hot_loop_latch.us:
|
||||
; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1
|
||||
; CHECK-NEXT: br i1 [[V1_US]], label [[HOT_LOOP_BEGIN_US]], label [[HOT_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
|
||||
; CHECK: hot_loop_exit.loopexit.split.us:
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_EXIT_LOOPEXIT:%.*]]
|
||||
; CHECK: hot_loop_begin.preheader.split:
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_BEGIN:%.*]]
|
||||
; CHECK: hot_loop_begin:
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_B:%.*]]
|
||||
; CHECK: hot_loop_b:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_LATCH:%.*]]
|
||||
; CHECK: hot_loop_latch:
|
||||
; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1
|
||||
; CHECK-NEXT: br i1 [[V1]], label [[HOT_LOOP_BEGIN]], label [[HOT_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
|
||||
; CHECK: hot_loop_exit.loopexit.split:
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_EXIT_LOOPEXIT]]
|
||||
; CHECK: hot_loop_exit.loopexit:
|
||||
; CHECK-NEXT: br label [[HOT_LOOP_EXIT]]
|
||||
; CHECK: hot_loop_exit:
|
||||
; CHECK-NEXT: br label [[ENTRY_COLD_LOOP:%.*]]
|
||||
; CHECK: entry_cold_loop:
|
||||
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]]
|
||||
; CHECK: cold_loop_begin.preheader:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
|
||||
; CHECK: cold_loop_begin:
|
||||
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
|
||||
; CHECK: cold_loop_a:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
|
||||
; CHECK: cold_loop_b:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @b()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]]
|
||||
; CHECK: cold_loop_latch:
|
||||
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1
|
||||
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
|
||||
; CHECK: cold_loop_exit.loopexit:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
|
||||
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR:%.*]], align 1
|
||||
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT:%.*]]
|
||||
; CHECK: cold_loop_exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %entry_hot_loop
|
||||
|
||||
entry_hot_loop:
|
||||
br i1 %hot_cond, label %hot_loop_begin, label %hot_loop_exit, !prof !15
|
||||
|
||||
hot_loop_begin:
|
||||
br i1 %cond, label %hot_loop_a, label %hot_loop_b
|
||||
|
||||
hot_loop_a:
|
||||
call i32 @a()
|
||||
br label %hot_loop_latch
|
||||
|
||||
hot_loop_b:
|
||||
call i32 @b()
|
||||
br label %hot_loop_latch
|
||||
|
||||
hot_loop_latch:
|
||||
%v1 = load i1, i1* %ptr
|
||||
br i1 %v1, label %hot_loop_begin, label %hot_loop_exit
|
||||
|
||||
hot_loop_exit:
|
||||
br label %entry_cold_loop
|
||||
|
||||
entry_cold_loop:
|
||||
br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !16
|
||||
br label %cold_loop_begin
|
||||
|
||||
cold_loop_begin:
|
||||
br i1 %cond, label %cold_loop_a, label %cold_loop_b
|
||||
|
@ -110,7 +48,7 @@ cold_loop_exit:
|
|||
}
|
||||
|
||||
!llvm.module.flags = !{!1}
|
||||
!0 = !{!"function_entry_count", i64 400}
|
||||
!0 = !{!"function_entry_count", i64 0}
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
|
@ -125,5 +63,3 @@ cold_loop_exit:
|
|||
!12 = !{i32 10000, i64 100, i32 1}
|
||||
!13 = !{i32 999000, i64 100, i32 1}
|
||||
!14 = !{i32 999999, i64 1, i32 2}
|
||||
!15 = !{!"branch_weights", i32 100, i32 0}
|
||||
!16 = !{!"branch_weights", i32 0, i32 100}
|
||||
|
|
|
@ -8,25 +8,38 @@ declare i32 @b()
|
|||
; Check that non-trivial loop unswitching is applied to loops in a non-cold function,
|
||||
; even if the loop headers are cold
|
||||
|
||||
define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 {
|
||||
define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !14 {
|
||||
; CHECK-LABEL: @f1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[ENTRY_COLD_LOOP:%.*]]
|
||||
; CHECK: entry_cold_loop:
|
||||
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]]
|
||||
; CHECK: cold_loop_begin.preheader:
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
|
||||
; CHECK: cold_loop_begin.preheader.split.us:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN_US:%.*]]
|
||||
; CHECK: cold_loop_begin.us:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_A_US:%.*]]
|
||||
; CHECK: cold_loop_a.us:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH_US:%.*]]
|
||||
; CHECK: cold_loop_latch.us:
|
||||
; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1
|
||||
; CHECK-NEXT: br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
|
||||
; CHECK: cold_loop_exit.loopexit.split.us:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
|
||||
; CHECK: cold_loop_begin.preheader.split:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
|
||||
; CHECK: cold_loop_begin:
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
|
||||
; CHECK: cold_loop_a:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_B:%.*]]
|
||||
; CHECK: cold_loop_b:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]]
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
|
||||
; CHECK: cold_loop_latch:
|
||||
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR:%.*]], align 1
|
||||
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
|
||||
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1
|
||||
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
|
||||
; CHECK: cold_loop_exit.loopexit.split:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT]]
|
||||
; CHECK: cold_loop_exit.loopexit:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
|
||||
; CHECK: cold_loop_exit:
|
||||
|
@ -36,17 +49,17 @@ entry:
|
|||
br label %entry_cold_loop
|
||||
|
||||
entry_cold_loop:
|
||||
br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !16
|
||||
br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !15
|
||||
|
||||
cold_loop_begin:
|
||||
br i1 %cond, label %cold_loop_a, label %cold_loop_b
|
||||
|
||||
cold_loop_a:
|
||||
call i32 @a()
|
||||
%0 = call i32 @a()
|
||||
br label %cold_loop_latch
|
||||
|
||||
cold_loop_b:
|
||||
call i32 @b()
|
||||
%1 = call i32 @b()
|
||||
br label %cold_loop_latch
|
||||
|
||||
cold_loop_latch:
|
||||
|
@ -57,21 +70,21 @@ cold_loop_exit:
|
|||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!1}
|
||||
!0 = !{!"function_entry_count", i64 400}
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxCount", i64 10}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumCounts", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 100, i32 1}
|
||||
!13 = !{i32 999000, i64 100, i32 1}
|
||||
!14 = !{i32 999999, i64 1, i32 2}
|
||||
!15 = !{!"branch_weights", i32 100, i32 0}
|
||||
!16 = !{!"branch_weights", i32 0, i32 100}
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 400}
|
||||
!15 = !{!"branch_weights", i32 0, i32 100}
|
||||
|
|
Loading…
Reference in New Issue