From be192b51bfdfb7bd223e7ca3354eb91641cc75cc Mon Sep 17 00:00:00 2001
From: Vincent Guittot
Date: Thu, 29 Oct 2020 17:18:24 +0100
Subject: [PATCH] UPSTREAM: sched/fair: Prefer prev cpu in asymmetric wakeup path

During the fast wakeup path, the scheduler always checks whether the local
or prev CPUs are good candidates for the task before looking at other CPUs
in the domain. With commit b7a331615d25 ("sched/fair: Add asymmetric CPU
capacity wakeup scan"), heterogeneous systems gained a dedicated path but
no longer try to reuse the prev CPU whenever possible. If the previous CPU
is idle and belongs to the LLC domain, we should check it first before
looking for another CPU, because it remains one of the best candidates and
this also stabilizes task placement on the system.

This change aligns the asymmetric path's behavior with the symmetric one
and reduces the cases where the task migrates across all CPUs of the
sd_asym_cpucapacity domains at wakeup.

This change does not impact the normal EAS mode, only the overloaded case
or the case where EAS is not used.

- On hikey960 with performance governor (EAS disabled)

  ./perf bench sched pipe -T -l 50000
            mainline            w/ patch
  ops/sec   149313 (+/-0.28%)   182587 (+/-0.40%)   +22%

- On hikey with performance governor

  ./perf bench sched pipe -T -l 50000
            mainline            w/ patch
  ops/sec   47721 (+/-0.76%)    47899 (+/-0.56%)    +0.4%

According to the test on hikey, the patch doesn't impact symmetric systems
compared to the current implementation (only tested on arm64).

Also, read the uclamped value of the task's utilization at most twice,
instead of each time we compare the task's utilization with a cpu's
capacity.

Fixes: b7a331615d25 ("sched/fair: Add asymmetric CPU capacity wakeup scan")
Signed-off-by: Vincent Guittot
Signed-off-by: Peter Zijlstra (Intel)
Tested-by: Dietmar Eggemann
Reviewed-by: Valentin Schneider
Link: https://lkml.kernel.org/r/20201029161824.26389-1-vincent.guittot@linaro.org
(cherry picked from commit b4c9c9f15649c98a5b45408919d1ff4fd7f5531c)
Bug: 187129171
Signed-off-by: Connor O'Brien
Change-Id: Icaba6ae29d5bd23f87ca5492b778869be9e0d9c9
---
 kernel/sched/fair.c | 89 +++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 35 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c319f4fac43e0..ec1fccef1484c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6085,21 +6085,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 static int
 select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	unsigned long best_cap = 0;
+	unsigned long task_util, best_cap = 0;
 	int cpu, best_cpu = -1;
 	struct cpumask *cpus;
 
-	sync_entity_load_avg(&p->se);
-
 	cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
+	task_util = uclamp_task_util(p);
+
 	for_each_cpu_wrap(cpu, cpus, target) {
 		unsigned long cpu_cap = capacity_of(cpu);
 
 		if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
 			continue;
-		if (task_fits_capacity(p, cpu_cap))
+		if (fits_capacity(task_util, cpu_cap))
 			return cpu;
 
 		if (cpu_cap > best_cap) {
@@ -6111,14 +6111,60 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 	return best_cpu;
 }
 
+static inline bool asym_fits_capacity(int task_util, int cpu)
+{
+	if (static_branch_unlikely(&sched_asym_cpucapacity))
+		return fits_capacity(task_util, capacity_of(cpu));
+
+	return true;
+}
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
 	struct sched_domain *sd;
+	unsigned long task_util;
 	int i, recent_used_cpu;
 
+	/*
+	 * On asymmetric system, update task utilization because we will check
+	 * that the task fits with cpu's capacity.
+	 */
+	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+		sync_entity_load_avg(&p->se);
+		task_util = uclamp_task_util(p);
+	}
+
+	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+	    asym_fits_capacity(task_util, target))
+		return target;
+
+	/*
+	 * If the previous CPU is cache affine and idle, don't be stupid:
+	 */
+	if (prev != target && cpus_share_cache(prev, target) &&
+	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+	    asym_fits_capacity(task_util, prev))
+		return prev;
+
+	/* Check a recently used CPU as a potential idle candidate: */
+	recent_used_cpu = p->recent_used_cpu;
+	if (recent_used_cpu != prev &&
+	    recent_used_cpu != target &&
+	    cpus_share_cache(recent_used_cpu, target) &&
+	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
+	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+	    asym_fits_capacity(task_util, recent_used_cpu)) {
+		/*
+		 * Replace recent_used_cpu with prev as it is a potential
+		 * candidate for the next wake:
+		 */
+		p->recent_used_cpu = prev;
+		return recent_used_cpu;
+	}
+
 	/*
 	 * For asymmetric CPU capacity systems, our domain of interest is
 	 * sd_asym_cpucapacity rather than sd_llc.
@@ -6133,37 +6179,10 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 		 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
 		 * capacity path.
 		 */
-		if (!sd)
-			goto symmetric;
-
-		i = select_idle_capacity(p, sd, target);
-		return ((unsigned)i < nr_cpumask_bits) ? i : target;
-	}
-
-symmetric:
-	if (available_idle_cpu(target) || sched_idle_cpu(target))
-		return target;
-
-	/*
-	 * If the previous CPU is cache affine and idle, don't be stupid:
-	 */
-	if (prev != target && cpus_share_cache(prev, target) &&
-	    (available_idle_cpu(prev) || sched_idle_cpu(prev)))
-		return prev;
-
-	/* Check a recently used CPU as a potential idle candidate: */
-	recent_used_cpu = p->recent_used_cpu;
-	if (recent_used_cpu != prev &&
-	    recent_used_cpu != target &&
-	    cpus_share_cache(recent_used_cpu, target) &&
-	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
-	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
-		/*
-		 * Replace recent_used_cpu with prev as it is a potential
-		 * candidate for the next wake:
-		 */
-		p->recent_used_cpu = prev;
-		return recent_used_cpu;
+		if (sd) {
+			i = select_idle_capacity(p, sd, target);
+			return ((unsigned)i < nr_cpumask_bits) ? i : target;
+		}
 	}
 
 	sd = rcu_dereference(per_cpu(sd_llc, target));
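
For illustration only, outside the patch itself: the sketch below is a minimal
userspace C model of the capacity-fit test that the new
asym_fits_capacity()/fits_capacity() checks apply to the target and prev CPUs
before falling back to the full domain scan. The ~20% headroom margin
(util * 1280 < capacity * 1024) is assumed to mirror the kernel's
fits_capacity() macro; the function pick_wakeup_cpu() and all CPU numbers,
capacities, and utilization values are made up for the example.

	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Userspace model of the fits_capacity() check: the task's
	 * utilization must fit under the CPU's capacity with headroom.
	 */
	static bool fits_capacity(unsigned long util, unsigned long cap)
	{
		return util * 1280 < cap * 1024;
	}

	/*
	 * Simplified model of the patched asymmetric wakeup fast path:
	 * accept the target CPU, then the prev CPU, only if it is idle and
	 * the task's (uclamped) utilization fits its capacity; otherwise
	 * fall back to a full capacity-aware scan (returned here as -1).
	 */
	static int pick_wakeup_cpu(unsigned long task_util,
				   int target, unsigned long target_cap, bool target_idle,
				   int prev, unsigned long prev_cap, bool prev_idle)
	{
		if (target_idle && fits_capacity(task_util, target_cap))
			return target;
		if (prev != target && prev_idle && fits_capacity(task_util, prev_cap))
			return prev;
		return -1;
	}

	int main(void)
	{
		/* A util-600 task does not fit a little CPU (cap 462) but fits a big one (cap 1024). */
		printf("target little, prev big -> cpu %d\n",
		       pick_wakeup_cpu(600, 0, 462, true, 4, 1024, true));
		printf("target big, prev little -> cpu %d\n",
		       pick_wakeup_cpu(600, 4, 1024, true, 0, 462, true));
		return 0;
	}

In the first call the idle prev CPU is kept because the task does not fit the
little target, which is exactly the stabilizing behavior the patch adds to the
asymmetric path.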