FROMLIST: sched/fair: Use wake_q length as a hint for wake_wide
This patch adds a parameter to select_task_rq, sibling_count_hint,
allowing the caller, where it has this information, to inform the
sched_class of the number of tasks that are being woken up as part of
the same event.

The wake_q mechanism is one case where this information is available.

select_task_rq_fair can then use the information to detect that it
needs to widen the search space for task placement in order to avoid
overloading the last-level cache domain's CPUs.

                               * * *

The reason I am investigating this change is the following use case
on ARM big.LITTLE (asymmetrical CPU capacity): 1 task per CPU, which
all repeatedly do X amount of work then pthread_barrier_wait (i.e.
sleep until the last task finishes its X and hits the barrier). On
big.LITTLE, the tasks which get a "big" CPU finish faster, and then
those CPUs pull over the tasks that are still running:

  v CPU v          ->time->

                   -------------
  0 (big)          11111  /333
                   -------------
  1 (big)          22222   /444|
                   -------------
  2 (LITTLE)       333333/
                   -------------
  3 (LITTLE)       444444/
                   -------------

Now when task 4 hits the barrier (at |) and wakes the others up,
there are 4 tasks with prev_cpu=<big> and 0 tasks with
prev_cpu=<little>. want_affine therefore means that we'll only look
in CPUs 0 and 1 (sd_llc), so tasks will be unnecessarily coscheduled
on the bigs until the next load balance, something like this:

  v CPU v          ->time->

                   ------------------------
  0 (big)          11111  /333  31313\33333
                   ------------------------
  1 (big)          22222   /444|424\4444444
                   ------------------------
  2 (LITTLE)       333333/          \222222
                   ------------------------
  3 (LITTLE)       444444/            \1111
                   ------------------------
                                ^^^
                          underutilization

So, I'm trying to get want_affine = 0 for these tasks.

I don't _think_ any incarnation of the wakee_flips mechanism can help
us here because which task is waker and which tasks are wakees
generally changes with each iteration.
However pthread_barrier_wait (or more accurately FUTEX_WAKE) has the
nice property that we know exactly how many tasks are being woken, so
we can cheat.

It might be a disadvantage that we "widen" _every_ task that's woken
in an event, while select_idle_sibling would work fine for the first
sd_llc_size - 1 tasks.

IIUC, if wake_affine() behaves correctly this trick wouldn't be
necessary on SMP systems, so it might be best guarded by the presence
of SD_ASYM_CPUCAPACITY?

                               * * *

Final note..

In order to observe "perfect" behaviour for this use case, I also had
to disable the TTWU_QUEUE sched feature. Suppose during the wakeup
above we are working through the work queue and have placed tasks 3
and 2, and are about to place task 1:

  v CPU v          ->time->

                   --------------
  0 (big)          11111  /333  3
                   --------------
  1 (big)          22222   /444|4
                   --------------
  2 (LITTLE)       333333/      2
                   --------------
  3 (LITTLE)       444444/          <- Task 1 should go here
                   --------------

If TTWU_QUEUE is enabled, we will not yet have enqueued task 2 (having
instead sent a reschedule IPI) or attached its load to CPU 2. So we
are likely to also place task 1 on CPU 2. Disabling TTWU_QUEUE means
that we enqueue task 2 before placing task 1, solving this issue.
TTWU_QUEUE is there to minimise rq lock contention, and I guess that
this contention is less of an issue on big.LITTLE systems since they
have relatively few CPUs, which suggests the trade-off makes sense
here.
Signed-off-by: Brendan Jackman <brendan.jackman@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
(
 - Applied from https://patchwork.kernel.org/patch/9895261/
 - Fixed trivial conflict in kernel/sched/core.c
 - Fixed select_task_rq_idle, now in kernel/sched/idle.c
 - Fixed trivial conflict in select_task_rq_fair
)
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Change-Id: I3cfc4bf48c3d7feef969db4d22449f4fbb4f795d
[satyap@codeaurora.org: port to 5.4 and fix trivial merge conflicts]
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
This commit is contained in:
parent
cb858f1167
commit
aa8b37baac
@ -38,6 +38,9 @@
|
||||
struct wake_q_head {
|
||||
struct wake_q_node *first;
|
||||
struct wake_q_node **lastp;
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
int count;
|
||||
#endif
|
||||
};
|
||||
|
||||
#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
|
||||
@ -49,6 +52,9 @@ static inline void wake_q_init(struct wake_q_head *head)
|
||||
{
|
||||
head->first = WAKE_Q_TAIL;
|
||||
head->lastp = &head->first;
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
head->count = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool wake_q_empty(struct wake_q_head *head)
|
||||
|
@ -431,6 +431,9 @@ static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
|
||||
/*
|
||||
* The head is context local, there can be no concurrency.
|
||||
*/
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
head->count++;
|
||||
#endif
|
||||
*head->lastp = node;
|
||||
head->lastp = &node->next;
|
||||
return true;
|
||||
@ -477,6 +480,10 @@ void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
|
||||
put_task_struct(task);
|
||||
}
|
||||
|
||||
static int
|
||||
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
|
||||
int sibling_count_hint);
|
||||
|
||||
void wake_up_q(struct wake_q_head *head)
|
||||
{
|
||||
struct wake_q_node *node = head->first;
|
||||
@ -491,10 +498,14 @@ void wake_up_q(struct wake_q_head *head)
|
||||
task->wake_q.next = NULL;
|
||||
|
||||
/*
|
||||
* wake_up_process() executes a full barrier, which pairs with
|
||||
* try_to_wake_up() executes a full barrier, which pairs with
|
||||
* the queueing in wake_q_add() so as not to miss wakeups.
|
||||
*/
|
||||
wake_up_process(task);
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
try_to_wake_up(task, TASK_NORMAL, 0, head->count);
|
||||
#else
|
||||
try_to_wake_up(task, TASK_NORMAL, 0, 1);
|
||||
#endif
|
||||
put_task_struct(task);
|
||||
}
|
||||
}
|
||||
@ -2142,14 +2153,16 @@ out:
|
||||
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
|
||||
*/
|
||||
static inline
|
||||
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags,
|
||||
int sibling_count_hint)
|
||||
{
|
||||
bool allow_isolated = (p->flags & PF_KTHREAD);
|
||||
|
||||
lockdep_assert_held(&p->pi_lock);
|
||||
|
||||
if (p->nr_cpus_allowed > 1)
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags,
|
||||
sibling_count_hint);
|
||||
else
|
||||
cpu = cpumask_any(p->cpus_ptr);
|
||||
|
||||
@ -2544,6 +2557,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
|
||||
* @p: the thread to be awakened
|
||||
* @state: the mask of task states that can be woken
|
||||
* @wake_flags: wake modifier flags (WF_*)
|
||||
* @sibling_count_hint: A hint at the number of threads that are being woken up
|
||||
* in this event.
|
||||
*
|
||||
* If (@state & @p->state) @p->state = TASK_RUNNING.
|
||||
*
|
||||
@ -2559,7 +2574,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
|
||||
* %false otherwise.
|
||||
*/
|
||||
static int
|
||||
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
|
||||
int sibling_count_hint)
|
||||
{
|
||||
unsigned long flags;
|
||||
int cpu, success = 0;
|
||||
@ -2672,7 +2688,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
atomic_dec(&task_rq(p)->nr_iowait);
|
||||
}
|
||||
|
||||
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
|
||||
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags,
|
||||
sibling_count_hint);
|
||||
if (task_cpu(p) != cpu) {
|
||||
wake_flags |= WF_MIGRATED;
|
||||
psi_ttwu_dequeue(p);
|
||||
@ -2723,13 +2740,13 @@ out:
|
||||
*/
|
||||
int wake_up_process(struct task_struct *p)
|
||||
{
|
||||
return try_to_wake_up(p, TASK_NORMAL, 0);
|
||||
return try_to_wake_up(p, TASK_NORMAL, 0, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(wake_up_process);
|
||||
|
||||
int wake_up_state(struct task_struct *p, unsigned int state)
|
||||
{
|
||||
return try_to_wake_up(p, state, 0);
|
||||
return try_to_wake_up(p, state, 0, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3026,7 +3043,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
* as we're not fully set-up yet.
|
||||
*/
|
||||
p->recent_used_cpu = task_cpu(p);
|
||||
__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
|
||||
__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1));
|
||||
#endif
|
||||
rq = __task_rq_lock(p, &rf);
|
||||
update_rq_clock(rq);
|
||||
@ -3571,7 +3588,7 @@ void sched_exec(void)
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
|
||||
dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1);
|
||||
if (dest_cpu == smp_processor_id())
|
||||
goto unlock;
|
||||
|
||||
@ -4500,7 +4517,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
|
||||
int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
|
||||
void *key)
|
||||
{
|
||||
return try_to_wake_up(curr->private, mode, wake_flags);
|
||||
return try_to_wake_up(curr->private, mode, wake_flags, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(default_wake_function);
|
||||
|
||||
|
@ -1602,7 +1602,8 @@ static void yield_task_dl(struct rq *rq)
|
||||
static int find_later_rq(struct task_struct *task);
|
||||
|
||||
static int
|
||||
select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags,
|
||||
int sibling_count_hint)
|
||||
{
|
||||
struct task_struct *curr;
|
||||
struct rq *rq;
|
||||
|
@ -5590,15 +5590,18 @@ static void record_wakee(struct task_struct *p)
|
||||
* whatever is irrelevant, spread criteria is apparent partner count exceeds
|
||||
* socket size.
|
||||
*/
|
||||
static int wake_wide(struct task_struct *p)
|
||||
static int wake_wide(struct task_struct *p, int sibling_count_hint)
|
||||
{
|
||||
unsigned int master = current->wakee_flips;
|
||||
unsigned int slave = p->wakee_flips;
|
||||
int factor = this_cpu_read(sd_llc_size);
|
||||
int llc_size = this_cpu_read(sd_llc_size);
|
||||
|
||||
if (sibling_count_hint >= llc_size)
|
||||
return 1;
|
||||
|
||||
if (master < slave)
|
||||
swap(master, slave);
|
||||
if (slave < factor || master < slave * factor)
|
||||
if (slave < llc_size || master < slave * llc_size)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
@ -7220,7 +7223,8 @@ eas_not_ready:
|
||||
* preempt must be disabled.
|
||||
*/
|
||||
static int
|
||||
select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
|
||||
select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags,
|
||||
int sibling_count_hint)
|
||||
{
|
||||
struct sched_domain *tmp, *sd = NULL;
|
||||
int cpu = smp_processor_id();
|
||||
@ -7247,7 +7251,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
|
||||
new_cpu = prev_cpu;
|
||||
}
|
||||
|
||||
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) &&
|
||||
want_affine = !wake_wide(p, sibling_count_hint) &&
|
||||
!wake_cap(p, cpu, prev_cpu) &&
|
||||
cpumask_test_cpu(cpu, p->cpus_ptr);
|
||||
}
|
||||
|
||||
|
@ -364,7 +364,8 @@ void cpu_startup_entry(enum cpuhp_state state)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags,
|
||||
int sibling_count_hint)
|
||||
{
|
||||
return task_cpu(p); /* IDLE tasks as never migrated */
|
||||
}
|
||||
|
@ -1481,7 +1481,8 @@ task_may_not_preempt(struct task_struct *task, int cpu)
|
||||
}
|
||||
|
||||
static int
|
||||
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
|
||||
int sibling_count_hint)
|
||||
{
|
||||
struct task_struct *curr;
|
||||
struct rq *rq;
|
||||
|
@ -1883,7 +1883,8 @@ struct sched_class {
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
|
||||
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
|
||||
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags,
|
||||
int subling_count_hint);
|
||||
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
|
||||
|
||||
void (*task_woken)(struct rq *this_rq, struct task_struct *task);
|
||||
|
@ -12,7 +12,8 @@
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags,
|
||||
int sibling_count_hint)
|
||||
{
|
||||
return task_cpu(p); /* stop tasks as never migrate */
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user