From 372242b532a5437275192476422cd7169dab9010 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Tue, 1 May 2018 20:20:20 +0530 Subject: [PATCH] mm: oom_kill: reap memory of a task that receives SIGKILL Free the pages of a task that receives SIGKILL in parallel, using the oom_reaper. Freeing the pages this way helps to return them to the buddy system well in advance. Reaping is done for a process that receives SIGKILL through either sys_kill from userspace or kill_pid from the kernel, provided that the sending process has the CAP_KILL capability. A sysctl interface, reap_mem_on_sigkill, is also added to turn this feature on or off. Change-Id: I21adb95de5e380a80d7eb0b87d9b5b553f52e28a Signed-off-by: Charan Teja Reddy [swatsrid@codeaurora.org: Fix merge conflicts] Signed-off-by: Swathi Sridhar [isaacm@codeaurora.org: Fix merge conflicts] Signed-off-by: Isaac J. Manjarres --- Documentation/admin-guide/sysctl/vm.rst | 17 +++++++++ include/linux/oom.h | 4 ++ kernel/signal.c | 6 ++- kernel/sysctl.c | 7 ++++ mm/oom_kill.c | 49 +++++++++++++++++++++---- 5 files changed, 74 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 9e8470008227..94b5d0878b7a 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -57,6 +57,7 @@ Currently, these files are in /proc/sys/vm: - nr_trim_pages (only if CONFIG_MMU=n) - numa_zonelist_order - oom_dump_tasks +- reap_mem_on_sigkill - oom_kill_allocating_task - overcommit_kbytes - overcommit_memory @@ -669,6 +670,22 @@ OOM killer actually kills a memory-hogging task. The default value is 1 (enabled). +reap_mem_on_sigkill +=================== + +This enables or disables memory reaping for a process that receives SIGKILL, +provided that the sending process has the CAP_KILL capability. 
+ +If this is set to 1, when a process receives SIGKILL from a process +that has the CAP_KILL capability, the receiving process is added to the oom_reaper +queue, from which the oom_reaper thread can pick it up and reap the memory of +that process. This applies to a process that receives SIGKILL through +either sys_kill from userspace or kill_pid from the kernel. + +If this is set to 0, the memory of a process that receives SIGKILL, through +either sys_kill from userspace or kill_pid from the kernel, is not reaped. + +The default value is 0 (disabled). oom_kill_allocating_task ======================== diff --git a/include/linux/oom.h b/include/linux/oom.h index c696c265f019..40862dd4f223 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -126,4 +126,8 @@ extern struct task_struct *find_lock_task_mm(struct task_struct *p); extern int sysctl_oom_dump_tasks; extern int sysctl_oom_kill_allocating_task; extern int sysctl_panic_on_oom; +extern int sysctl_reap_mem_on_sigkill; + +/* calls for LMK reaper */ +extern void add_to_oom_reaper(struct task_struct *p); #endif /* _INCLUDE_LINUX_OOM_H */ diff --git a/kernel/signal.c b/kernel/signal.c index c4da1ef56fdf..98b7f11b9132 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -46,6 +46,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -1404,8 +1405,11 @@ int group_send_sig_info(int sig, struct kernel_siginfo *info, ret = check_kill_permission(sig, info, p); rcu_read_unlock(); - if (!ret && sig) + if (!ret && sig) { ret = do_send_sig_info(sig, info, p, type); + if (capable(CAP_KILL) && sig == SIGKILL) + add_to_oom_reaper(p); + } return ret; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f5ee2d6dc1c1..68869fba5130 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1327,6 +1327,13 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "reap_mem_on_sigkill", + .data = &sysctl_reap_mem_on_sigkill, + .maxlen = sizeof(sysctl_reap_mem_on_sigkill), 
.mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "overcommit_ratio", .data = &sysctl_overcommit_ratio, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3d24305f758f..6993791a517b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -54,6 +54,7 @@ int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks = 1; +int sysctl_reap_mem_on_sigkill; /* * Serializes oom killer invocations (out_of_memory()) from all contexts to @@ -661,13 +662,22 @@ static int oom_reaper(void *unused) static void wake_oom_reaper(struct task_struct *tsk) { + /* + * Move the lock here to avoid scenario of queuing + * the same task by both OOM killer and any other SIGKILL + * path. + */ + spin_lock(&oom_reaper_lock); + /* mm is already queued? */ - if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) + if (test_and_set_bit(MMF_OOM_REAP_QUEUED, + &tsk->signal->oom_mm->flags)) { + spin_unlock(&oom_reaper_lock); return; + } get_task_struct(tsk); - spin_lock(&oom_reaper_lock); tsk->oom_reaper_list = oom_reaper_list; oom_reaper_list = tsk; spin_unlock(&oom_reaper_lock); @@ -687,6 +697,16 @@ static inline void wake_oom_reaper(struct task_struct *tsk) } #endif /* CONFIG_MMU */ +static void __mark_oom_victim(struct task_struct *tsk) +{ + struct mm_struct *mm = tsk->mm; + + if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) { + mmgrab(tsk->signal->oom_mm); + set_bit(MMF_OOM_VICTIM, &mm->flags); + } +} + /** * mark_oom_victim - mark the given task as OOM victim * @tsk: task to mark @@ -699,18 +719,13 @@ static inline void wake_oom_reaper(struct task_struct *tsk) */ static void mark_oom_victim(struct task_struct *tsk) { - struct mm_struct *mm = tsk->mm; - WARN_ON(oom_killer_disabled); /* OOM killer might race with memcg OOM */ if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE)) return; /* oom_mm is bound to the signal struct life time. 
*/ - if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) { - mmgrab(tsk->signal->oom_mm); - set_bit(MMF_OOM_VICTIM, &mm->flags); - } + __mark_oom_victim(tsk); /* * Make sure that the task is woken up from uninterruptible sleep @@ -1140,3 +1155,21 @@ void pagefault_out_of_memory(void) out_of_memory(&oc); mutex_unlock(&oom_lock); } + +void add_to_oom_reaper(struct task_struct *p) +{ + if (!sysctl_reap_mem_on_sigkill) + return; + + p = find_lock_task_mm(p); + if (!p) + return; + + get_task_struct(p); + if (task_will_free_mem(p)) { + __mark_oom_victim(p); + wake_oom_reaper(p); + } + task_unlock(p); + put_task_struct(p); +}