b56a2d8af9
This patch was initially posted by Kelley Nielsen. Reposting the patch with all review comments addressed and with minor modifications and optimizations. Also, folding in the fixes offered by Hugh Dickins and Huang Ying. Tests were rerun and commit message updated with new results. try_to_unuse() is of quadratic complexity, with a lot of wasted effort. It unuses swap entries one by one, potentially iterating over all the page tables for all the processes in the system for each one. This new proposed implementation of try_to_unuse simplifies its complexity to linear. It iterates over the system's mms once, unusing all the affected entries as it walks each set of page tables. It also makes similar changes to shmem_unuse. Improvement swapoff was called on a swap partition containing about 6G of data, in a VM(8cpu, 16G RAM), and calls to unuse_pte_range() were counted. Present implementation....about 1200M calls(8min, avg 80% cpu util). Prototype.................about 9.0K calls(3min, avg 5% cpu util). Details In shmem_unuse(), iterate over the shmem_swaplist and, for each shmem_inode_info that contains a swap entry, pass it to shmem_unuse_inode(), along with the swap type. In shmem_unuse_inode(), iterate over its associated xarray, and store the index and value of each swap entry in an array for passing to shmem_swapin_page() outside of the RCU critical section. In try_to_unuse(), instead of iterating over the entries in the type and unusing them one by one, perhaps walking all the page tables for all the processes for each one, iterate over the mmlist, making one pass. Pass each mm to unuse_mm() to begin its page table walk, and during the walk, unuse all the ptes that have backing store in the swap type received by try_to_unuse(). After the walk, check the type for orphaned swap entries with find_next_to_unuse(), and remove them from the swap cache. If find_next_to_unuse() starts over at the beginning of the type, repeat the check of the shmem_swaplist and the walk a maximum of three times. Change unuse_mm() and the intervening walk functions down to unuse_pte_range() to take the type as a parameter, and to iterate over their entire range, calling the next function down on every iteration. In unuse_pte_range(), make a swap entry from each pte in the range using the passed in type. If it has backing store in the type, call swapin_readahead() to retrieve the page and pass it to unuse_pte(). Pass the count of pages_to_unuse down the page table walks in try_to_unuse(), and return from the walk when the desired number of pages has been swapped back in. Link: http://lkml.kernel.org/r/20190114153129.4852-2-vpillai@digitalocean.com Signed-off-by: Vineeth Remanan Pillai <vpillai@digitalocean.com> Signed-off-by: Kelley Nielsen <kelleynnn@gmail.com> Signed-off-by: Huang Ying <ying.huang@intel.com> Acked-by: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@surriel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
123 lines
3.0 KiB
C
123 lines
3.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_FRONTSWAP_H
|
|
#define _LINUX_FRONTSWAP_H
|
|
|
|
#include <linux/swap.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/jump_label.h>
|
|
|
|
/*
|
|
* Return code to denote that requested number of
|
|
* frontswap pages are unused(moved to page cache).
|
|
* Used in in shmem_unuse and try_to_unuse.
|
|
*/
|
|
#define FRONTSWAP_PAGES_UNUSED 2
|
|
|
|
struct frontswap_ops {
|
|
void (*init)(unsigned); /* this swap type was just swapon'ed */
|
|
int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
|
|
int (*load)(unsigned, pgoff_t, struct page *); /* load a page */
|
|
void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */
|
|
void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */
|
|
struct frontswap_ops *next; /* private pointer to next ops */
|
|
};
|
|
|
|
extern void frontswap_register_ops(struct frontswap_ops *ops);
|
|
extern void frontswap_shrink(unsigned long);
|
|
extern unsigned long frontswap_curr_pages(void);
|
|
extern void frontswap_writethrough(bool);
|
|
#define FRONTSWAP_HAS_EXCLUSIVE_GETS
|
|
extern void frontswap_tmem_exclusive_gets(bool);
|
|
|
|
extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
|
|
extern void __frontswap_init(unsigned type, unsigned long *map);
|
|
extern int __frontswap_store(struct page *page);
|
|
extern int __frontswap_load(struct page *page);
|
|
extern void __frontswap_invalidate_page(unsigned, pgoff_t);
|
|
extern void __frontswap_invalidate_area(unsigned);
|
|
|
|
#ifdef CONFIG_FRONTSWAP
|
|
extern struct static_key_false frontswap_enabled_key;
|
|
|
|
static inline bool frontswap_enabled(void)
|
|
{
|
|
return static_branch_unlikely(&frontswap_enabled_key);
|
|
}
|
|
|
|
static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
|
|
{
|
|
return __frontswap_test(sis, offset);
|
|
}
|
|
|
|
static inline void frontswap_map_set(struct swap_info_struct *p,
|
|
unsigned long *map)
|
|
{
|
|
p->frontswap_map = map;
|
|
}
|
|
|
|
static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
|
|
{
|
|
return p->frontswap_map;
|
|
}
|
|
#else
|
|
/* all inline routines become no-ops and all externs are ignored */
|
|
|
|
static inline bool frontswap_enabled(void)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
static inline void frontswap_map_set(struct swap_info_struct *p,
|
|
unsigned long *map)
|
|
{
|
|
}
|
|
|
|
static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
|
|
{
|
|
return NULL;
|
|
}
|
|
#endif
|
|
|
|
static inline int frontswap_store(struct page *page)
|
|
{
|
|
if (frontswap_enabled())
|
|
return __frontswap_store(page);
|
|
|
|
return -1;
|
|
}
|
|
|
|
static inline int frontswap_load(struct page *page)
|
|
{
|
|
if (frontswap_enabled())
|
|
return __frontswap_load(page);
|
|
|
|
return -1;
|
|
}
|
|
|
|
static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset)
|
|
{
|
|
if (frontswap_enabled())
|
|
__frontswap_invalidate_page(type, offset);
|
|
}
|
|
|
|
static inline void frontswap_invalidate_area(unsigned type)
|
|
{
|
|
if (frontswap_enabled())
|
|
__frontswap_invalidate_area(type);
|
|
}
|
|
|
|
static inline void frontswap_init(unsigned type, unsigned long *map)
|
|
{
|
|
#ifdef CONFIG_FRONTSWAP
|
|
__frontswap_init(type, map);
|
|
#endif
|
|
}
|
|
|
|
#endif /* _LINUX_FRONTSWAP_H */
|