mm: make faultaround produce old ptes
Based on Kirill's patch [1]. Currently, faultaround code produces young pte. This can screw up vmscan behaviour[2], as it makes vmscan think that these pages are hot and not push them out on first round. During sparse file access faultaround gets more pages mapped and all of them are young. Under memory pressure, this makes vmscan swap out anon pages instead, or to drop other page cache pages which otherwise stay resident. Modify faultaround to produce old ptes if sysctl 'want_old_faultaround_pte' is set, so they can easily be reclaimed under memory pressure. This can to some extent defeat the purpose of faultaround on machines without hardware accessed bit as it will not help us with reducing the number of minor page faults. Making the faultaround ptes old results in a unixbench regression for some architectures [3][4]. But on some architectures like arm64 it is not found to cause any regression. unixbench shell8 scores on arm64 v8.2 hardware with CONFIG_ARM64_HW_AFDBM enabled (5 runs min, max, avg): Base: (741,748,744) With this patch: (739,748,743) So by default produce young ptes and provide a sysctl option to make the ptes old. [1] https://marc.info/?l=linux-mm&m=146348837703148 [2] https://lkml.org/lkml/2016/4/18/612 [3] https://marc.info/?l=linux-kernel&m=146582237922378&w=2 [4] https://marc.info/?l=linux-mm&m=146589376909424&w=2 Change-Id: I193185cc953bc33a44fc24963a9df9e555906d95 Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Patch-mainline: linux-mm @ Fri, 19 Jan 2018 17:24:54 [vinmenon@codeaurora.org: enable by default since arm works well with old fault_around ptes + edit the links in commit message to fix checkpatch issues] Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org> [swatsrid@codeaurora.org: Fix merge conflicts] Signed-off-by: Swathi Sridhar <swatsrid@codeaurora.org> Signed-off-by: Chris Goldsworthy <cgoldswo@codeaurora.org>
This commit is contained in:
parent
c5d20367bc
commit
39c6b7d296
@ -75,7 +75,7 @@ Currently, these files are in /proc/sys/vm:
|
|||||||
- watermark_boost_factor
|
- watermark_boost_factor
|
||||||
- watermark_scale_factor
|
- watermark_scale_factor
|
||||||
- zone_reclaim_mode
|
- zone_reclaim_mode
|
||||||
|
- want_old_faultaround_pte
|
||||||
|
|
||||||
admin_reserve_kbytes
|
admin_reserve_kbytes
|
||||||
====================
|
====================
|
||||||
@ -995,3 +995,24 @@ of other processes running on other nodes will not be affected.
|
|||||||
Allowing regular swap effectively restricts allocations to the local
|
Allowing regular swap effectively restricts allocations to the local
|
||||||
node unless explicitly overridden by memory policies or cpuset
|
node unless explicitly overridden by memory policies or cpuset
|
||||||
configurations.
|
configurations.
|
||||||
|
|
||||||
|
|
||||||
|
want_old_faultaround_pte:
|
||||||
|
=========================
|
||||||
|
|
||||||
|
By default, the faultaround code produces young ptes. When want_old_faultaround_pte is
|
||||||
|
set to 1, faultaround produces old ptes.
|
||||||
|
|
||||||
|
During sparse file access faultaround gets more pages mapped and when all of
|
||||||
|
them are young (default), under memory pressure, this makes vmscan swap out anon
|
||||||
|
pages instead, or to drop other page cache pages which otherwise stay resident.
|
||||||
|
Setting want_old_faultaround_pte to 1 avoids this.
|
||||||
|
|
||||||
|
Making the faultaround ptes old can result in performance regression on some
|
||||||
|
architectures. This is due to cycles spent in micro-faults which would take a page
|
||||||
|
walk to set the young bit in the pte. One such known test that shows a regression on
|
||||||
|
x86 is unixbench shell8. Set want_old_faultaround_pte to 1 on architectures
|
||||||
|
which do not show this regression, or if the workload shows overall performance
|
||||||
|
benefit with old faultaround ptes.
|
||||||
|
|
||||||
|
The default value is 0.
|
||||||
|
@ -392,6 +392,7 @@ extern pgprot_t protection_map[16];
|
|||||||
#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
|
#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
|
||||||
#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
|
#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
|
||||||
#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
|
#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
|
||||||
|
#define FAULT_FLAG_PREFAULT_OLD 0x400 /* Make faultaround ptes old */
|
||||||
|
|
||||||
#define FAULT_FLAG_TRACE \
|
#define FAULT_FLAG_TRACE \
|
||||||
{ FAULT_FLAG_WRITE, "WRITE" }, \
|
{ FAULT_FLAG_WRITE, "WRITE" }, \
|
||||||
@ -2882,5 +2883,7 @@ static inline int pages_identical(struct page *page1, struct page *page2)
|
|||||||
return !memcmp_pages(page1, page2);
|
return !memcmp_pages(page1, page2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int want_old_faultaround_pte;
|
||||||
|
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
#endif /* _LINUX_MM_H */
|
#endif /* _LINUX_MM_H */
|
||||||
|
@ -1671,6 +1671,15 @@ static struct ctl_table vm_table[] = {
|
|||||||
.extra1 = SYSCTL_ZERO,
|
.extra1 = SYSCTL_ZERO,
|
||||||
.extra2 = &one_hundred,
|
.extra2 = &one_hundred,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.procname = "want_old_faultaround_pte",
|
||||||
|
.data = &want_old_faultaround_pte,
|
||||||
|
.maxlen = sizeof(want_old_faultaround_pte),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec_minmax,
|
||||||
|
.extra1 = SYSCTL_ZERO,
|
||||||
|
.extra2 = SYSCTL_ONE,
|
||||||
|
},
|
||||||
#ifdef CONFIG_HUGETLB_PAGE
|
#ifdef CONFIG_HUGETLB_PAGE
|
||||||
{
|
{
|
||||||
.procname = "nr_hugepages",
|
.procname = "nr_hugepages",
|
||||||
|
10
mm/filemap.c
10
mm/filemap.c
@ -53,6 +53,8 @@
|
|||||||
|
|
||||||
#include <asm/mman.h>
|
#include <asm/mman.h>
|
||||||
|
|
||||||
|
int want_old_faultaround_pte = 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Shared mappings implemented 30.11.1994. It's not fully working yet,
|
* Shared mappings implemented 30.11.1994. It's not fully working yet,
|
||||||
* though.
|
* though.
|
||||||
@ -2685,6 +2687,14 @@ void filemap_map_pages(struct vm_fault *vmf,
|
|||||||
if (vmf->pte)
|
if (vmf->pte)
|
||||||
vmf->pte += xas.xa_index - last_pgoff;
|
vmf->pte += xas.xa_index - last_pgoff;
|
||||||
last_pgoff = xas.xa_index;
|
last_pgoff = xas.xa_index;
|
||||||
|
|
||||||
|
if (want_old_faultaround_pte) {
|
||||||
|
if (xas.xa_index == vmf->pgoff)
|
||||||
|
vmf->flags &= ~FAULT_FLAG_PREFAULT_OLD;
|
||||||
|
else
|
||||||
|
vmf->flags |= FAULT_FLAG_PREFAULT_OLD;
|
||||||
|
}
|
||||||
|
|
||||||
if (alloc_set_pte(vmf, NULL, page))
|
if (alloc_set_pte(vmf, NULL, page))
|
||||||
goto unlock;
|
goto unlock;
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
|
@ -3314,6 +3314,10 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
|
|||||||
entry = mk_pte(page, vma->vm_page_prot);
|
entry = mk_pte(page, vma->vm_page_prot);
|
||||||
if (write)
|
if (write)
|
||||||
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
|
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
|
||||||
|
|
||||||
|
if (vmf->flags & FAULT_FLAG_PREFAULT_OLD)
|
||||||
|
entry = pte_mkold(entry);
|
||||||
|
|
||||||
/* copy-on-write page */
|
/* copy-on-write page */
|
||||||
if (write && !(vma->vm_flags & VM_SHARED)) {
|
if (write && !(vma->vm_flags & VM_SHARED)) {
|
||||||
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
|
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
|
||||||
|
Loading…
Reference in New Issue
Block a user