Merge tag 'ASB-2024-05-05_11-5.4' of https://android.googlesource.com/kernel/common into android13-5.4-lahaina
https://source.android.com/docs/security/bulletin/2024-05-01 CVE-2023-4622 * tag 'ASB-2024-05-05_11-5.4' of https://android.googlesource.com/kernel/common: ANDROID: 16K: Fix show maps CFI failure ANDROID: 16K: Handle pad VMA splits and merges ANDROID: 16K: madvise_vma_pad_pages: Remove filemap_fault check ANDROID: 16K: Only madvise padding from dynamic linker context ANDROID: 16K: Separate padding from ELF LOAD segment mappings ANDROID: 16K: Exclude ELF padding for fault around range ANDROID: 16K: Use MADV_DONTNEED to save VMA padding pages. ANDROID: 16K: Introduce ELF padding representation for VMAs ANDROID: 16K: Introduce /sys/kernel/mm/pgsize_miration/enabled ANDROID: GKI: add snd_compr_stop_error to Xiaomi_abi UPSTREAM: netfilter: nf_tables: release mutex after nft_gc_seq_end from abort path UPSTREAM: netfilter: nf_tables: release batch on table validation from abort path UPSTREAM: netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout Conflicts: mm/Makefile mm/mlock.c mm/mprotect.c Change-Id: I559d13f0370fd2ede446df61fd1ce0550fa45155
This commit is contained in:
commit
1cba2b4252
@ -10,6 +10,7 @@
|
|||||||
snd_soc_get_volsw_range
|
snd_soc_get_volsw_range
|
||||||
snd_soc_info_volsw_range
|
snd_soc_info_volsw_range
|
||||||
snd_soc_put_volsw_range
|
snd_soc_put_volsw_range
|
||||||
|
snd_compr_stop_error
|
||||||
|
|
||||||
# required by cs35l45_dlkm.ko
|
# required by cs35l45_dlkm.ko
|
||||||
devm_snd_soc_register_component
|
devm_snd_soc_register_component
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include <linux/ptrace.h>
|
#include <linux/ptrace.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/pagemap.h>
|
#include <linux/pagemap.h>
|
||||||
|
#include <linux/pgsize_migration.h>
|
||||||
#include <linux/mempolicy.h>
|
#include <linux/mempolicy.h>
|
||||||
#include <linux/rmap.h>
|
#include <linux/rmap.h>
|
||||||
#include <linux/swap.h>
|
#include <linux/swap.h>
|
||||||
@ -421,7 +422,14 @@ done:
|
|||||||
|
|
||||||
static int show_map(struct seq_file *m, void *v)
|
static int show_map(struct seq_file *m, void *v)
|
||||||
{
|
{
|
||||||
show_map_vma(m, v);
|
struct vm_area_struct *pad_vma = get_pad_vma(v);
|
||||||
|
struct vm_area_struct *vma = get_data_vma(v);
|
||||||
|
|
||||||
|
if (vma_pages(vma))
|
||||||
|
show_map_vma(m, vma);
|
||||||
|
|
||||||
|
show_map_pad_vma(vma, pad_vma, m, show_map_vma, false);
|
||||||
|
|
||||||
m_cache_vma(m, v);
|
m_cache_vma(m, v);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -879,7 +887,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
|
|||||||
seq_puts(m, " kB\n");
|
seq_puts(m, " kB\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static int show_smap(struct seq_file *m, void *v)
|
static void show_smap_vma(struct seq_file *m, void *v)
|
||||||
{
|
{
|
||||||
struct vm_area_struct *vma = v;
|
struct vm_area_struct *vma = v;
|
||||||
struct mem_size_stats mss;
|
struct mem_size_stats mss;
|
||||||
@ -908,9 +916,19 @@ static int show_smap(struct seq_file *m, void *v)
|
|||||||
if (arch_pkeys_enabled())
|
if (arch_pkeys_enabled())
|
||||||
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
|
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
|
||||||
show_smap_vma_flags(m, vma);
|
show_smap_vma_flags(m, vma);
|
||||||
|
}
|
||||||
|
|
||||||
m_cache_vma(m, vma);
|
static int show_smap(struct seq_file *m, void *v)
|
||||||
|
{
|
||||||
|
struct vm_area_struct *pad_vma = get_pad_vma(v);
|
||||||
|
struct vm_area_struct *vma = get_data_vma(v);
|
||||||
|
|
||||||
|
if (vma_pages(vma))
|
||||||
|
show_smap_vma(m, vma);
|
||||||
|
|
||||||
|
show_map_pad_vma(vma, pad_vma, m, show_smap_vma, true);
|
||||||
|
|
||||||
|
m_cache_vma(m, v);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
133
include/linux/pgsize_migration.h
Normal file
133
include/linux/pgsize_migration.h
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
#ifndef _LINUX_PAGE_SIZE_MIGRATION_H
|
||||||
|
#define _LINUX_PAGE_SIZE_MIGRATION_H
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Page Size Migration
|
||||||
|
*
|
||||||
|
* Copyright (c) 2024, Google LLC.
|
||||||
|
* Author: Kalesh Singh <kaleshsingh@google.com>
|
||||||
|
*
|
||||||
|
* This file contains the APIs for mitigations to ensure
|
||||||
|
* app compatibility during the transition from 4kB to 16kB
|
||||||
|
* page size in Android.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/mm.h>
|
||||||
|
#include <linux/seq_file.h>
|
||||||
|
#include <linux/sizes.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* vm_flags representation of VMA padding pages.
|
||||||
|
*
|
||||||
|
* This allows the kernel to identify the portion of an ELF LOAD segment VMA
|
||||||
|
* that is padding.
|
||||||
|
*
|
||||||
|
* 4 high bits of vm_flags [63,60] are used to represent ELF segment padding
|
||||||
|
* up to 60kB, which is sufficient for ELFs of both 16kB and 64kB segment
|
||||||
|
* alignment (p_align).
|
||||||
|
*
|
||||||
|
* The representation is illustrated below.
|
||||||
|
*
|
||||||
|
* 63 62 61 60
|
||||||
|
* _________ _________ _________ _________
|
||||||
|
* | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
|
||||||
|
* | of 4kB | of 4kB | of 4kB | of 4kB |
|
||||||
|
* | chunks | chunks | chunks | chunks |
|
||||||
|
* |_________|_________|_________|_________|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * The padding page count lives in the top VM_PAD_WIDTH bits of vm_flags.
 *
 * VM_PAD_WIDTH       - number of bits used for the count
 * VM_PAD_SHIFT       - position of the lowest of those bits
 * VM_TOTAL_PAD_PAGES - largest count that can be represented
 * VM_PAD_MASK        - mask selecting the count bits inside vm_flags
 * VMA_PAD_START(vma) - first address of the padding at the tail of @vma
 *
 * VMA_PAD_START() evaluates its argument more than once, so do not pass
 * an expression with side effects.
 */
#define VM_PAD_WIDTH		4
#define VM_PAD_SHIFT		(BITS_PER_LONG - VM_PAD_WIDTH)
#define VM_TOTAL_PAD_PAGES	((1ULL << VM_PAD_WIDTH) - 1)
#define VM_PAD_MASK		(VM_TOTAL_PAD_PAGES << VM_PAD_SHIFT)
#define VMA_PAD_START(vma)	((vma)->vm_end - (vma_pad_pages(vma) << PAGE_SHIFT))
|
||||||
|
|
||||||
|
#if PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT)
|
||||||
|
extern void vma_set_pad_pages(struct vm_area_struct *vma,
|
||||||
|
unsigned long nr_pages);
|
||||||
|
|
||||||
|
extern unsigned long vma_pad_pages(struct vm_area_struct *vma);
|
||||||
|
|
||||||
|
extern void madvise_vma_pad_pages(struct vm_area_struct *vma,
|
||||||
|
unsigned long start, unsigned long end);
|
||||||
|
|
||||||
|
extern struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma);
|
||||||
|
|
||||||
|
extern struct vm_area_struct *get_data_vma(struct vm_area_struct *vma);
|
||||||
|
|
||||||
|
extern void show_map_pad_vma(struct vm_area_struct *vma,
|
||||||
|
struct vm_area_struct *pad,
|
||||||
|
struct seq_file *m, void *func, bool smaps);
|
||||||
|
|
||||||
|
extern void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new,
|
||||||
|
unsigned long addr, int new_below);
|
||||||
|
#else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */
|
||||||
|
static inline void vma_set_pad_pages(struct vm_area_struct *vma,
|
||||||
|
unsigned long nr_pages)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned long vma_pad_pages(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void madvise_vma_pad_pages(struct vm_area_struct *vma,
|
||||||
|
unsigned long start, unsigned long end)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct vm_area_struct *get_data_vma(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
return vma;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void show_map_pad_vma(struct vm_area_struct *vma,
|
||||||
|
struct vm_area_struct *pad,
|
||||||
|
struct seq_file *m, void *func, bool smaps)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new,
|
||||||
|
unsigned long addr, int new_below)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */
|
||||||
|
|
||||||
|
/*
 * Number of pages in @vma that are not padding: the total page count of
 * the VMA minus the padding page count reported by vma_pad_pages().
 */
static inline unsigned long vma_data_pages(struct vm_area_struct *vma)
{
	unsigned long total = vma_pages(vma);
	unsigned long padding = vma_pad_pages(vma);

	return total - padding;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sets the correct padding bits / flags for a VMA split.
|
||||||
|
*/
|
||||||
|
static inline unsigned long vma_pad_fixup_flags(struct vm_area_struct *vma,
|
||||||
|
unsigned long newflags)
|
||||||
|
{
|
||||||
|
if (newflags & VM_PAD_MASK)
|
||||||
|
return (newflags & ~VM_PAD_MASK) | (vma->vm_flags & VM_PAD_MASK);
|
||||||
|
else
|
||||||
|
return newflags;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Merging of padding VMAs is uncommon, as padding is only allowed
|
||||||
|
* from the linker context.
|
||||||
|
*
|
||||||
|
* To simplify the semantics, adjacent VMAs with padding are not
|
||||||
|
* allowed to merge.
|
||||||
|
*/
|
||||||
|
static inline bool is_mergable_pad_vma(struct vm_area_struct *vma,
|
||||||
|
unsigned long vm_flags)
|
||||||
|
{
|
||||||
|
/* Padding VMAs cannot be merged with other padding or real VMAs */
|
||||||
|
return !((vma->vm_flags | vm_flags) & VM_PAD_MASK);
|
||||||
|
}
|
||||||
|
#endif /* _LINUX_PAGE_SIZE_MIGRATION_H */
|
@ -42,7 +42,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
|
|||||||
mm_init.o mmu_context.o percpu.o slab_common.o \
|
mm_init.o mmu_context.o percpu.o slab_common.o \
|
||||||
compaction.o vmacache.o \
|
compaction.o vmacache.o \
|
||||||
interval_tree.o list_lru.o workingset.o \
|
interval_tree.o list_lru.o workingset.o \
|
||||||
debug.o gup.o $(mmu-y) showmem.o
|
debug.o gup.o pgsize_migration.o $(mmu-y) showmem.o
|
||||||
|
|
||||||
# Give 'page_alloc' its own module-parameter namespace
|
# Give 'page_alloc' its own module-parameter namespace
|
||||||
page-alloc-y := page_alloc.o
|
page-alloc-y := page_alloc.o
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
#include <linux/mempolicy.h>
|
#include <linux/mempolicy.h>
|
||||||
#include <linux/page-isolation.h>
|
#include <linux/page-isolation.h>
|
||||||
|
#include <linux/pgsize_migration.h>
|
||||||
#include <linux/page_idle.h>
|
#include <linux/page_idle.h>
|
||||||
#include <linux/userfaultfd_k.h>
|
#include <linux/userfaultfd_k.h>
|
||||||
#include <linux/hugetlb.h>
|
#include <linux/hugetlb.h>
|
||||||
@ -773,6 +774,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
|
|||||||
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
|
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
|
||||||
unsigned long start, unsigned long end)
|
unsigned long start, unsigned long end)
|
||||||
{
|
{
|
||||||
|
madvise_vma_pad_pages(vma, start, end);
|
||||||
|
|
||||||
zap_page_range(vma, start, end - start);
|
zap_page_range(vma, start, end - start);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -57,6 +57,7 @@
|
|||||||
#include <linux/delayacct.h>
|
#include <linux/delayacct.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/pfn_t.h>
|
#include <linux/pfn_t.h>
|
||||||
|
#include <linux/pgsize_migration.h>
|
||||||
#include <linux/writeback.h>
|
#include <linux/writeback.h>
|
||||||
#include <linux/memcontrol.h>
|
#include <linux/memcontrol.h>
|
||||||
#include <linux/mmu_notifier.h>
|
#include <linux/mmu_notifier.h>
|
||||||
@ -3822,7 +3823,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
|
|||||||
end_pgoff = start_pgoff -
|
end_pgoff = start_pgoff -
|
||||||
((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
|
((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
|
||||||
PTRS_PER_PTE - 1;
|
PTRS_PER_PTE - 1;
|
||||||
end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
|
end_pgoff = min3(end_pgoff, vma_data_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
|
||||||
start_pgoff + nr_pages - 1);
|
start_pgoff + nr_pages - 1);
|
||||||
|
|
||||||
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
|
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#include <linux/swap.h>
|
#include <linux/swap.h>
|
||||||
#include <linux/swapops.h>
|
#include <linux/swapops.h>
|
||||||
#include <linux/pagemap.h>
|
#include <linux/pagemap.h>
|
||||||
|
#include <linux/pgsize_migration.h>
|
||||||
#include <linux/pagevec.h>
|
#include <linux/pagevec.h>
|
||||||
#include <linux/mempolicy.h>
|
#include <linux/mempolicy.h>
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
@ -573,7 +574,7 @@ success:
|
|||||||
*/
|
*/
|
||||||
if (lock) {
|
if (lock) {
|
||||||
vm_write_begin(vma);
|
vm_write_begin(vma);
|
||||||
WRITE_ONCE(vma->vm_flags, newflags);
|
WRITE_ONCE(vma->vm_flags, vma_pad_fixup_flags(vma, newflags));
|
||||||
vm_write_end(vma);
|
vm_write_end(vma);
|
||||||
} else
|
} else
|
||||||
munlock_vma_pages_range(vma, start, end);
|
munlock_vma_pages_range(vma, start, end);
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/file.h>
|
#include <linux/file.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
|
#include <linux/pgsize_migration.h>
|
||||||
#include <linux/personality.h>
|
#include <linux/personality.h>
|
||||||
#include <linux/security.h>
|
#include <linux/security.h>
|
||||||
#include <linux/hugetlb.h>
|
#include <linux/hugetlb.h>
|
||||||
@ -1082,6 +1083,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
|
|||||||
return 0;
|
return 0;
|
||||||
if (vma_get_anon_name(vma) != anon_name)
|
if (vma_get_anon_name(vma) != anon_name)
|
||||||
return 0;
|
return 0;
|
||||||
|
if (!is_mergable_pad_vma(vma, vm_flags))
|
||||||
|
return 0;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2826,8 +2829,10 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||||||
err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
|
err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
|
||||||
|
|
||||||
/* Success. */
|
/* Success. */
|
||||||
if (!err)
|
if (!err) {
|
||||||
|
split_pad_vma(vma, new, addr, new_below);
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Clean everything up if vma_adjust failed. */
|
/* Clean everything up if vma_adjust failed. */
|
||||||
if (new->vm_ops && new->vm_ops->close)
|
if (new->vm_ops && new->vm_ops->close)
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <linux/highmem.h>
|
#include <linux/highmem.h>
|
||||||
#include <linux/security.h>
|
#include <linux/security.h>
|
||||||
#include <linux/mempolicy.h>
|
#include <linux/mempolicy.h>
|
||||||
|
#include <linux/pgsize_migration.h>
|
||||||
#include <linux/personality.h>
|
#include <linux/personality.h>
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
#include <linux/swap.h>
|
#include <linux/swap.h>
|
||||||
@ -455,7 +456,7 @@ success:
|
|||||||
* held in write mode.
|
* held in write mode.
|
||||||
*/
|
*/
|
||||||
vm_write_begin(vma);
|
vm_write_begin(vma);
|
||||||
WRITE_ONCE(vma->vm_flags, newflags);
|
WRITE_ONCE(vma->vm_flags, vma_pad_fixup_flags(vma, newflags));
|
||||||
dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
|
dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
|
||||||
vma_set_page_prot(vma);
|
vma_set_page_prot(vma);
|
||||||
|
|
||||||
|
400
mm/pgsize_migration.c
Normal file
400
mm/pgsize_migration.c
Normal file
@ -0,0 +1,400 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/*
|
||||||
|
* Page Size Migration
|
||||||
|
*
|
||||||
|
* This file contains the core logic of mitigations to ensure
|
||||||
|
* app compatibility during the transition from 4kB to 16kB
|
||||||
|
* page size in Android.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2024, Google LLC.
|
||||||
|
* Author: Kalesh Singh <kaleshsingh@google.com>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/pgsize_migration.h>
|
||||||
|
|
||||||
|
#include <linux/init.h>
|
||||||
|
#include <linux/jump_label.h>
|
||||||
|
#include <linux/kobject.h>
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/mm.h>
|
||||||
|
#include <linux/sched/task_stack.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/sysfs.h>
|
||||||
|
|
||||||
|
typedef void (*show_pad_maps_fn) (struct seq_file *m, struct vm_area_struct *vma);
|
||||||
|
typedef void (*show_pad_smaps_fn) (struct seq_file *m, void *v);
|
||||||
|
|
||||||
|
#ifdef CONFIG_64BIT
|
||||||
|
#if PAGE_SIZE == SZ_4K
|
||||||
|
DEFINE_STATIC_KEY_TRUE(pgsize_migration_enabled);
|
||||||
|
|
||||||
|
#define is_pgsize_migration_enabled() (static_branch_likely(&pgsize_migration_enabled))
|
||||||
|
#else /* PAGE_SIZE != SZ_4K */
|
||||||
|
DEFINE_STATIC_KEY_FALSE(pgsize_migration_enabled);
|
||||||
|
|
||||||
|
#define is_pgsize_migration_enabled() (static_branch_unlikely(&pgsize_migration_enabled))
|
||||||
|
#endif /* PAGE_SIZE == SZ_4K */
|
||||||
|
|
||||||
|
/*
 * sysfs "show" handler for the "enabled" attribute: writes "1\n" to @buf
 * when page size migration is enabled and "0\n" otherwise, and returns the
 * number of characters written.
 */
static ssize_t show_pgsize_migration_enabled(struct kobject *kobj,
					     struct kobj_attribute *attr,
					     char *buf)
{
	int enabled = is_pgsize_migration_enabled() ? 1 : 0;

	return sprintf(buf, "%d\n", enabled);
}
|
||||||
|
|
||||||
|
/*
 * sysfs "store" handler for the "enabled" attribute.
 *
 * Accepts the decimal values 0 and 1 and flips the static key accordingly.
 * On kernels whose page size is not 4kB the write is accepted but ignored.
 *
 * Returns @n on success, -EINVAL if @buf does not parse as 0 or 1.
 */
static ssize_t store_pgsize_migration_enabled(struct kobject *kobj,
					      struct kobj_attribute *attr,
					      const char *buf, size_t n)
{
	unsigned long requested;

	/* Migration is only applicable to 4kB kernels */
	if (PAGE_SIZE != SZ_4K)
		return n;

	if (kstrtoul(buf, 10, &requested) || requested > 1)
		return -EINVAL;

	if (requested)
		static_branch_enable(&pgsize_migration_enabled);
	else
		static_branch_disable(&pgsize_migration_enabled);

	return n;
}
|
||||||
|
|
||||||
|
static struct kobj_attribute pgsize_migration_enabled_attr = __ATTR(
|
||||||
|
enabled,
|
||||||
|
0644,
|
||||||
|
show_pgsize_migration_enabled,
|
||||||
|
store_pgsize_migration_enabled
|
||||||
|
);
|
||||||
|
|
||||||
|
static struct attribute *pgsize_migration_attrs[] = {
|
||||||
|
&pgsize_migration_enabled_attr.attr,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct attribute_group pgsize_migration_attr_group = {
|
||||||
|
.name = "pgsize_migration",
|
||||||
|
.attrs = pgsize_migration_attrs,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What: /sys/kernel/mm/pgsize_migration/enabled
|
||||||
|
* Date: April 2024
|
||||||
|
* KernelVersion: v5.4+ (GKI kernels)
|
||||||
|
* Contact: Kalesh Singh <kaleshsingh@google.com>
|
||||||
|
* Description: /sys/kernel/mm/pgsize_migration/enabled
|
||||||
|
* allows for userspace to turn on or off page size
|
||||||
|
* migration mitigations necessary for app compatibility
|
||||||
|
* during Android's transition from 4kB to 16kB page size.
|
||||||
|
* Such mitigations include preserving /proc/<pid>/[s]maps
|
||||||
|
* output as if there was no segment extension by the
|
||||||
|
* dynamic loader; and preventing fault around in the padding
|
||||||
|
* sections of ELF LOAD segment mappings.
|
||||||
|
* Users: Bionic's dynamic linker
|
||||||
|
*/
|
||||||
|
/*
 * Registers the "pgsize_migration" attribute group under mm_kobj.
 *
 * Failure to create the group is logged but is not treated as fatal:
 * the function always returns 0.
 */
static int __init init_pgsize_migration(void)
{
	if (sysfs_create_group(mm_kobj, &pgsize_migration_attr_group))
		pr_err("pgsize_migration: failed to create sysfs group\n");

	return 0;
}
|
||||||
|
late_initcall(init_pgsize_migration);
|
||||||
|
|
||||||
|
#if PAGE_SIZE == SZ_4K
|
||||||
|
void vma_set_pad_pages(struct vm_area_struct *vma,
|
||||||
|
unsigned long nr_pages)
|
||||||
|
{
|
||||||
|
if (!is_pgsize_migration_enabled())
|
||||||
|
return;
|
||||||
|
|
||||||
|
vma->vm_flags &= ~VM_PAD_MASK;
|
||||||
|
vma->vm_flags |= (nr_pages << VM_PAD_SHIFT);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long vma_pad_pages(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
if (!is_pgsize_migration_enabled())
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return vma->vm_flags >> VM_PAD_SHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * str_has_suffix - does @str end with @suffix?
 * @str:    NUL-terminated string to inspect
 * @suffix: NUL-terminated suffix to look for
 *
 * Returns true if the last strlen(@suffix) characters of @str equal @suffix
 * (an empty @suffix always matches), false otherwise.
 */
static inline bool str_has_suffix(const char *str, const char *suffix)
{
	size_t str_len = strlen(str);
	size_t suffix_len = strlen(suffix);

	if (suffix_len > str_len)
		return false;

	/*
	 * The tail of @str is exactly suffix_len characters long and both
	 * strings are NUL-terminated, so an unbounded compare is sufficient.
	 */
	return strcmp(str + (str_len - suffix_len), suffix) == 0;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The dynamic linker, or interpreter, operates within the process context
|
||||||
|
* of the binary that necessitated dynamic linking.
|
||||||
|
*
|
||||||
|
* Consequently, process context identifiers; like PID, comm, ...; cannot
|
||||||
|
* be used to differentiate whether the execution context belongs to the
|
||||||
|
* dynamic linker or not.
|
||||||
|
*
|
||||||
|
* linker_ctx() deduces whether execution is currently in the dynamic linker's
|
||||||
|
* context by correlating the current userspace instruction pointer with the
|
||||||
|
* VMAs of the current task.
|
||||||
|
*
|
||||||
|
* Returns true if in linker context, otherwise false.
|
||||||
|
*
|
||||||
|
* Caller must hold mmap lock in read mode.
|
||||||
|
*/
|
||||||
|
static inline bool linker_ctx(void)
|
||||||
|
{
|
||||||
|
struct pt_regs *regs = task_pt_regs(current);
|
||||||
|
struct mm_struct *mm = current->mm;
|
||||||
|
struct vm_area_struct *vma;
|
||||||
|
struct file *file;
|
||||||
|
|
||||||
|
if (!regs)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
vma = find_vma(mm, instruction_pointer(regs));
|
||||||
|
|
||||||
|
/* Current execution context, the VMA must be present */
|
||||||
|
BUG_ON(!vma);
|
||||||
|
|
||||||
|
file = vma->vm_file;
|
||||||
|
if (!file)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if ((vma->vm_flags & VM_EXEC)) {
|
||||||
|
char buf[64];
|
||||||
|
const int bufsize = sizeof(buf);
|
||||||
|
char *path;
|
||||||
|
|
||||||
|
memset(buf, 0, bufsize);
|
||||||
|
path = d_path(&file->f_path, buf, bufsize);
|
||||||
|
|
||||||
|
if (!strcmp(path, "/system/bin/linker64"))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Saves the number of padding pages for an ELF segment mapping
|
||||||
|
* in vm_flags.
|
||||||
|
*
|
||||||
|
* The number of padding pages is deduced from the madvise DONTNEED range [start, end)
|
||||||
|
* if the following conditions are met:
|
||||||
|
* 1) The range is enclosed by a single VMA
|
||||||
|
* 2) The range ends at the end address of the VMA
|
||||||
|
* 3) The range starts at an address greater than the start address of the VMA
|
||||||
|
* 4) The number of the pages in the range does not exceed VM_TOTAL_PAD_PAGES.
|
||||||
|
* 5) The VMA is a file backed VMA.
|
||||||
|
* 6) The file backing the VMA is a shared library (*.so)
|
||||||
|
* 7) The madvise was requested by bionic's dynamic linker.
|
||||||
|
*/
|
||||||
|
void madvise_vma_pad_pages(struct vm_area_struct *vma,
|
||||||
|
unsigned long start, unsigned long end)
|
||||||
|
{
|
||||||
|
unsigned long nr_pad_pages;
|
||||||
|
|
||||||
|
if (!is_pgsize_migration_enabled())
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the madvise range is it at the end of the file save the number of
|
||||||
|
* pages in vm_flags (only need 4 bits are needed for up to 64kB aligned ELFs).
|
||||||
|
*/
|
||||||
|
if (start <= vma->vm_start || end != vma->vm_end)
|
||||||
|
return;
|
||||||
|
|
||||||
|
nr_pad_pages = (end - start) >> PAGE_SHIFT;
|
||||||
|
|
||||||
|
if (!nr_pad_pages || nr_pad_pages > VM_TOTAL_PAD_PAGES)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Only handle this for file backed VMAs */
|
||||||
|
if (!vma->vm_file)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Limit this to only shared libraries (*.so) */
|
||||||
|
if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so"))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Only bionic's dynamic linker needs to hint padding pages. */
|
||||||
|
if (!linker_ctx())
|
||||||
|
return;
|
||||||
|
|
||||||
|
vma_set_pad_pages(vma, nr_pad_pages);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char *pad_vma_name(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
return "[page size compat]";
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct vm_operations_struct pad_vma_ops = {
|
||||||
|
.name = pad_vma_name,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns a new VMA representing the padding in @vma, if no padding
|
||||||
|
* in @vma returns NULL.
|
||||||
|
*/
|
||||||
|
struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
struct vm_area_struct *pad;
|
||||||
|
|
||||||
|
if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
pad = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
|
||||||
|
|
||||||
|
*pad = *vma;
|
||||||
|
|
||||||
|
/* Remove file */
|
||||||
|
pad->vm_file = NULL;
|
||||||
|
|
||||||
|
/* Add vm_ops->name */
|
||||||
|
pad->vm_ops = &pad_vma_ops;
|
||||||
|
|
||||||
|
/* Adjust the start to begin at the start of the padding section */
|
||||||
|
pad->vm_start = VMA_PAD_START(pad);
|
||||||
|
|
||||||
|
/* Make the pad vma PROT_NONE */
|
||||||
|
pad->vm_flags &= ~(VM_READ|VM_WRITE|VM_EXEC);
|
||||||
|
|
||||||
|
/* Remove padding bits */
|
||||||
|
pad->vm_flags &= ~VM_PAD_MASK;
|
||||||
|
|
||||||
|
return pad;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns a new VMA excluding the padding from @vma; if no padding in
|
||||||
|
* @vma returns @vma.
|
||||||
|
*/
|
||||||
|
struct vm_area_struct *get_data_vma(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
struct vm_area_struct *data;
|
||||||
|
|
||||||
|
if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK))
|
||||||
|
return vma;
|
||||||
|
|
||||||
|
data = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
|
||||||
|
|
||||||
|
*data = *vma;
|
||||||
|
|
||||||
|
/* Adjust the end to the start of the padding section */
|
||||||
|
data->vm_end = VMA_PAD_START(data);
|
||||||
|
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calls the show_pad_vma_fn on the @pad VMA, and frees the copies of @vma
|
||||||
|
* and @pad.
|
||||||
|
*/
|
||||||
|
void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad,
|
||||||
|
struct seq_file *m, void *func, bool smaps)
|
||||||
|
{
|
||||||
|
if (!pad)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This cannot happen. If @pad vma was allocated the corresponding
|
||||||
|
* @vma should have the VM_PAD_MASK bit(s) set.
|
||||||
|
*/
|
||||||
|
BUG_ON(!(vma->vm_flags & VM_PAD_MASK));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This cannot happen. @pad is a section of the original VMA.
|
||||||
|
* Therefore @vma cannot be null if @pad is not null.
|
||||||
|
*/
|
||||||
|
BUG_ON(!vma);
|
||||||
|
|
||||||
|
if (smaps)
|
||||||
|
((show_pad_smaps_fn)func)(m, pad);
|
||||||
|
else
|
||||||
|
((show_pad_maps_fn)func)(m, pad);
|
||||||
|
|
||||||
|
kfree(pad);
|
||||||
|
kfree(vma);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When splitting a padding VMA there are a couple of cases to handle.
|
||||||
|
*
|
||||||
|
* Given:
|
||||||
|
*
|
||||||
|
* | DDDDPPPP |
|
||||||
|
*
|
||||||
|
* where:
|
||||||
|
* - D represents 1 page of data;
|
||||||
|
* - P represents 1 page of padding;
|
||||||
|
* - | represents the boundaries (start/end) of the VMA
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* 1) Split exactly at the padding boundary
|
||||||
|
*
|
||||||
|
* | DDDDPPPP | --> | DDDD | PPPP |
|
||||||
|
*
|
||||||
|
* - Remove padding flags from the first VMA.
|
||||||
|
* - The second VMA is all padding
|
||||||
|
*
|
||||||
|
* 2) Split within the padding area
|
||||||
|
*
|
||||||
|
* | DDDDPPPP | --> | DDDDPP | PP |
|
||||||
|
*
|
||||||
|
* - Subtract the length of the second VMA from the first VMA's padding.
|
||||||
|
* - The second VMA is all padding, adjust its padding length (flags)
|
||||||
|
*
|
||||||
|
* 3) Split within the data area
|
||||||
|
*
|
||||||
|
* | DDDDPPPP | --> | DD | DDPPPP |
|
||||||
|
*
|
||||||
|
* - Remove padding flags from the first VMA.
|
||||||
|
* - The second VMA has the same padding as before the split.
|
||||||
|
*/
|
||||||
|
void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new,
|
||||||
|
unsigned long addr, int new_below)
|
||||||
|
{
|
||||||
|
unsigned long nr_pad_pages = vma_pad_pages(vma);
|
||||||
|
unsigned long nr_vma2_pages;
|
||||||
|
struct vm_area_struct *first;
|
||||||
|
struct vm_area_struct *second;
|
||||||
|
|
||||||
|
if (!nr_pad_pages)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (new_below) {
|
||||||
|
first = new;
|
||||||
|
second = vma;
|
||||||
|
} else {
|
||||||
|
first = vma;
|
||||||
|
second = new;
|
||||||
|
}
|
||||||
|
|
||||||
|
nr_vma2_pages = vma_pages(second);
|
||||||
|
|
||||||
|
if (nr_vma2_pages >= nr_pad_pages) { /* Case 1 & 3 */
|
||||||
|
first->vm_flags &= ~VM_PAD_MASK;
|
||||||
|
vma_set_pad_pages(second, nr_pad_pages);
|
||||||
|
} else { /* Case 2 */
|
||||||
|
vma_set_pad_pages(first, nr_pad_pages - nr_vma2_pages);
|
||||||
|
vma_set_pad_pages(second, nr_vma2_pages);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* PAGE_SIZE == SZ_4K */
|
||||||
|
#endif /* CONFIG_64BIT */
|
@ -4068,6 +4068,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
|
|||||||
|
|
||||||
if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
|
if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
|
||||||
list_del_rcu(&set->list);
|
list_del_rcu(&set->list);
|
||||||
|
set->dead = 1;
|
||||||
if (event)
|
if (event)
|
||||||
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
|
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
@ -7530,10 +7531,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
|
|||||||
struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
|
struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
|
||||||
struct nft_trans *trans, *next;
|
struct nft_trans *trans, *next;
|
||||||
struct nft_trans_elem *te;
|
struct nft_trans_elem *te;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
if (action == NFNL_ABORT_VALIDATE &&
|
if (action == NFNL_ABORT_VALIDATE &&
|
||||||
nf_tables_validate(net) < 0)
|
nf_tables_validate(net) < 0)
|
||||||
return -EAGAIN;
|
err = -EAGAIN;
|
||||||
|
|
||||||
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
|
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
|
||||||
list) {
|
list) {
|
||||||
@ -7661,12 +7663,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
|
|||||||
nf_tables_abort_release(trans);
|
nf_tables_abort_release(trans);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (action == NFNL_ABORT_AUTOLOAD)
|
return err;
|
||||||
nf_tables_module_autoload(net);
|
|
||||||
else
|
|
||||||
nf_tables_module_autoload_cleanup(net);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void nf_tables_cleanup(struct net *net)
|
static void nf_tables_cleanup(struct net *net)
|
||||||
@ -7685,6 +7682,16 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
|
|||||||
ret = __nf_tables_abort(net, action);
|
ret = __nf_tables_abort(net, action);
|
||||||
nft_gc_seq_end(nft_net, gc_seq);
|
nft_gc_seq_end(nft_net, gc_seq);
|
||||||
|
|
||||||
|
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
|
||||||
|
|
||||||
|
/* module autoload needs to happen after GC sequence update because it
|
||||||
|
* temporarily releases and grabs mutex again.
|
||||||
|
*/
|
||||||
|
if (action == NFNL_ABORT_AUTOLOAD)
|
||||||
|
nf_tables_module_autoload(net);
|
||||||
|
else
|
||||||
|
nf_tables_module_autoload_cleanup(net);
|
||||||
|
|
||||||
mutex_unlock(&nft_net->commit_mutex);
|
mutex_unlock(&nft_net->commit_mutex);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -8376,8 +8383,11 @@ static void __net_exit nf_tables_exit_net(struct net *net)
|
|||||||
|
|
||||||
gc_seq = nft_gc_seq_begin(nft_net);
|
gc_seq = nft_gc_seq_begin(nft_net);
|
||||||
|
|
||||||
if (!list_empty(&nft_net->commit_list))
|
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
|
||||||
__nf_tables_abort(net, NFNL_ABORT_NONE);
|
|
||||||
|
if (!list_empty(&nft_net->module_list))
|
||||||
|
nf_tables_module_autoload_cleanup(net);
|
||||||
|
|
||||||
__nft_release_tables(net);
|
__nft_release_tables(net);
|
||||||
|
|
||||||
nft_gc_seq_end(nft_net, gc_seq);
|
nft_gc_seq_end(nft_net, gc_seq);
|
||||||
|
Loading…
Reference in New Issue
Block a user