ANDROID: Re-enable fast mremap and fix UAF with SPF

SPF attempts page faults without taking the mmap lock, but takes the
PTL. If there is a concurrent fast mremap (at PMD/PUD level), this
can lead to a UAF as fast mremap will only take the PTL locks at the
PMD/PUD level. SPF cannot take the PTL locks at the larger subtree
granularity since this introduces much contention in the page fault
paths.

To address the race:
  1) Only try fast mremaps if there are no users of the VMA. Android
     is concerned with this optimization in the context of a
     GC stop-the-world pause, when no other threads are active,
     so this should almost always succeed.
  2) Speculative faults detect ongoing fast mremaps and fall back
     to conventional fault handling (taking the mmap read lock).

Bug: 263177905
Change-Id: I23917e493ddc8576de19883cac053dfde9982b7f
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Git-commit: 529351c4c8202aa7f5bc4a8a100e583a70ab6110
Git-repo: https://android.googlesource.com/kernel/common/
[quic_c_spathi@quicinc.com: resolve merge conflicts]
Signed-off-by: Srinivasarao Pathipati <quic_c_spathi@quicinc.com>
This commit is contained in:
Kalesh Singh 2022-12-19 21:07:49 -08:00 committed by Srinivasarao Pathipati
parent 3aa1fadec5
commit ea5f9d7e7e
2 changed files with 56 additions and 5 deletions

View File

@ -2344,8 +2344,22 @@ struct vm_area_struct *get_vma(struct mm_struct *mm, unsigned long addr)
read_lock(&mm->mm_rb_lock); read_lock(&mm->mm_rb_lock);
vma = __find_vma(mm, addr); vma = __find_vma(mm, addr);
if (vma)
atomic_inc(&vma->vm_ref_count); /*
* If there is a concurrent fast mremap, bail out since the entire
* PMD/PUD subtree may have been remapped.
*
* This is usually safe for conventional mremap since it takes the
* PTE locks as does SPF. However fast mremap only takes the lock
* at the PMD/PUD level which is ok as it is done with the mmap
* write lock held. But SPF, as the term implies, forgoes taking
* the mmap read lock and also cannot take the PTL at the larger
* PMD/PUD granularity, since that would introduce huge contention
* in the page fault path; so fall back to regular fault handling.
*/
if (vma && !atomic_inc_unless_negative(&vma->vm_ref_count))
vma = NULL;
read_unlock(&mm->mm_rb_lock); read_unlock(&mm->mm_rb_lock);
return vma; return vma;

View File

@ -191,11 +191,39 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
drop_rmap_locks(vma); drop_rmap_locks(vma);
} }
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
{
/* /*
* Speculative page fault handlers will not detect page table changes done * If we have the only reference, swap the refcount to -1. This
* without ptl locking. * will prevent other concurrent references by get_vma() for SPFs.
*/ */
#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) return atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1;
}
/*
 * Restore the VMA reference count to 1 after a fast mremap.
 *
 * Must only be called after a corresponding, successful
 * trylock_vma_ref_count(), i.e. while vm_ref_count is -1.
 */
static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
{
	/*
	 * Perform the swap unconditionally, outside of the assertion.
	 * VM_BUG_ON_VMA() does not evaluate its condition when
	 * CONFIG_DEBUG_VM is disabled, so a cmpxchg placed inside it would
	 * be compiled out and the refcount would stay at -1 forever,
	 * making get_vma() reject this VMA for all later speculative faults.
	 */
	int old = atomic_cmpxchg(&vma->vm_ref_count, -1, 1);

	/* Debug-only sanity check: the count must have been "locked" (-1). */
	VM_BUG_ON_VMA(old != -1, vma);
}
#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */
/*
 * Stub used when CONFIG_SPECULATIVE_PAGE_FAULT is not set: always
 * reports success, without touching @vma.
 */
static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
{
return true;
}
/*
 * Stub used when CONFIG_SPECULATIVE_PAGE_FAULT is not set: intentionally
 * a no-op, matching the always-successful trylock_vma_ref_count() stub.
 */
static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
{
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end, unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd) pmd_t *old_pmd, pmd_t *new_pmd)
@ -215,6 +243,14 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON(!pmd_none(*new_pmd))) if (WARN_ON(!pmd_none(*new_pmd)))
return false; return false;
/*
* We hold both the exclusive mmap_lock and the rmap locks at this
* point and cannot block. If we cannot immediately take exclusive
* ownership of the VMA, fall back to move_ptes().
*/
if (!trylock_vma_ref_count(vma))
return false;
/* /*
* We don't have to worry about the ordering of src and dst * We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_sem prevents deadlock. * ptlocks because exclusive mmap_sem prevents deadlock.
@ -237,6 +273,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
spin_unlock(new_ptl); spin_unlock(new_ptl);
spin_unlock(old_ptl); spin_unlock(old_ptl);
unlock_vma_ref_count(vma);
return true; return true;
} }
#endif #endif