4037d45220
Currently the slab allocators contain callbacks into the page allocator to perform the draining of pagesets on remote nodes. This requires SLUB to have a whole subsystem in order to be compatible with SLAB. Moving node draining out of the slab allocators avoids a section of code in SLUB. Move the node draining so that is is done when the vm statistics are updated. At that point we are already touching all the cachelines with the pagesets of a processor. Add a expire counter there. If we have to update per zone or global vm statistics then assume that the pageset will require subsequent draining. The expire counter will be decremented on each vm stats update pass until it reaches zero. Then we will drain one batch from the pageset. The draining will cause vm counter updates which will then cause another expiration until the pcp is empty. So we will drain a batch every 3 seconds. Note that remote node draining is a somewhat esoteric feature that is required on large NUMA systems because otherwise significant portions of system memory can become trapped in pcp queues. The number of pcp is determined by the number of processors and nodes in a system. A system with 4 processors and 2 nodes has 8 pcps which is okay. But a system with 1024 processors and 512 nodes has 512k pcps with a high potential for large amount of memory being caught in them. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
182 lines
6.1 KiB
C
182 lines
6.1 KiB
C
#ifndef __LINUX_GFP_H
|
|
#define __LINUX_GFP_H
|
|
|
|
#include <linux/mmzone.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/linkage.h>
|
|
|
|
struct vm_area_struct;
|
|
|
|
/*
|
|
* GFP bitmasks..
|
|
*
|
|
* Zone modifiers (see linux/mmzone.h - low three bits)
|
|
*
|
|
* Do not put any conditional on these. If necessary modify the definitions
|
|
* without the underscores and use the consistently. The definitions here may
|
|
* be used in bit comparisons.
|
|
*/
|
|
#define __GFP_DMA ((__force gfp_t)0x01u)
|
|
#define __GFP_HIGHMEM ((__force gfp_t)0x02u)
|
|
#define __GFP_DMA32 ((__force gfp_t)0x04u)
|
|
|
|
/*
|
|
* Action modifiers - doesn't change the zoning
|
|
*
|
|
* __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
|
|
* _might_ fail. This depends upon the particular VM implementation.
|
|
*
|
|
* __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
|
|
* cannot handle allocation failures.
|
|
*
|
|
* __GFP_NORETRY: The VM implementation must not retry indefinitely.
|
|
*/
|
|
#define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */
|
|
#define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */
|
|
#define __GFP_IO ((__force gfp_t)0x40u) /* Can start physical IO? */
|
|
#define __GFP_FS ((__force gfp_t)0x80u) /* Can call down to low-level FS? */
|
|
#define __GFP_COLD ((__force gfp_t)0x100u) /* Cache-cold page required */
|
|
#define __GFP_NOWARN ((__force gfp_t)0x200u) /* Suppress page allocation failure warning */
|
|
#define __GFP_REPEAT ((__force gfp_t)0x400u) /* Retry the allocation. Might fail */
|
|
#define __GFP_NOFAIL ((__force gfp_t)0x800u) /* Retry for ever. Cannot fail */
|
|
#define __GFP_NORETRY ((__force gfp_t)0x1000u)/* Do not retry. Might fail */
|
|
#define __GFP_COMP ((__force gfp_t)0x4000u)/* Add compound page metadata */
|
|
#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
|
|
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
|
|
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
|
|
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
|
|
|
|
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
|
|
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
|
|
|
|
/* if you forget to add the bitmask here kernel will crash, period */
|
|
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
|
|
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
|
|
__GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \
|
|
__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
|
|
|
|
/* This equals 0, but use constants in case they ever change */
|
|
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
|
|
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
|
|
#define GFP_ATOMIC (__GFP_HIGH)
|
|
#define GFP_NOIO (__GFP_WAIT)
|
|
#define GFP_NOFS (__GFP_WAIT | __GFP_IO)
|
|
#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
|
|
#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
|
|
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
|
|
__GFP_HIGHMEM)
|
|
|
|
#ifdef CONFIG_NUMA
|
|
#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
|
|
#else
|
|
#define GFP_THISNODE ((__force gfp_t)0)
|
|
#endif
|
|
|
|
|
|
/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
|
|
platforms, used as appropriate on others */
|
|
|
|
#define GFP_DMA __GFP_DMA
|
|
|
|
/* 4GB DMA on some platforms */
|
|
#define GFP_DMA32 __GFP_DMA32
|
|
|
|
|
|
static inline enum zone_type gfp_zone(gfp_t flags)
|
|
{
|
|
#ifdef CONFIG_ZONE_DMA
|
|
if (flags & __GFP_DMA)
|
|
return ZONE_DMA;
|
|
#endif
|
|
#ifdef CONFIG_ZONE_DMA32
|
|
if (flags & __GFP_DMA32)
|
|
return ZONE_DMA32;
|
|
#endif
|
|
#ifdef CONFIG_HIGHMEM
|
|
if (flags & __GFP_HIGHMEM)
|
|
return ZONE_HIGHMEM;
|
|
#endif
|
|
return ZONE_NORMAL;
|
|
}
|
|
|
|
/*
|
|
* There is only one page-allocator function, and two main namespaces to
|
|
* it. The alloc_page*() variants return 'struct page *' and as such
|
|
* can allocate highmem pages, the *get*page*() variants return
|
|
* virtual kernel addresses to the allocated page(s).
|
|
*/
|
|
|
|
/*
|
|
* We get the zone list from the current node and the gfp_mask.
|
|
* This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
|
|
*
|
|
* For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
|
|
* optimized to &contig_page_data at compile-time.
|
|
*/
|
|
|
|
#ifndef HAVE_ARCH_FREE_PAGE
|
|
static inline void arch_free_page(struct page *page, int order) { }
|
|
#endif
|
|
#ifndef HAVE_ARCH_ALLOC_PAGE
|
|
static inline void arch_alloc_page(struct page *page, int order) { }
|
|
#endif
|
|
|
|
extern struct page *
|
|
FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *));
|
|
|
|
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
|
|
unsigned int order)
|
|
{
|
|
if (unlikely(order >= MAX_ORDER))
|
|
return NULL;
|
|
|
|
/* Unknown node is current node */
|
|
if (nid < 0)
|
|
nid = numa_node_id();
|
|
|
|
return __alloc_pages(gfp_mask, order,
|
|
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
|
|
}
|
|
|
|
#ifdef CONFIG_NUMA
|
|
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
|
|
|
|
static inline struct page *
|
|
alloc_pages(gfp_t gfp_mask, unsigned int order)
|
|
{
|
|
if (unlikely(order >= MAX_ORDER))
|
|
return NULL;
|
|
|
|
return alloc_pages_current(gfp_mask, order);
|
|
}
|
|
extern struct page *alloc_page_vma(gfp_t gfp_mask,
|
|
struct vm_area_struct *vma, unsigned long addr);
|
|
#else
|
|
#define alloc_pages(gfp_mask, order) \
|
|
alloc_pages_node(numa_node_id(), gfp_mask, order)
|
|
#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
|
|
#endif
|
|
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
|
|
|
|
extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order));
|
|
extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask));
|
|
|
|
#define __get_free_page(gfp_mask) \
|
|
__get_free_pages((gfp_mask),0)
|
|
|
|
#define __get_dma_pages(gfp_mask, order) \
|
|
__get_free_pages((gfp_mask) | GFP_DMA,(order))
|
|
|
|
extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
|
|
extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
|
|
extern void FASTCALL(free_hot_page(struct page *page));
|
|
extern void FASTCALL(free_cold_page(struct page *page));
|
|
|
|
#define __free_page(page) __free_pages((page), 0)
|
|
#define free_page(addr) free_pages((addr),0)
|
|
|
|
void page_alloc_init(void);
|
|
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
|
|
|
|
#endif /* __LINUX_GFP_H */
|