8ff12cfc00
The statistics provided here allow the monitoring of allocator behavior, but at the cost of some (minimal) loss of performance. Counters are placed in SLUB's per cpu data structure. The statistics may make the per cpu structure grow larger than one cacheline, which will increase the cache footprint of SLUB.

There is a compile option to enable/disable the inclusion of the runtime statistics, and it is off by default.

The slabinfo tool is enhanced to support these statistics via the following options:

-D  Switches the line of information displayed for a slab from size mode to activity mode.

-A  Sorts the slabs displayed by activity. This allows the display of the slabs most important to the performance of a certain load.

-r  Report option: reports detailed statistics on a slab cache (implied when a cache name is given, see below).

Example (tbench load):

slabinfo -AD    -> Shows the most active slabs

Name                   Objects     Alloc      Free   %Fast
skbuff_fclone_cache         33 111953835 111953835  99  99
:0000192                  2666   5283688   5281047  99  99
:0001024                   849   5247230   5246389  83  83
vm_area_struct            1349    119642    118355  91  22
:0004096                    15     66753     66751  98  98
:0000064                  2067     25297     23383  98  78
dentry                   10259     28635     18464  91  45
:0000080                 11004     18950      8089  98  98
:0000096                  1703     12358     10784  99  98
:0000128                   762     10582      9875  94  18
:0000512                   184      9807      9647  95  81
:0002048                   479      9669      9195  83  65
anon_vma                   777      9461      9002  99  71
kmalloc-8                 6492      9981      5624  99  97
:0000768                   258      7174      6931  58  15

So the skbuff_fclone_cache is of highest importance for the tbench load. There is also a pretty high load on the 192 sized slab. Look for the aliases:

slabinfo -a | grep 000192
:0000192 <- xfs_btree_cur filp kmalloc-192 uid_cache tw_sock_TCP request_sock_TCPv6 tw_sock_TCPv6 skbuff_head_cache xfs_ili

Likely skbuff_head_cache.

Looking into the statistics of the skbuff_fclone_cache is possible through

slabinfo skbuff_fclone_cache    -> -r option implied if cache name is mentioned

.... Usual output ...
Slab Perf Counter       Alloc      Free %Al %Fr
--------------------------------------------------
Fastpath            111953360 111946981  99  99
Slowpath                 1044      7423   0   0
Page Alloc                272       264   0   0
Add partial                25       325   0   0
Remove partial             86       264   0   0
RemoteObj/SlabFrozen      350      4832   0   0
Total               111954404 111954404

Flushes       49      Refill        0
Deactivate Full=325(92%) Empty=0(0%) ToHead=24(6%) ToTail=1(0%)

Looks good because the fastpath is overwhelmingly taken.

skbuff_head_cache:

Slab Perf Counter       Alloc      Free %Al %Fr
--------------------------------------------------
Fastpath              5297262   5259882  99  99
Slowpath                 4477     39586   0   0
Page Alloc                937       824   0   0
Add partial                 0      2515   0   0
Remove partial           1691       824   0   0
RemoteObj/SlabFrozen     2621      9684   0   0
Total                 5301739   5299468

Deactivate Full=2620(100%) Empty=0(0%) ToHead=0(0%) ToTail=0(0%)

Descriptions of the output:

Total: The total number of allocations and frees that occurred for a slab.

Fastpath: The number of allocations/frees that used the fastpath.

Slowpath: Other allocations/frees, i.e. those that did not use the fastpath.

Page Alloc: Number of calls to the page allocator as a result of slowpath processing.

Add Partial: Number of slabs added to the partial list through free or alloc (occurs during cpuslab flushes).

Remove Partial: Number of slabs removed from the partial list as a result of allocations retrieving a partial slab or by a free freeing the last object of a slab.

RemoteObj/Froz: How many times remotely freed objects were encountered when a slab was about to be deactivated. Frozen: How many times free was able to skip list processing because the slab was in use as the cpuslab of another processor.

Flushes: Number of times the cpuslab was flushed on request (kmem_cache_shrink, may result from races in __slab_alloc).

Refill: Number of times we were able to refill the cpuslab from remotely freed objects for the same slab.

Deactivate: Statistics on how slabs were deactivated. Shows how they were put onto the partial list.

In general fastpath is very good. Slowpath without partial list processing is also desirable.
Any touching of the partial list uses node-specific locks, which may cause list lock contention.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
230 lines
6.2 KiB
C
230 lines
6.2 KiB
C
#ifndef _LINUX_SLUB_DEF_H
|
|
#define _LINUX_SLUB_DEF_H
|
|
|
|
/*
|
|
* SLUB : A Slab allocator without object queues.
|
|
*
|
|
* (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
|
|
*/
|
|
#include <linux/types.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/kobject.h>
|
|
|
|
/*
 * Event counters maintained when CONFIG_SLUB_STATS is enabled.
 *
 * Each enumerator is an index into kmem_cache_cpu.stat[].
 * NR_SLUB_STAT_ITEMS sizes that array and therefore must stay last.
 */
enum stat_item {
	ALLOC_FASTPATH,		/* Allocation from cpu slab */
	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
	FREE_FASTPATH,		/* Free to cpu slab */
	FREE_SLOWPATH,		/* Freeing not to cpu slab */
	FREE_FROZEN,		/* Freeing to frozen slab */
	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from partial list */
	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
	FREE_SLAB,		/* Slab freed to the page allocator */
	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
	NR_SLUB_STAT_ITEMS	/* Number of counters above; keep last */
};
|
|
|
|
/*
 * Per cpu allocation state of a slab cache.
 *
 * The stat[] array only exists with CONFIG_SLUB_STATS and enlarges the
 * structure by one counter per enum stat_item value.
 */
struct kmem_cache_cpu {
	void **freelist;	/* Pointer to first free per cpu object */
	struct page *page;	/* The slab from which we are allocating */
	int node;		/* The node of the page (or -1 for debug) */
	unsigned int offset;	/* Freepointer offset (in word units) */
	unsigned int objsize;	/* Size of an object (from kmem_cache) */
#ifdef CONFIG_SLUB_STATS
	unsigned int stat[NR_SLUB_STAT_ITEMS];	/* Indexed by enum stat_item */
#endif
};
|
|
|
|
struct kmem_cache_node {
|
|
spinlock_t list_lock; /* Protect partial list and nr_partial */
|
|
unsigned long nr_partial;
|
|
atomic_long_t nr_slabs;
|
|
struct list_head partial;
|
|
#ifdef CONFIG_SLUB_DEBUG
|
|
struct list_head full;
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
* Slab cache management.
|
|
*/
|
|
struct kmem_cache {
|
|
/* Used for retriving partial slabs etc */
|
|
unsigned long flags;
|
|
int size; /* The size of an object including meta data */
|
|
int objsize; /* The size of an object without meta data */
|
|
int offset; /* Free pointer offset. */
|
|
int order;
|
|
|
|
/*
|
|
* Avoid an extra cache line for UP, SMP and for the node local to
|
|
* struct kmem_cache.
|
|
*/
|
|
struct kmem_cache_node local_node;
|
|
|
|
/* Allocation and freeing of slabs */
|
|
int objects; /* Number of objects in slab */
|
|
int refcount; /* Refcount for slab cache destroy */
|
|
void (*ctor)(struct kmem_cache *, void *);
|
|
int inuse; /* Offset to metadata */
|
|
int align; /* Alignment */
|
|
const char *name; /* Name (only for display!) */
|
|
struct list_head list; /* List of slab caches */
|
|
#ifdef CONFIG_SLUB_DEBUG
|
|
struct kobject kobj; /* For sysfs */
|
|
#endif
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/*
|
|
* Defragmentation by allocating from a remote node.
|
|
*/
|
|
int remote_node_defrag_ratio;
|
|
struct kmem_cache_node *node[MAX_NUMNODES];
|
|
#endif
|
|
#ifdef CONFIG_SMP
|
|
struct kmem_cache_cpu *cpu_slab[NR_CPUS];
|
|
#else
|
|
struct kmem_cache_cpu cpu_slab;
|
|
#endif
|
|
};
|
|
|
|
/*
 * Kmalloc subsystem.
 *
 * KMALLOC_MIN_SIZE is the smallest kmalloc size class: 8 bytes, unless the
 * architecture defines ARCH_KMALLOC_MINALIGN to something larger.
 * KMALLOC_SHIFT_LOW is the base-2 logarithm of that size.
 */
#if defined(ARCH_KMALLOC_MINALIGN) && ARCH_KMALLOC_MINALIGN > 8
#define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN
#else
#define KMALLOC_MIN_SIZE 8
#endif

#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
|
|
|
|
/*
|
|
* We keep the general caches in an array of slab caches that are used for
|
|
* 2^x bytes of allocations.
|
|
*/
|
|
extern struct kmem_cache kmalloc_caches[PAGE_SHIFT];
|
|
|
|
/*
|
|
* Sorry that the following has to be that ugly but some versions of GCC
|
|
* have trouble with constant propagation and loops.
|
|
*/
|
|
static __always_inline int kmalloc_index(size_t size)
|
|
{
|
|
if (!size)
|
|
return 0;
|
|
|
|
if (size <= KMALLOC_MIN_SIZE)
|
|
return KMALLOC_SHIFT_LOW;
|
|
|
|
if (size > 64 && size <= 96)
|
|
return 1;
|
|
if (size > 128 && size <= 192)
|
|
return 2;
|
|
if (size <= 8) return 3;
|
|
if (size <= 16) return 4;
|
|
if (size <= 32) return 5;
|
|
if (size <= 64) return 6;
|
|
if (size <= 128) return 7;
|
|
if (size <= 256) return 8;
|
|
if (size <= 512) return 9;
|
|
if (size <= 1024) return 10;
|
|
if (size <= 2 * 1024) return 11;
|
|
/*
|
|
* The following is only needed to support architectures with a larger page
|
|
* size than 4k.
|
|
*/
|
|
if (size <= 4 * 1024) return 12;
|
|
if (size <= 8 * 1024) return 13;
|
|
if (size <= 16 * 1024) return 14;
|
|
if (size <= 32 * 1024) return 15;
|
|
if (size <= 64 * 1024) return 16;
|
|
if (size <= 128 * 1024) return 17;
|
|
if (size <= 256 * 1024) return 18;
|
|
if (size <= 512 * 1024) return 19;
|
|
if (size <= 1024 * 1024) return 20;
|
|
if (size <= 2 * 1024 * 1024) return 21;
|
|
return -1;
|
|
|
|
/*
|
|
* What we really wanted to do and cannot do because of compiler issues is:
|
|
* int i;
|
|
* for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
|
|
* if (size <= (1 << i))
|
|
* return i;
|
|
*/
|
|
}
|
|
|
|
/*
|
|
* Find the slab cache for a given combination of allocation flags and size.
|
|
*
|
|
* This ought to end up with a global pointer to the right cache
|
|
* in kmalloc_caches.
|
|
*/
|
|
static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
|
|
{
|
|
int index = kmalloc_index(size);
|
|
|
|
if (index == 0)
|
|
return NULL;
|
|
|
|
return &kmalloc_caches[index];
|
|
}
|
|
|
|
/*
 * Mask used by the inline kmalloc paths to detect DMA requests.
 * Without CONFIG_ZONE_DMA it is zero, so the test never matches.
 */
#ifdef CONFIG_ZONE_DMA
#define SLUB_DMA __GFP_DMA
#else
/* Disable DMA functionality */
#define SLUB_DMA ((__force gfp_t)0)
#endif
|
|
|
|
/*
 * Allocation entry points defined outside of this header; the inline
 * kmalloc() hands the request to one of them.
 */
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags);
void *__kmalloc(size_t size, gfp_t flags);
|
|
|
|
/*
 * Allocate size bytes with the given gfp flags.
 *
 * When size is a compile time constant the cache is selected inline:
 *  - sizes above PAGE_SIZE / 2 are taken from __get_free_pages() with
 *    __GFP_COMP added to the flags,
 *  - a zero size returns ZERO_SIZE_PTR,
 *  - other non-DMA requests go to kmem_cache_alloc() on the cache found
 *    by kmalloc_slab().
 * Everything else (non-constant size, or a request with SLUB_DMA set) is
 * handed to __kmalloc().
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
	if (__builtin_constant_p(size)) {
		if (size > PAGE_SIZE / 2)
			return (void *)__get_free_pages(flags | __GFP_COMP,
							get_order(size));

		if (!(flags & SLUB_DMA)) {
			struct kmem_cache *s = kmalloc_slab(size);

			if (!s)
				return ZERO_SIZE_PTR;

			return kmem_cache_alloc(s, flags);
		}
	}
	return __kmalloc(size, flags);
}
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/*
 * Node aware allocation entry points defined outside of this header; the
 * inline kmalloc_node() hands the request to one of them.
 */
void *__kmalloc_node(size_t size, gfp_t flags, int node);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node);
|
|
|
|
/*
 * Allocate size bytes with the given gfp flags, passing node on to the
 * underlying allocator.
 *
 * The cache is selected inline only if size is a compile time constant of
 * at most PAGE_SIZE / 2 and the request does not have SLUB_DMA set; a zero
 * size then returns ZERO_SIZE_PTR. All other requests are handed to
 * __kmalloc_node().
 */
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
	if (__builtin_constant_p(size) &&
		size <= PAGE_SIZE / 2 && !(flags & SLUB_DMA)) {
		struct kmem_cache *s = kmalloc_slab(size);

		if (!s)
			return ZERO_SIZE_PTR;

		return kmem_cache_alloc_node(s, flags, node);
	}
	return __kmalloc_node(size, flags, node);
}
|
|
#endif
|
|
|
|
#endif /* _LINUX_SLUB_DEF_H */
|