summaryrefslogtreecommitdiff
path: root/mm/slub.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slub.c')
-rw-r--r--mm/slub.c414
1 files changed, 174 insertions, 240 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 4b3895cb90ee..7f8aaa291a4e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -149,13 +149,6 @@ static inline void ClearSlabDebug(struct page *page)
/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST
-/*
- * Currently fastpath is not supported if preemption is enabled.
- */
-#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
-#define SLUB_FASTPATH
-#endif
-
#if PAGE_SHIFT <= 12
/*
@@ -298,32 +291,16 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
#endif
}
-/*
- * The end pointer in a slab is special. It points to the first object in the
- * slab but has bit 0 set to mark it.
- *
- * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
- * in the mapping set.
- */
-static inline int is_end(void *addr)
-{
- return (unsigned long)addr & PAGE_MAPPING_ANON;
-}
-
-static void *slab_address(struct page *page)
-{
- return page->end - PAGE_MAPPING_ANON;
-}
-
+/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object)
{
void *base;
- if (object == page->end)
+ if (!object)
return 1;
- base = slab_address(page);
+ base = page_address(page);
if (object < base || object >= base + s->objects * s->size ||
(object - base) % s->size) {
return 0;
@@ -356,8 +333,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
- for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
- __p))
+ for (__p = (__free); __p; __p = get_freepointer((__s), __p))
/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -509,7 +485,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
unsigned int off; /* Offset of last byte */
- u8 *addr = slab_address(page);
+ u8 *addr = page_address(page);
print_tracking(s, p);
@@ -644,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
* A. Free pointer (if we cannot overwrite object on free)
* B. Tracking data for SLAB_STORE_USER
* C. Padding to reach required alignment boundary or at mininum
- * one word if debuggin is on to be able to detect writes
+ * one word if debugging is on to be able to detect writes
* before the word boundary.
*
* Padding is done using 0x5a (POISON_INUSE)
@@ -687,7 +663,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
if (!(s->flags & SLAB_POISON))
return 1;
- start = slab_address(page);
+ start = page_address(page);
end = start + (PAGE_SIZE << s->order);
length = s->objects * s->size;
remainder = end - (start + length);
@@ -755,7 +731,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
* of the free objects in this slab. May cause
* another error because the object count is now wrong.
*/
- set_freepointer(s, p, page->end);
+ set_freepointer(s, p, NULL);
return 0;
}
return 1;
@@ -789,18 +765,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
void *fp = page->freelist;
void *object = NULL;
- while (fp != page->end && nr <= s->objects) {
+ while (fp && nr <= s->objects) {
if (fp == search)
return 1;
if (!check_valid_pointer(s, page, fp)) {
if (object) {
object_err(s, page, object,
"Freechain corrupt");
- set_freepointer(s, object, page->end);
+ set_freepointer(s, object, NULL);
break;
} else {
slab_err(s, page, "Freepointer corrupt");
- page->freelist = page->end;
+ page->freelist = NULL;
page->inuse = s->objects;
slab_fix(s, "Freelist cleared");
return 0;
@@ -861,6 +837,35 @@ static void remove_full(struct kmem_cache *s, struct page *page)
spin_unlock(&n->list_lock);
}
+/* Tracking of the number of slabs for debugging purposes */
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ return atomic_long_read(&n->nr_slabs);
+}
+
+static inline void inc_slabs_node(struct kmem_cache *s, int node)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ /*
+ * May be called early in order to allocate a slab for the
+ * kmem_cache_node structure. Solve the chicken-egg
+ * dilemma by deferring the increment of the count during
+ * bootstrap (see early_kmem_cache_node_alloc).
+ */
+ if (!NUMA_BUILD || n)
+ atomic_long_inc(&n->nr_slabs);
+}
+static inline void dec_slabs_node(struct kmem_cache *s, int node)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ atomic_long_dec(&n->nr_slabs);
+}
+
+/* Object debug checks for alloc/free paths */
static void setup_object_debug(struct kmem_cache *s, struct page *page,
void *object)
{
@@ -877,7 +882,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
if (!check_slab(s, page))
goto bad;
- if (object && !on_freelist(s, page, object)) {
+ if (!on_freelist(s, page, object)) {
object_err(s, page, object, "Object already allocated");
goto bad;
}
@@ -887,7 +892,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
goto bad;
}
- if (object && !check_object(s, page, object, 0))
+ if (!check_object(s, page, object, 0))
goto bad;
/* Success perform special debug activities for allocs */
@@ -906,7 +911,7 @@ bad:
*/
slab_fix(s, "Marking all objects used");
page->inuse = s->objects;
- page->freelist = page->end;
+ page->freelist = NULL;
}
return 0;
}
@@ -946,7 +951,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
}
/* Special debug activities for freeing objects */
- if (!SlabFrozen(page) && page->freelist == page->end)
+ if (!SlabFrozen(page) && !page->freelist)
remove_full(s, page);
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
@@ -1022,30 +1027,11 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
void (*ctor)(struct kmem_cache *, void *))
{
/*
- * The page->offset field is only 16 bit wide. This is an offset
- * in units of words from the beginning of an object. If the slab
- * size is bigger then we cannot move the free pointer behind the
- * object anymore.
- *
- * On 32 bit platforms the limit is 256k. On 64bit platforms
- * the limit is 512k.
- *
- * Debugging or ctor may create a need to move the free
- * pointer. Fail if this happens.
+ * Enable debugging if selected on the kernel commandline.
*/
- if (objsize >= 65535 * sizeof(void *)) {
- BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON |
- SLAB_STORE_USER | SLAB_DESTROY_BY_RCU));
- BUG_ON(ctor);
- } else {
- /*
- * Enable debugging if selected on the kernel commandline.
- */
- if (slub_debug && (!slub_debug_slabs ||
- strncmp(slub_debug_slabs, name,
- strlen(slub_debug_slabs)) == 0))
- flags |= slub_debug;
- }
+ if (slub_debug && (!slub_debug_slabs ||
+ strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
+ flags |= slub_debug;
return flags;
}
@@ -1071,6 +1057,11 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,
return flags;
}
#define slub_debug 0
+
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+ { return 0; }
+static inline void inc_slabs_node(struct kmem_cache *s, int node) {}
+static inline void dec_slabs_node(struct kmem_cache *s, int node) {}
#endif
/*
* Slab allocation and freeing
@@ -1109,7 +1100,6 @@ static void setup_object(struct kmem_cache *s, struct page *page,
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
- struct kmem_cache_node *n;
void *start;
void *last;
void *p;
@@ -1121,9 +1111,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
if (!page)
goto out;
- n = get_node(s, page_to_nid(page));
- if (n)
- atomic_long_inc(&n->nr_slabs);
+ inc_slabs_node(s, page_to_nid(page));
page->slab = s;
page->flags |= 1 << PG_slab;
if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
@@ -1131,7 +1119,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
SetSlabDebug(page);
start = page_address(page);
- page->end = start + 1;
if (unlikely(s->flags & SLAB_POISON))
memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@@ -1143,7 +1130,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
last = p;
}
setup_object(s, page, last);
- set_freepointer(s, last, page->end);
+ set_freepointer(s, last, NULL);
page->freelist = start;
page->inuse = 0;
@@ -1159,7 +1146,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
void *p;
slab_pad_check(s, page);
- for_each_object(p, s, slab_address(page))
+ for_each_object(p, s, page_address(page))
check_object(s, page, p, 0);
ClearSlabDebug(page);
}
@@ -1169,7 +1156,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-pages);
- page->mapping = NULL;
+ __ClearPageSlab(page);
+ reset_page_mapcount(page);
__free_pages(page, s->order);
}
@@ -1196,11 +1184,7 @@ static void free_slab(struct kmem_cache *s, struct page *page)
static void discard_slab(struct kmem_cache *s, struct page *page)
{
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
- atomic_long_dec(&n->nr_slabs);
- reset_page_mapcount(page);
- __ClearPageSlab(page);
+ dec_slabs_node(s, page_to_nid(page));
free_slab(s, page);
}
@@ -1314,7 +1298,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
* may return off node objects because partial slabs are obtained
* from other nodes and filled up.
*
- * If /sys/slab/xx/defrag_ratio is set to 100 (which makes
+ * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
* defrag_ratio = 1000) then every (well almost) allocation will
* first attempt to defrag slab caches on other nodes. This means
* scanning over all nodes to look for partial slabs which may be
@@ -1373,7 +1357,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
ClearSlabFrozen(page);
if (page->inuse) {
- if (page->freelist != page->end) {
+ if (page->freelist) {
add_partial(n, page, tail);
stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
} else {
@@ -1389,9 +1373,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
* Adding an empty slab to the partial slabs in order
* to avoid page allocator overhead. This slab needs
* to come after the other slabs with objects in
- * order to fill them up. That way the size of the
- * partial list stays small. kmem_cache_shrink can
- * reclaim empty slabs from the partial list.
+ * so that the others get filled first. That way the
+ * size of the partial list stays small.
+ *
+ * kmem_cache_shrink can reclaim any empty slabs from the
+ * partial list.
*/
add_partial(n, page, 1);
slab_unlock(page);
@@ -1411,18 +1397,14 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
struct page *page = c->page;
int tail = 1;
- if (c->freelist)
+ if (page->freelist)
stat(c, DEACTIVATE_REMOTE_FREES);
/*
- * Merge cpu freelist into freelist. Typically we get here
+ * Merge cpu freelist into slab freelist. Typically we get here
* because both freelists are empty. So this is unlikely
* to occur.
- *
- * We need to use _is_end here because deactivate slab may
- * be called for a debug slab. Then c->freelist may contain
- * a dummy pointer.
*/
- while (unlikely(!is_end(c->freelist))) {
+ while (unlikely(c->freelist)) {
void **object;
tail = 0; /* Hot objects. Put the slab first */
@@ -1449,6 +1431,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
/*
* Flush cpu slab.
+ *
* Called from IPI handler with interrupts disabled.
*/
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
@@ -1507,43 +1490,41 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
* rest of the freelist to the lockless freelist.
*
* And if we were unable to get a new slab from the partial slab lists then
- * we need to allocate a new slab. This is slowest path since we may sleep.
+ * we need to allocate a new slab. This is the slowest path since it involves
+ * a call to the page allocator and the setup of a new slab.
*/
static void *__slab_alloc(struct kmem_cache *s,
gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
{
void **object;
struct page *new;
-#ifdef SLUB_FASTPATH
- unsigned long flags;
- local_irq_save(flags);
-#endif
+ /* We handle __GFP_ZERO in the caller */
+ gfpflags &= ~__GFP_ZERO;
+
if (!c->page)
goto new_slab;
slab_lock(c->page);
if (unlikely(!node_match(c, node)))
goto another_slab;
+
stat(c, ALLOC_REFILL);
+
load_freelist:
object = c->page->freelist;
- if (unlikely(object == c->page->end))
+ if (unlikely(!object))
goto another_slab;
if (unlikely(SlabDebug(c->page)))
goto debug;
- object = c->page->freelist;
c->freelist = object[c->offset];
c->page->inuse = s->objects;
- c->page->freelist = c->page->end;
+ c->page->freelist = NULL;
c->node = page_to_nid(c->page);
unlock_out:
slab_unlock(c->page);
stat(c, ALLOC_SLOWPATH);
-#ifdef SLUB_FASTPATH
- local_irq_restore(flags);
-#endif
return object;
another_slab:
@@ -1575,9 +1556,7 @@ new_slab:
c->page = new;
goto load_freelist;
}
-#ifdef SLUB_FASTPATH
- local_irq_restore(flags);
-#endif
+
/*
* No memory available.
*
@@ -1589,12 +1568,17 @@ new_slab:
* That is only possible if certain conditions are met that are being
* checked when a slab is created.
*/
- if (!(gfpflags & __GFP_NORETRY) && (s->flags & __PAGE_ALLOC_FALLBACK))
- return kmalloc_large(s->objsize, gfpflags);
-
+ if (!(gfpflags & __GFP_NORETRY) &&
+ (s->flags & __PAGE_ALLOC_FALLBACK)) {
+ if (gfpflags & __GFP_WAIT)
+ local_irq_enable();
+ object = kmalloc_large(s->objsize, gfpflags);
+ if (gfpflags & __GFP_WAIT)
+ local_irq_disable();
+ return object;
+ }
return NULL;
debug:
- object = c->page->freelist;
if (!alloc_debug_processing(s, c->page, object, addr))
goto another_slab;
@@ -1619,39 +1603,11 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
{
void **object;
struct kmem_cache_cpu *c;
-
-/*
- * The SLUB_FASTPATH path is provisional and is currently disabled if the
- * kernel is compiled with preemption or if the arch does not support
- * fast cmpxchg operations. There are a couple of coming changes that will
- * simplify matters and allow preemption. Ultimately we may end up making
- * SLUB_FASTPATH the default.
- *
- * 1. The introduction of the per cpu allocator will avoid array lookups
- * through get_cpu_slab(). A special register can be used instead.
- *
- * 2. The introduction of per cpu atomic operations (cpu_ops) means that
- * we can realize the logic here entirely with per cpu atomics. The
- * per cpu atomic ops will take care of the preemption issues.
- */
-
-#ifdef SLUB_FASTPATH
- c = get_cpu_slab(s, raw_smp_processor_id());
- do {
- object = c->freelist;
- if (unlikely(is_end(object) || !node_match(c, node))) {
- object = __slab_alloc(s, gfpflags, node, addr, c);
- break;
- }
- stat(c, ALLOC_FASTPATH);
- } while (cmpxchg_local(&c->freelist, object, object[c->offset])
- != object);
-#else
unsigned long flags;
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
- if (unlikely(is_end(c->freelist) || !node_match(c, node)))
+ if (unlikely(!c->freelist || !node_match(c, node)))
object = __slab_alloc(s, gfpflags, node, addr, c);
@@ -1661,7 +1617,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
stat(c, ALLOC_FASTPATH);
}
local_irq_restore(flags);
-#endif
if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, c->objsize);
@@ -1698,17 +1653,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
void **object = (void *)x;
struct kmem_cache_cpu *c;
-#ifdef SLUB_FASTPATH
- unsigned long flags;
-
- local_irq_save(flags);
-#endif
c = get_cpu_slab(s, raw_smp_processor_id());
stat(c, FREE_SLOWPATH);
slab_lock(page);
if (unlikely(SlabDebug(page)))
goto debug;
+
checks_ok:
prior = object[offset] = page->freelist;
page->freelist = object;
@@ -1723,24 +1674,20 @@ checks_ok:
goto slab_empty;
/*
- * Objects left in the slab. If it
- * was not on the partial list before
+ * Objects left in the slab. If it was not on the partial list before
* then add it.
*/
- if (unlikely(prior == page->end)) {
+ if (unlikely(!prior)) {
add_partial(get_node(s, page_to_nid(page)), page, 1);
stat(c, FREE_ADD_PARTIAL);
}
out_unlock:
slab_unlock(page);
-#ifdef SLUB_FASTPATH
- local_irq_restore(flags);
-#endif
return;
slab_empty:
- if (prior != page->end) {
+ if (prior) {
/*
* Slab still on the partial list.
*/
@@ -1749,9 +1696,6 @@ slab_empty:
}
slab_unlock(page);
stat(c, FREE_SLAB);
-#ifdef SLUB_FASTPATH
- local_irq_restore(flags);
-#endif
discard_slab(s, page);
return;
@@ -1777,39 +1721,11 @@ static __always_inline void slab_free(struct kmem_cache *s,
{
void **object = (void *)x;
struct kmem_cache_cpu *c;
-
-#ifdef SLUB_FASTPATH
- void **freelist;
-
- c = get_cpu_slab(s, raw_smp_processor_id());
- debug_check_no_locks_freed(object, s->objsize);
- do {
- freelist = c->freelist;
- barrier();
- /*
- * If the compiler would reorder the retrieval of c->page to
- * come before c->freelist then an interrupt could
- * change the cpu slab before we retrieve c->freelist. We
- * could be matching on a page no longer active and put the
- * object onto the freelist of the wrong slab.
- *
- * On the other hand: If we already have the freelist pointer
- * then any change of cpu_slab will cause the cmpxchg to fail
- * since the freelist pointers are unique per slab.
- */
- if (unlikely(page != c->page || c->node < 0)) {
- __slab_free(s, page, x, addr, c->offset);
- break;
- }
- object[c->offset] = freelist;
- stat(c, FREE_FASTPATH);
- } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
-#else
unsigned long flags;
local_irq_save(flags);
- debug_check_no_locks_freed(object, s->objsize);
c = get_cpu_slab(s, smp_processor_id());
+ debug_check_no_locks_freed(object, c->objsize);
if (likely(page == c->page && c->node >= 0)) {
object[c->offset] = c->freelist;
c->freelist = object;
@@ -1818,7 +1734,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
__slab_free(s, page, x, addr, c->offset);
local_irq_restore(flags);
-#endif
}
void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -1973,20 +1888,21 @@ static unsigned long calculate_alignment(unsigned long flags,
unsigned long align, unsigned long size)
{
/*
- * If the user wants hardware cache aligned objects then
- * follow that suggestion if the object is sufficiently
- * large.
+ * If the user wants hardware cache aligned objects then follow that
+ * suggestion if the object is sufficiently large.
*
- * The hardware cache alignment cannot override the
- * specified alignment though. If that is greater
- * then use it.
+ * The hardware cache alignment cannot override the specified
+ * alignment though. If that is greater then use it.
*/
- if ((flags & SLAB_HWCACHE_ALIGN) &&
- size > cache_line_size() / 2)
- return max_t(unsigned long, align, cache_line_size());
+ if (flags & SLAB_HWCACHE_ALIGN) {
+ unsigned long ralign = cache_line_size();
+ while (size <= ralign / 2)
+ ralign /= 2;
+ align = max(align, ralign);
+ }
if (align < ARCH_SLAB_MINALIGN)
- return ARCH_SLAB_MINALIGN;
+ align = ARCH_SLAB_MINALIGN;
return ALIGN(align, sizeof(void *));
}
@@ -1995,19 +1911,22 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c)
{
c->page = NULL;
- c->freelist = (void *)PAGE_MAPPING_ANON;
+ c->freelist = NULL;
c->node = 0;
c->offset = s->offset / sizeof(void *);
c->objsize = s->objsize;
+#ifdef CONFIG_SLUB_STATS
+ memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
+#endif
}
static void init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
- atomic_long_set(&n->nr_slabs, 0);
spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
+ atomic_long_set(&n->nr_slabs, 0);
INIT_LIST_HEAD(&n->full);
#endif
}
@@ -2176,7 +2095,8 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
init_tracking(kmalloc_caches, n);
#endif
init_kmem_cache_node(n);
- atomic_long_inc(&n->nr_slabs);
+ inc_slabs_node(kmalloc_caches, node);
+
/*
* lockdep requires consistent irq usage for each lock
* so even though there cannot be a race this early in
@@ -2258,6 +2178,14 @@ static int calculate_sizes(struct kmem_cache *s)
unsigned long align = s->align;
/*
+ * Round up object size to the next word boundary. We can only
+ * place the free pointer at word boundaries and this determines
+ * the possible location of the free pointer.
+ */
+ size = ALIGN(size, sizeof(void *));
+
+#ifdef CONFIG_SLUB_DEBUG
+ /*
* Determine if we can poison the object itself. If the user of
* the slab may touch the object after free or before allocation
* then we should never poison the object itself.
@@ -2268,14 +2196,7 @@ static int calculate_sizes(struct kmem_cache *s)
else
s->flags &= ~__OBJECT_POISON;
- /*
- * Round up object size to the next word boundary. We can only
- * place the free pointer at word boundaries and this determines
- * the possible location of the free pointer.
- */
- size = ALIGN(size, sizeof(void *));
-#ifdef CONFIG_SLUB_DEBUG
/*
* If we are Redzoning then check if there is some space between the
* end of the object and the free pointer. If not then add an
@@ -2428,7 +2349,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
/*
* We could also check if the object is on the slabs freelist.
* But this would be too expensive and it seems that the main
- * purpose of kmem_ptr_valid is to check if the object belongs
+ * purpose of kmem_ptr_valid() is to check if the object belongs
* to a certain slab.
*/
return 1;
@@ -2487,7 +2408,7 @@ static inline int kmem_cache_close(struct kmem_cache *s)
struct kmem_cache_node *n = get_node(s, node);
n->nr_partial -= free_list(s, n, &n->partial);
- if (atomic_long_read(&n->nr_slabs))
+ if (slabs_node(s, node))
return 1;
}
free_kmem_cache_nodes(s);
@@ -2520,10 +2441,6 @@ EXPORT_SYMBOL(kmem_cache_destroy);
struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
EXPORT_SYMBOL(kmalloc_caches);
-#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
-#endif
-
static int __init setup_slub_min_order(char *str)
{
get_option(&str, &slub_min_order);
@@ -2583,6 +2500,7 @@ panic:
}
#ifdef CONFIG_ZONE_DMA
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
static void sysfs_add_func(struct work_struct *w)
{
@@ -2715,13 +2633,24 @@ void *__kmalloc(size_t size, gfp_t flags)
}
EXPORT_SYMBOL(__kmalloc);
+static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
+{
+ struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
+ get_order(size));
+
+ if (page)
+ return page_address(page);
+ else
+ return NULL;
+}
+
#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *s;
if (unlikely(size > PAGE_SIZE))
- return kmalloc_large(size, flags);
+ return kmalloc_large_node(size, flags, node);
s = get_slab(size, flags);
@@ -2738,19 +2667,17 @@ size_t ksize(const void *object)
struct page *page;
struct kmem_cache *s;
- BUG_ON(!object);
if (unlikely(object == ZERO_SIZE_PTR))
return 0;
page = virt_to_head_page(object);
- BUG_ON(!page);
if (unlikely(!PageSlab(page)))
return PAGE_SIZE << compound_order(page);
s = page->slab;
- BUG_ON(!s);
+#ifdef CONFIG_SLUB_DEBUG
/*
* Debugging requires use of the padding between object
* and whatever may come after it.
@@ -2758,6 +2685,7 @@ size_t ksize(const void *object)
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
return s->objsize;
+#endif
/*
* If we have the need to store the freelist pointer
* back there or track user information then we can
@@ -2765,7 +2693,6 @@ size_t ksize(const void *object)
*/
if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
return s->inuse;
-
/*
* Else we can use all the padding etc for the allocation
*/
@@ -2790,19 +2717,6 @@ void kfree(const void *x)
}
EXPORT_SYMBOL(kfree);
-static unsigned long count_partial(struct kmem_cache_node *n)
-{
- unsigned long flags;
- unsigned long x = 0;
- struct page *page;
-
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, lru)
- x += page->inuse;
- spin_unlock_irqrestore(&n->list_lock, flags);
- return x;
-}
-
/*
* kmem_cache_shrink removes empty slabs from the partial lists and sorts
* the remaining slabs by the number of items in use. The slabs with the
@@ -2916,7 +2830,7 @@ static void slab_mem_offline_callback(void *arg)
* and offline_pages() function shoudn't call this
* callback. So, we must fail.
*/
- BUG_ON(atomic_long_read(&n->nr_slabs));
+ BUG_ON(slabs_node(s, offline_node));
s->node[offline_node] = NULL;
kmem_cache_free(kmalloc_caches, n);
@@ -3042,7 +2956,7 @@ void __init kmem_cache_init(void)
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
- * mips it seems. The standard arches will not generate any code here.
+ * MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
@@ -3071,7 +2985,6 @@ void __init kmem_cache_init(void)
kmem_size = sizeof(struct kmem_cache);
#endif
-
printk(KERN_INFO
"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
" CPUs=%d, Nodes=%d\n",
@@ -3168,12 +3081,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
*/
for_each_online_cpu(cpu)
get_cpu_slab(s, cpu)->objsize = s->objsize;
+
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
up_write(&slub_lock);
+
if (sysfs_slab_alias(s, name))
goto err;
return s;
}
+
s = kmalloc(kmem_size, GFP_KERNEL);
if (s) {
if (kmem_cache_open(s, GFP_KERNEL, name,
@@ -3269,7 +3185,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
struct kmem_cache *s;
if (unlikely(size > PAGE_SIZE))
- return kmalloc_large(size, gfpflags);
+ return kmalloc_large_node(size, gfpflags, node);
s = get_slab(size, gfpflags);
@@ -3279,12 +3195,27 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return slab_alloc(s, gfpflags, node, caller);
}
+#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
+static unsigned long count_partial(struct kmem_cache_node *n)
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct page *page;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ x += page->inuse;
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+#endif
+
#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
static int validate_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
{
void *p;
- void *addr = slab_address(page);
+ void *addr = page_address(page);
if (!check_slab(s, page) ||
!on_freelist(s, page, NULL))
@@ -3567,7 +3498,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
static void process_slab(struct loc_track *t, struct kmem_cache *s,
struct page *page, enum track_item alloc)
{
- void *addr = slab_address(page);
+ void *addr = page_address(page);
DECLARE_BITMAP(map, s->objects);
void *p;
@@ -3676,8 +3607,8 @@ enum slab_stat_type {
#define SO_CPU (1 << SL_CPU)
#define SO_OBJECTS (1 << SL_OBJECTS)
-static unsigned long slab_objects(struct kmem_cache *s,
- char *buf, unsigned long flags)
+static ssize_t show_slab_objects(struct kmem_cache *s,
+ char *buf, unsigned long flags)
{
unsigned long total = 0;
int cpu;
@@ -3687,6 +3618,8 @@ static unsigned long slab_objects(struct kmem_cache *s,
unsigned long *per_cpu;
nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
+ if (!nodes)
+ return -ENOMEM;
per_cpu = nodes + nr_node_ids;
for_each_possible_cpu(cpu) {
@@ -3839,25 +3772,25 @@ SLAB_ATTR_RO(aliases);
static ssize_t slabs_show(struct kmem_cache *s, char *buf)
{
- return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU);
+ return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU);
}
SLAB_ATTR_RO(slabs);
static ssize_t partial_show(struct kmem_cache *s, char *buf)
{
- return slab_objects(s, buf, SO_PARTIAL);
+ return show_slab_objects(s, buf, SO_PARTIAL);
}
SLAB_ATTR_RO(partial);
static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
{
- return slab_objects(s, buf, SO_CPU);
+ return show_slab_objects(s, buf, SO_CPU);
}
SLAB_ATTR_RO(cpu_slabs);
static ssize_t objects_show(struct kmem_cache *s, char *buf)
{
- return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS);
+ return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS);
}
SLAB_ATTR_RO(objects);
@@ -4056,7 +3989,6 @@ SLAB_ATTR(remote_node_defrag_ratio);
#endif
#ifdef CONFIG_SLUB_STATS
-
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
unsigned long sum = 0;
@@ -4076,10 +4008,12 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
len = sprintf(buf, "%lu", sum);
+#ifdef CONFIG_SMP
for_each_online_cpu(cpu) {
if (data[cpu] && len < PAGE_SIZE - 20)
- len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
+ len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
}
+#endif
kfree(data);
return len + sprintf(buf + len, "\n");
}
@@ -4240,8 +4174,8 @@ static struct kset *slab_kset;
#define ID_STR_LENGTH 64
/* Create a unique string id for a slab cache:
- * format
- * :[flags-]size:[memory address of kmemcache]
+ *
+ * Format :[flags-]size
*/
static char *create_unique_id(struct kmem_cache *s)
{