Documentation/vm/00-INDEX  +0 −2

@@ -30,8 +30,6 @@
 page_migration
     - description of page migration in NUMA systems.
 pagemap.txt
     - pagemap, from the userspace perspective
-slabinfo.c
-    - source code for a tool to get reports about slabs.
 slub.txt
     - a short users guide for SLUB.
 unevictable-lru.txt

include/linux/mm_types.h  +13 −1

@@ -79,9 +79,21 @@ struct page {
     };

     /* Third double word block */
-    struct list_head lru;       /* Pageout list, eg. active_list
+    union {
+        struct list_head lru;   /* Pageout list, eg. active_list
                                  * protected by zone->lru_lock !
                                  */
+        struct {                /* slub per cpu partial pages */
+            struct page *next;  /* Next partial slab */
+#ifdef CONFIG_64BIT
+            int pages;          /* Nr of partial slabs left */
+            int pobjects;       /* Approximate # of objects */
+#else
+            short int pages;
+            short int pobjects;
+#endif
+        };
+    };

     /* Remainder is not double word aligned */
     union {

include/linux/slub_def.h  +4 −0

@@ -36,12 +36,15 @@ enum stat_item {
     ORDER_FALLBACK,         /* Number of times fallback was necessary */
     CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
     CMPXCHG_DOUBLE_FAIL,    /* Number of times that cmpxchg double did not match */
+    CPU_PARTIAL_ALLOC,      /* Used cpu partial on alloc */
+    CPU_PARTIAL_FREE,       /* Used cpu partial on free */
     NR_SLUB_STAT_ITEMS };

 struct kmem_cache_cpu {
     void **freelist;        /* Pointer to next available object */
     unsigned long tid;      /* Globally unique transaction id */
     struct page *page;      /* The slab from which we are allocating */
+    struct page *partial;   /* Partially allocated frozen slabs */
     int node;               /* The node of the page (or -1 for debug) */
 #ifdef CONFIG_SLUB_STATS
     unsigned stat[NR_SLUB_STAT_ITEMS];

@@ -79,6 +82,7 @@ struct kmem_cache {
     int size;               /* The size of an object including meta data */
     int objsize;            /* The size of an object without meta data */
     int offset;             /* Free pointer offset. */
+    int cpu_partial;        /* Number of per cpu partial objects to keep around */
     struct kmem_cache_order_objects oo;

     /* Allocation and freeing of slabs */

mm/slab.c  +7 −12

@@ -1857,9 +1857,9 @@ static void dump_line(char *data, int offset, int limit)
             error = data[offset + i];
             bad_count++;
         }
-        printk(" %02x", (unsigned char)data[offset + i]);
     }
-    printk("\n");
+    print_hex_dump(KERN_CONT, "", 0, 16, 1,
+            &data[offset], limit, 1);

     if (bad_count == 1) {
         error ^= POISON_FREE;

@@ -3039,14 +3039,9 @@
 bad:
         printk(KERN_ERR "slab: Internal list corruption detected in "
                 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
                 cachep->name, cachep->num, slabp, slabp->inuse);
-        for (i = 0;
-             i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
-             i++) {
-            if (i % 16 == 0)
-                printk("\n%03x:", i);
-            printk(" %02x", ((unsigned char *)slabp)[i]);
-        }
-        printk("\n");
+        print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
+            sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t), 1);
         BUG();
     }
 }

@@ -4584,7 +4579,7 @@ static const struct file_operations proc_slabstats_operations = {

 static int __init slab_proc_init(void)
 {
-    proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
+    proc_create("slabinfo",S_IWUSR|S_IRUSR,NULL,&proc_slabinfo_operations);
 #ifdef CONFIG_DEBUG_SLAB_LEAK
     proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
 #endif
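Both mm/slab.c hunks above replace open-coded hex dump loops with the kernel's print_hex_dump() helper. As a reference for how that helper is typically called, here is a minimal kernel-style usage sketch; the wrapper function name and the "raw: " prefix are invented for illustration, only print_hex_dump() itself comes from the patch:

#include <linux/kernel.h>
#include <linux/printk.h>

/*
 * Dump a buffer at KERN_ERR level: 16 bytes per row, 1-byte groups,
 * byte offsets as the row prefix, and an ASCII column at the end.
 */
static void dump_buffer_example(const void *buf, size_t len)
{
        print_hex_dump(KERN_ERR, "raw: ", DUMP_PREFIX_OFFSET,
                       16, 1, buf, len, true);
}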
mm/slub.c  +392 −166

@@ -467,34 +467,8 @@ static int disable_higher_order_debug;
  */
 static void print_section(char *text, u8 *addr, unsigned int length)
 {
-    int i, offset;
-    int newline = 1;
-    char ascii[17];
-
-    ascii[16] = 0;
-
-    for (i = 0; i < length; i++) {
-        if (newline) {
-            printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
-            newline = 0;
-        }
-        printk(KERN_CONT " %02x", addr[i]);
-        offset = i % 16;
-        ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
-        if (offset == 15) {
-            printk(KERN_CONT " %s\n", ascii);
-            newline = 1;
-        }
-    }
-    if (!newline) {
-        i %= 16;
-        while (i < 16) {
-            printk(KERN_CONT "   ");
-            ascii[i] = ' ';
-            i++;
-        }
-        printk(KERN_CONT " %s\n", ascii);
-    }
+    print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
+            length, 1);
 }

 static struct track *get_track(struct kmem_cache *s, void *object,

@@ -627,8 +601,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
     if (p > addr + 16)
         print_section("Bytes b4 ", p - 16, 16);

-    print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));
+    print_section("Object ", p, min_t(unsigned long, s->objsize, PAGE_SIZE));

     if (s->flags & SLAB_RED_ZONE)
         print_section("Redzone ", p + s->objsize, s->inuse - s->objsize);

@@ -1447,7 +1421,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
     set_freepointer(s, last, NULL);

     page->freelist = start;
-    page->inuse = 0;
+    page->inuse = page->objects;
     page->frozen = 1;
 out:
     return page;

@@ -1534,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n,
                 struct page *page, int tail)
 {
     n->nr_partial++;
-    if (tail)
+    if (tail == DEACTIVATE_TO_TAIL)
         list_add_tail(&page->lru, &n->partial);
     else
         list_add(&page->lru, &n->partial);
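The new_slab() hunk above changes the accounting convention for freshly allocated slabs: a slab handed to a cpu is marked frozen and all of its objects are counted as in use, because its free objects now live only on that cpu's freelist. Below is a tiny userspace model of that invariant; it is not kernel code and every name in it is invented:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the few struct page fields involved. */
struct slab_model {
        int objects;    /* total objects in the slab */
        int inuse;      /* objects accounted as allocated */
        bool frozen;    /* freelist currently owned by one cpu */
};

/* Mirrors what new_slab() now does before handing a slab to a cpu:
 * count everything as in use and mark the slab frozen. */
static void freeze_for_cpu(struct slab_model *s)
{
        s->inuse = s->objects;
        s->frozen = true;
}

int main(void)
{
        struct slab_model s = { .objects = 32 };

        freeze_for_cpu(&s);
        assert(s.frozen && s.inuse == s.objects);
        printf("frozen: %d/%d objects accounted to the cpu\n",
               s.inuse, s.objects);
        return 0;
}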
@@ -1554,10 +1528,13 @@ static inline void remove_partial(struct kmem_cache_node *n,
 /*
  * Lock slab, remove from the partial list and put the object into the
  * per cpu freelist.
+ *
+ * Returns a list of objects or NULL if it fails.
+ *
+ * Must hold list_lock.
  */
-static inline int acquire_slab(struct kmem_cache *s,
-        struct kmem_cache_node *n, struct page *page)
+static inline void *acquire_slab(struct kmem_cache *s,
+        struct kmem_cache_node *n, struct page *page, int mode)
 {
     void *freelist;
     unsigned long counters;

@@ -1572,6 +1549,7 @@ static inline int acquire_slab(struct kmem_cache *s,
         freelist = page->freelist;
         counters = page->counters;
         new.counters = counters;
+        if (mode)
             new.inuse = page->objects;

         VM_BUG_ON(new.frozen);

@@ -1583,32 +1561,19 @@ static inline int acquire_slab(struct kmem_cache *s,
             "lock and freeze"));

     remove_partial(n, page);
-
-    if (freelist) {
-        /* Populate the per cpu freelist */
-        this_cpu_write(s->cpu_slab->freelist, freelist);
-        this_cpu_write(s->cpu_slab->page, page);
-        this_cpu_write(s->cpu_slab->node, page_to_nid(page));
-        return 1;
-    } else {
-        /*
-         * Slab page came from the wrong list. No object to allocate
-         * from. Put it onto the correct list and continue partial
-         * scan.
-         */
-        printk(KERN_ERR "SLUB: %s : Page without available objects on"
-            " partial list\n", s->name);
-        return 0;
-    }
+    return freelist;
 }

+static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+
 /*
  * Try to allocate a partial slab from a specific node.
  */
-static struct page *get_partial_node(struct kmem_cache *s,
-                    struct kmem_cache_node *n)
+static void *get_partial_node(struct kmem_cache *s,
+        struct kmem_cache_node *n, struct kmem_cache_cpu *c)
 {
-    struct page *page;
+    struct page *page, *page2;
+    void *object = NULL;

     /*
      * Racy check. If we mistakenly see no partial slabs then we

@@ -1620,26 +1585,43 @@ static struct page *get_partial_node(struct kmem_cache *s,
         return NULL;

     spin_lock(&n->list_lock);
-    list_for_each_entry(page, &n->partial, lru)
-        if (acquire_slab(s, n, page))
-            goto out;
-    page = NULL;
-out:
+    list_for_each_entry_safe(page, page2, &n->partial, lru) {
+        void *t = acquire_slab(s, n, page, object == NULL);
+        int available;
+
+        if (!t)
+            break;
+
+        if (!object) {
+            c->page = page;
+            c->node = page_to_nid(page);
+            stat(s, ALLOC_FROM_PARTIAL);
+            object = t;
+            available = page->objects - page->inuse;
+        } else {
+            page->freelist = t;
+            available = put_cpu_partial(s, page, 0);
+        }
+        if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+            break;
+    }
     spin_unlock(&n->list_lock);
-    return page;
+    return object;
 }

 /*
  * Get a page from somewhere. Search in increasing NUMA distances.
  */
-static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
+static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
+        struct kmem_cache_cpu *c)
 {
 #ifdef CONFIG_NUMA
     struct zonelist *zonelist;
     struct zoneref *z;
     struct zone *zone;
     enum zone_type high_zoneidx = gfp_zone(flags);
-    struct page *page;
+    void *object;

     /*
      * The defrag ratio allows a configuration of the tradeoffs between

@@ -1672,10 +1654,10 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
             if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
                     n->nr_partial > s->min_partial) {
-                page = get_partial_node(s, n);
-                if (page) {
+                object = get_partial_node(s, n, c);
+                if (object) {
                     put_mems_allowed();
-                    return page;
+                    return object;
                 }
             }
         }

@@ -1687,16 +1669,17 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 /*
  * Get a partial page, lock it and return it.
  */
-static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
+static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
+        struct kmem_cache_cpu *c)
 {
-    struct page *page;
+    void *object;
     int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;

-    page = get_partial_node(s, get_node(s, searchnode));
-    if (page || node != NUMA_NO_NODE)
-        return page;
+    object = get_partial_node(s, get_node(s, searchnode), c);
+    if (object || node != NUMA_NO_NODE)
+        return object;

-    return get_any_partial(s, flags);
+    return get_any_partial(s, flags, c);
 }
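The rewritten get_partial_node() now does two jobs under a single list_lock acquisition: the first slab it can freeze becomes the cpu's active slab, and further slabs are moved straight onto the per-cpu partial list until roughly half of the cache's cpu_partial object budget is cached. A small userspace sketch of that stopping rule follows; it is not kernel code and all names are illustrative:

#include <stdio.h>

struct slab { int objects, inuse; };

/* Returns how many partial slabs would be taken from the node list:
 * the first one becomes the active cpu slab, the rest are stashed per
 * cpu, stopping once more than cpu_partial/2 free objects are cached. */
static int refill(const struct slab *partial, int nr, int cpu_partial)
{
        int available = 0;
        int taken = 0;

        for (int i = 0; i < nr; i++) {
                available += partial[i].objects - partial[i].inuse;
                taken++;
                if (available > cpu_partial / 2)
                        break;
        }
        return taken;
}

int main(void)
{
        /* Mostly-full slabs with 1, 2, 16 and 31 free objects. */
        const struct slab list[] = { {32, 31}, {32, 30}, {32, 16}, {32, 1} };

        /* With cpu_partial = 30 the scan stops after three slabs (19 > 15). */
        printf("slabs taken: %d\n", refill(list, 4, 30));
        return 0;
}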
@@ -1765,9 +1748,6 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
     for_each_possible_cpu(cpu)
         per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
 }
-/*
- * Remove the cpu slab
- */

 /*
  * Remove the cpu slab

@@ -1781,13 +1761,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
     enum slab_modes l = M_NONE, m = M_NONE;
     void *freelist;
     void *nextfree;
-    int tail = 0;
+    int tail = DEACTIVATE_TO_HEAD;
     struct page new;
     struct page old;

     if (page->freelist) {
         stat(s, DEACTIVATE_REMOTE_FREES);
-        tail = 1;
+        tail = DEACTIVATE_TO_TAIL;
     }

     c->tid = next_tid(c->tid);

@@ -1893,7 +1873,7 @@ redo:
     if (m == M_PARTIAL) {
         add_partial(n, page, tail);
-        stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+        stat(s, tail);

     } else if (m == M_FULL) {

@@ -1920,6 +1900,123 @@ redo:
     }
 }

+/* Unfreeze all the cpu partial slabs */
+static void unfreeze_partials(struct kmem_cache *s)
+{
+    struct kmem_cache_node *n = NULL;
+    struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
+    struct page *page;
+
+    while ((page = c->partial)) {
+        enum slab_modes { M_PARTIAL, M_FREE };
+        enum slab_modes l, m;
+        struct page new;
+        struct page old;
+
+        c->partial = page->next;
+        l = M_FREE;
+
+        do {
+            old.freelist = page->freelist;
+            old.counters = page->counters;
+            VM_BUG_ON(!old.frozen);
+
+            new.counters = old.counters;
+            new.freelist = old.freelist;
+
+            new.frozen = 0;
+
+            if (!new.inuse && (!n || n->nr_partial > s->min_partial))
+                m = M_FREE;
+            else {
+                struct kmem_cache_node *n2 = get_node(s,
+                    page_to_nid(page));
+
+                m = M_PARTIAL;
+                if (n != n2) {
+                    if (n)
+                        spin_unlock(&n->list_lock);
+
+                    n = n2;
+                    spin_lock(&n->list_lock);
+                }
+            }
+
+            if (l != m) {
+                if (l == M_PARTIAL)
+                    remove_partial(n, page);
+                else
+                    add_partial(n, page, 1);
+
+                l = m;
+            }
+
+        } while (!cmpxchg_double_slab(s, page,
+                old.freelist, old.counters,
+                new.freelist, new.counters,
+                "unfreezing slab"));
+
+        if (m == M_FREE) {
+            stat(s, DEACTIVATE_EMPTY);
+            discard_slab(s, page);
+            stat(s, FREE_SLAB);
+        }
+    }
+
+    if (n)
+        spin_unlock(&n->list_lock);
+}
+
+/*
+ * Put a page that was just frozen (in __slab_free) into a partial page
+ * slot if available. This is done without interrupts disabled and without
+ * preemption disabled. The cmpxchg is racy and may put the partial page
+ * onto a random cpus partial slot.
+ *
+ * If we did not find a slot then simply move all the partials to the
+ * per node partial list.
+ */
+int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+{
+    struct page *oldpage;
+    int pages;
+    int pobjects;
+
+    do {
+        pages = 0;
+        pobjects = 0;
+        oldpage = this_cpu_read(s->cpu_slab->partial);
+
+        if (oldpage) {
+            pobjects = oldpage->pobjects;
+            pages = oldpage->pages;
+            if (drain && pobjects > s->cpu_partial) {
+                unsigned long flags;
+                /*
+                 * partial array is full. Move the existing
+                 * set to the per node partial list.
+                 */
+                local_irq_save(flags);
+                unfreeze_partials(s);
+                local_irq_restore(flags);
+                pobjects = 0;
+                pages = 0;
+            }
+        }
+
+        pages++;
+        pobjects += page->objects - page->inuse;
+
+        page->pages = pages;
+        page->pobjects = pobjects;
+        page->next = oldpage;
+
+    } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+    stat(s, CPU_PARTIAL_FREE);
+    return pobjects;
+}
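put_cpu_partial() above pushes a just-frozen slab onto the per-cpu partial list with a this_cpu_cmpxchg() retry loop instead of taking any lock, and drains the whole chain back to the node lists once the cached object count exceeds cpu_partial. The following userspace sketch models the same push/drain pattern with C11 atomics; it is a deliberately simplified illustration (a single global head instead of a true per-cpu slot, no interrupt handling), and every name in it is invented:

#include <stdatomic.h>
#include <stdio.h>

struct ppage {
        struct ppage *next;     /* next slab in the chain */
        int pages;              /* slabs in the chain, including this one */
        int pobjects;           /* approx. free objects in the chain */
        int objects, inuse;     /* this slab's own counts */
};

static _Atomic(struct ppage *) partial_head;

/* Stand-in for unfreeze_partials(): hand the chain back to the node lists. */
static void drain_to_node(struct ppage *chain)
{
        (void)chain;
}

static int put_cpu_partial_model(struct ppage *page, int cpu_partial)
{
        struct ppage *old;
        int pages, pobjects;

        do {
                pages = 0;
                pobjects = 0;
                old = atomic_load(&partial_head);
                if (old) {
                        pages = old->pages;
                        pobjects = old->pobjects;
                        if (pobjects > cpu_partial) {
                                /* budget exceeded: drain the current chain */
                                drain_to_node(atomic_exchange(&partial_head, NULL));
                                pages = 0;
                                pobjects = 0;
                                old = NULL;
                        }
                }
                page->pages = pages + 1;
                page->pobjects = pobjects + (page->objects - page->inuse);
                page->next = old;
        } while (!atomic_compare_exchange_weak(&partial_head, &old, page));

        return page->pobjects;
}

int main(void)
{
        struct ppage a = { .objects = 32, .inuse = 31 };

        printf("cached free objects: %d\n", put_cpu_partial_model(&a, 30));
        return 0;
}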
@@ -1935,8 +2032,12 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
     struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);

-    if (likely(c && c->page))
-        flush_slab(s, c);
+    if (likely(c)) {
+        if (c->page)
+            flush_slab(s, c);
+
+        unfreeze_partials(s);
+    }
 }

 static void flush_cpu_slab(void *d)

@@ -2027,12 +2128,39 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
     }
 }

+static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+            int node, struct kmem_cache_cpu **pc)
+{
+    void *object;
+    struct kmem_cache_cpu *c;
+    struct page *page = new_slab(s, flags, node);
+
+    if (page) {
+        c = __this_cpu_ptr(s->cpu_slab);
+        if (c->page)
+            flush_slab(s, c);
+
+        /*
+         * No other reference to the page yet so we can
+         * muck around with it freely without cmpxchg
+         */
+        object = page->freelist;
+        page->freelist = NULL;
+
+        stat(s, ALLOC_SLAB);
+        c->node = page_to_nid(page);
+        c->page = page;
+        *pc = c;
+    } else
+        object = NULL;
+
+    return object;
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
  * Interrupts are disabled.
  *
  * Processing is still very fast if new objects have been freed to the
  * regular freelist. In that case we simply take over the regular freelist
  * as the lockless freelist and zap the regular freelist.
@@ -2049,7 +2177,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
               unsigned long addr, struct kmem_cache_cpu *c)
 {
     void **object;
-    struct page *page;
     unsigned long flags;
     struct page new;
     unsigned long counters;

@@ -2064,13 +2191,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
     c = this_cpu_ptr(s->cpu_slab);
 #endif

-    /* We handle __GFP_ZERO in the caller */
-    gfpflags &= ~__GFP_ZERO;
-
-    page = c->page;
-    if (!page)
+    if (!c->page)
         goto new_slab;
+redo:

     if (unlikely(!node_match(c, node))) {
         stat(s, ALLOC_NODE_MISMATCH);
         deactivate_slab(s, c);

@@ -2080,8 +2203,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
     stat(s, ALLOC_SLOWPATH);

     do {
-        object = page->freelist;
-        counters = page->counters;
+        object = c->page->freelist;
+        counters = c->page->counters;
         new.counters = counters;
         VM_BUG_ON(!new.frozen);

@@ -2095,15 +2218,15 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
      * and use them to refill the per cpu queue.
      */

-    new.inuse = page->objects;
+    new.inuse = c->page->objects;
     new.frozen = object != NULL;

-    } while (!__cmpxchg_double_slab(s, page,
+    } while (!__cmpxchg_double_slab(s, c->page,
             object, counters,
             NULL, new.counters,
             "__slab_alloc"));

-    if (unlikely(!object)) {
+    if (!object) {
         c->page = NULL;
         stat(s, DEACTIVATE_BYPASS);
         goto new_slab;

@@ -2112,58 +2235,47 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
     stat(s, ALLOC_REFILL);

 load_freelist:
-    VM_BUG_ON(!page->frozen);
     c->freelist = get_freepointer(s, object);
     c->tid = next_tid(c->tid);
     local_irq_restore(flags);
     return object;

 new_slab:
-    page = get_partial(s, gfpflags, node);
-    if (page) {
-        stat(s, ALLOC_FROM_PARTIAL);
-        object = c->freelist;
-
-        if (kmem_cache_debug(s))
-            goto debug;
-        goto load_freelist;
-    }
+    if (c->partial) {
+        c->page = c->partial;
+        c->partial = c->page->next;
+        c->node = page_to_nid(c->page);
+        stat(s, CPU_PARTIAL_ALLOC);
+        c->freelist = NULL;
+        goto redo;
+    }

-    page = new_slab(s, gfpflags, node);
+    /* Then do expensive stuff like retrieving pages from the partial lists */
+    object = get_partial(s, gfpflags, node, c);

-    if (page) {
-        c = __this_cpu_ptr(s->cpu_slab);
-        if (c->page)
-            flush_slab(s, c);
+    if (unlikely(!object)) {

-        /*
-         * No other reference to the page yet so we can
-         * muck around with it freely without cmpxchg
-         */
-        object = page->freelist;
-        page->freelist = NULL;
-        page->inuse = page->objects;
+        object = new_slab_objects(s, gfpflags, node, &c);

-        stat(s, ALLOC_SLAB);
-        c->node = page_to_nid(page);
-        c->page = page;
+        if (unlikely(!object)) {
+            if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+                slab_out_of_memory(s, gfpflags, node);

-        if (kmem_cache_debug(s))
-            goto debug;
-        goto load_freelist;
+            local_irq_restore(flags);
+            return NULL;
+        }
     }
-    if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
-        slab_out_of_memory(s, gfpflags, node);
-    local_irq_restore(flags);
-    return NULL;

-debug:
-    if (!object || !alloc_debug_processing(s, page, object, addr))
-        goto new_slab;
+    if (likely(!kmem_cache_debug(s)))
+        goto load_freelist;
+
+    /* Only entered in the debug case */
+    if (!alloc_debug_processing(s, c->page, object, addr))
+        goto new_slab;  /* Slab failed checks. Next slab needed */

     c->freelist = get_freepointer(s, object);
     deactivate_slab(s, c);
     c->page = NULL;
     c->node = NUMA_NO_NODE;
     local_irq_restore(flags);
     return object;
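With the rework above, __slab_alloc() now tries its sources in a fixed order: refill from the current cpu slab, then pop a slab from the lockless per-cpu partial list, then fall back to the node partial lists (which take list_lock), and only then allocate a fresh slab. A compact userspace sketch of that decision order; the enum and function are invented for illustration and are not kernel code:

#include <stdio.h>

enum source { CPU_SLAB, CPU_PARTIAL, NODE_PARTIAL, NEW_SLAB, OOM };

static enum source pick_source(int cpu_slab_has_objects,
                               int cpu_partial_nonempty,
                               int node_partial_nonempty,
                               int page_allocator_ok)
{
        if (cpu_slab_has_objects)
                return CPU_SLAB;        /* lockless fast refill */
        if (cpu_partial_nonempty)
                return CPU_PARTIAL;     /* still no list_lock needed */
        if (node_partial_nonempty)
                return NODE_PARTIAL;    /* takes n->list_lock */
        if (page_allocator_ok)
                return NEW_SLAB;        /* most expensive path */
        return OOM;
}

int main(void)
{
        printf("%d\n", pick_source(0, 1, 1, 1));  /* prints 1 -> CPU_PARTIAL */
        return 0;
}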
@@ -2333,6 +2445,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
         was_frozen = new.frozen;
         new.inuse--;
         if ((!new.inuse || !prior) && !was_frozen && !n) {
-            n = get_node(s, page_to_nid(page));
+
+            if (!kmem_cache_debug(s) && !prior)
+
+                /*
+                 * Slab was on no list before and will be partially empty
+                 * We can defer the list move and instead freeze it.
+                 */
+                new.frozen = 1;
+
+            else { /* Needs to be taken off a list */
+
+                n = get_node(s, page_to_nid(page));
             /*
              * Speculatively acquire the list_lock.

@@ -2343,6 +2466,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
              * other processors updating the list of slabs.
              */
-            spin_lock_irqsave(&n->list_lock, flags);
+                spin_lock_irqsave(&n->list_lock, flags);
+
+            }
         }

     inuse = new.inuse;

@@ -2352,6 +2477,14 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
         "__slab_free"));

     if (likely(!n)) {
+
+        /*
+         * If we just froze the page then put it onto the
+         * per cpu partial list.
+         */
+        if (new.frozen && !was_frozen)
+            put_cpu_partial(s, page, 1);
+
         /*
          * The list lock was not taken therefore no list
          * activity can be necessary.

@@ -2377,7 +2510,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
      */
     if (unlikely(!prior)) {
         remove_full(s, page);
-        add_partial(n, page, 1);
+        add_partial(n, page, DEACTIVATE_TO_TAIL);
         stat(s, FREE_ADD_PARTIAL);
     }
 }

@@ -2421,7 +2554,6 @@ static __always_inline void slab_free(struct kmem_cache *s,

     slab_free_hook(s, x);

 redo:
-
     /*
      * Determine the currently cpus per cpu slab.
      * The cpu may change afterward. However that does not matter since

@@ -2685,7 +2817,7 @@ static void early_kmem_cache_node_alloc(int node)
     n = page->freelist;
     BUG_ON(!n);
     page->freelist = get_freepointer(kmem_cache_node, n);
-    page->inuse++;
+    page->inuse = 1;
     page->frozen = 0;
     kmem_cache_node->node[node] = n;
 #ifdef CONFIG_SLUB_DEBUG

@@ -2695,7 +2827,7 @@ static void early_kmem_cache_node_alloc(int node)
     init_kmem_cache_node(n, kmem_cache_node);
     inc_slabs_node(kmem_cache_node, node, page->objects);

-    add_partial(n, page, 0);
+    add_partial(n, page, DEACTIVATE_TO_HEAD);
 }

@@ -2911,7 +3043,34 @@ static int kmem_cache_open(struct kmem_cache *s,
      * The larger the object size is, the more pages we want on the partial
      * list to avoid pounding the page allocator excessively.
      */
-    set_min_partial(s, ilog2(s->size));
+    set_min_partial(s, ilog2(s->size) / 2);
+
+    /*
+     * cpu_partial determined the maximum number of objects kept in the
+     * per cpu partial lists of a processor.
+     *
+     * Per cpu partial lists mainly contain slabs that just have one
+     * object freed. If they are used for allocation then they can be
+     * filled up again with minimal effort. The slab will never hit the
+     * per node partial lists and therefore no locking will be required.
+     *
+     * This setting also determines
+     *
+     * A) The number of objects from per cpu partial slabs dumped to the
+     *    per node list when we reach the limit.
+     * B) The number of objects in cpu partial slabs to extract from the
+     *    per node list when we run out of per cpu objects. We only fetch 50%
+     *    to keep some capacity around for frees.
+     */
+    if (s->size >= PAGE_SIZE)
+        s->cpu_partial = 2;
+    else if (s->size >= 1024)
+        s->cpu_partial = 6;
+    else if (s->size >= 256)
+        s->cpu_partial = 13;
+    else
+        s->cpu_partial = 30;
+
     s->refcount = 1;
 #ifdef CONFIG_NUMA
     s->remote_node_defrag_ratio = 1000;
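The cpu_partial sizing table added to kmem_cache_open() above can be restated as a tiny standalone program for experimenting with the thresholds. The cut-off values come straight from the hunk; PAGE_SIZE is assumed to be 4096 here, whereas in the kernel it is the architecture's page size:

#include <stdio.h>

#define PAGE_SIZE 4096

static int cpu_partial_for_size(int size)
{
        if (size >= PAGE_SIZE)
                return 2;       /* huge objects: cache almost nothing */
        else if (size >= 1024)
                return 6;
        else if (size >= 256)
                return 13;
        else
                return 30;      /* small objects: cache up to ~30 free ones */
}

int main(void)
{
        int sizes[] = { 64, 192, 512, 2048, 8192 };

        for (int i = 0; i < 5; i++)
                printf("object size %4d -> cpu_partial %d\n",
                       sizes[i], cpu_partial_for_size(sizes[i]));
        return 0;
}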
@@ -2970,13 +3129,13 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,

 /*
  * Attempt to free all partial slabs on a node.
+ * This is called from kmem_cache_close(). We must be the last thread
+ * using the cache and therefore we do not need to lock anymore.
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
-    unsigned long flags;
     struct page *page, *h;

-    spin_lock_irqsave(&n->list_lock, flags);
     list_for_each_entry_safe(page, h, &n->partial, lru) {
         if (!page->inuse) {
             remove_partial(n, page);

@@ -2986,7 +3145,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
                 "Objects remaining on kmem_cache_close()");
         }
     }
-    spin_unlock_irqrestore(&n->list_lock, flags);
 }

@@ -3020,6 +3178,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
     s->refcount--;
     if (!s->refcount) {
         list_del(&s->list);
+        up_write(&slub_lock);
         if (kmem_cache_close(s)) {
             printk(KERN_ERR "SLUB %s: %s called for cache that "
                 "still has objects.\n", s->name, __func__);

@@ -3028,7 +3187,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
         if (s->flags & SLAB_DESTROY_BY_RCU)
             rcu_barrier();
         sysfs_slab_remove(s);
-    }
-    up_write(&slub_lock);
+    } else
+        up_write(&slub_lock);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);

@@ -3347,23 +3506,23 @@ int kmem_cache_shrink(struct kmem_cache *s)
          * list_lock. page->inuse here is the upper limit.
          */
         list_for_each_entry_safe(page, t, &n->partial, lru) {
-            if (!page->inuse) {
-                remove_partial(n, page);
-                discard_slab(s, page);
-            } else {
-                list_move(&page->lru, slabs_by_inuse + page->inuse);
-            }
+            list_move(&page->lru, slabs_by_inuse + page->inuse);
+            if (!page->inuse)
+                n->nr_partial--;
         }

         /*
          * Rebuild the partial list with the slabs filled up most
          * first and the least used slabs at the end.
          */
-        for (i = objects - 1; i >= 0; i--)
+        for (i = objects - 1; i > 0; i--)
             list_splice(slabs_by_inuse + i, n->partial.prev);

         spin_unlock_irqrestore(&n->list_lock, flags);
+
+        /* Release empty slabs */
+        list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
+            discard_slab(s, page);
     }

     kfree(slabs_by_inuse);

@@ -4319,6 +4478,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
         for_each_possible_cpu(cpu) {
             struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+            struct page *page;

             if (!c || c->node < 0)
                 continue;

@@ -4334,6 +4494,13 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                 total += x;
                 nodes[c->node] += x;
             }
+            page = c->partial;
+
+            if (page) {
+                x = page->pobjects;
+                total += x;
+                nodes[c->node] += x;
+            }
             per_cpu[c->node]++;
         }
     }

@@ -4412,11 +4579,12 @@ struct slab_attribute {
     };

 #define SLAB_ATTR_RO(_name) \
-    static struct slab_attribute _name##_attr = __ATTR_RO(_name)
+    static struct slab_attribute _name##_attr = \
+    __ATTR(_name, 0400, _name##_show, NULL)

 #define SLAB_ATTR(_name) \
     static struct slab_attribute _name##_attr =  \
-    __ATTR(_name, 0644, _name##_show, _name##_store)
+    __ATTR(_name, 0600, _name##_show, _name##_store)

 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
 {

@@ -4485,6 +4653,27 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
 }
 SLAB_ATTR(min_partial);

+static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
+{
+    return sprintf(buf, "%u\n", s->cpu_partial);
+}
+
+static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
+                 size_t length)
+{
+    unsigned long objects;
+    int err;
+
+    err = strict_strtoul(buf, 10, &objects);
+    if (err)
+        return err;
+
+    s->cpu_partial = objects;
+    flush_all(s);
+    return length;
+}
+SLAB_ATTR(cpu_partial);
+
 static ssize_t ctor_show(struct kmem_cache *s, char *buf)
 {
     if (!s->ctor)
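The new cpu_partial attribute is exported through SLUB's sysfs directory, so the per-cpu caching can be inspected and tuned at run time. A small userspace sketch of reading and writing it; the cache name "kmalloc-64" is only an example, and root privileges are needed for the write because the attributes are now created 0600/0400:

#include <stdio.h>

int main(void)
{
        const char *path = "/sys/kernel/slab/kmalloc-64/cpu_partial";
        char buf[32];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("current cpu_partial: %s", buf);
        fclose(f);

        /* Writing a new value also triggers flush_all() on the cache. */
        f = fopen(path, "w");
        if (f) {
                fputs("16\n", f);
                fclose(f);
        }
        return 0;
}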
@@ -4523,6 +4712,37 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
 }
 SLAB_ATTR_RO(objects_partial);

+static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
+{
+    int objects = 0;
+    int pages = 0;
+    int cpu;
+    int len;
+
+    for_each_online_cpu(cpu) {
+        struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
+
+        if (page) {
+            pages += page->pages;
+            objects += page->pobjects;
+        }
+    }
+
+    len = sprintf(buf, "%d(%d)", objects, pages);
+
+#ifdef CONFIG_SMP
+    for_each_online_cpu(cpu) {
+        struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
+
+        if (page && len < PAGE_SIZE - 20)
+            len += sprintf(buf + len, " C%d=%d(%d)", cpu,
+                page->pobjects, page->pages);
+    }
+#endif
+    return len + sprintf(buf + len, "\n");
+}
+SLAB_ATTR_RO(slabs_cpu_partial);
+
 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
 {
     return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));

@@ -4845,6 +5065,8 @@ STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
 STAT_ATTR(ORDER_FALLBACK, order_fallback);
 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
+STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
+STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
 #endif

 static struct attribute *slab_attrs[] = {

@@ -4853,6 +5075,7 @@ static struct attribute *slab_attrs[] = {
     &objs_per_slab_attr.attr,
     &order_attr.attr,
     &min_partial_attr.attr,
+    &cpu_partial_attr.attr,
     &objects_attr.attr,
     &objects_partial_attr.attr,
     &partial_attr.attr,

@@ -4865,6 +5088,7 @@ static struct attribute *slab_attrs[] = {
     &destroy_by_rcu_attr.attr,
     &shrink_attr.attr,
     &reserved_attr.attr,
+    &slabs_cpu_partial_attr.attr,
 #ifdef CONFIG_SLUB_DEBUG
     &total_objects_attr.attr,
     &slabs_attr.attr,

@@ -4906,6 +5130,8 @@ static struct attribute *slab_attrs[] = {
     &order_fallback_attr.attr,
     &cmpxchg_double_fail_attr.attr,
     &cmpxchg_double_cpu_fail_attr.attr,
+    &cpu_partial_alloc_attr.attr,
+    &cpu_partial_free_attr.attr,
 #endif
 #ifdef CONFIG_FAILSLAB
     &failslab_attr.attr,

@@ -5257,7 +5483,7 @@ static const struct file_operations proc_slabinfo_operations = {

 static int __init slab_proc_init(void)
 {
-    proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
+    proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
     return 0;
 }
 module_init(slab_proc_init);