
Commit 881db7fb authored by Christoph Lameter, committed by Pekka Enberg

slub: Invert locking and avoid slab lock



Locking slabs is no longer necessary if the arch supports cmpxchg operations
and if no debugging features are used on a slab. If the arch does not support
cmpxchg then we fall back to using the slab lock to do a cmpxchg-like operation.

The patch also changes the lock order. Slab locks are now nested inside the
node lock. With that approach slab_trylock is no longer necessary.
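
As a rough illustration of that fallback, here is a minimal plain-C11 sketch
(the names and types are stand-ins for this sketch, not the kernel's own) of
how a per-page lock can make the compare-and-exchange of freelist and counters
appear atomic when the arch has no cmpxchg:

#include <stdatomic.h>
#include <stdbool.h>

struct demo_page {
	atomic_flag lock;	/* stands in for the PG_locked bit spinlock */
	void *freelist;		/* first free object in the slab */
	unsigned long counters;	/* inuse/objects/frozen packed together */
};

static bool cmpxchg_double_fallback(struct demo_page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new)
{
	bool ok = false;

	while (atomic_flag_test_and_set(&page->lock))
		;	/* spin: slab_lock() equivalent */

	if (page->freelist == freelist_old && page->counters == counters_old) {
		page->freelist = freelist_new;
		page->counters = counters_new;
		ok = true;
	}

	atomic_flag_clear(&page->lock);	/* slab_unlock() equivalent */
	return ok;
}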

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
parent 2cfb7455
+52 −77
@@ -2,10 +2,11 @@
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks and only
 * uses a centralized lock to manage a pool of partial slabs.
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 * (C) 2011 Linux Foundation, Christoph Lameter
 */

#include <linux/mm.h>
@@ -32,15 +33,27 @@

/*
 * Lock order:
 *   1. slab_lock(page)
 *   2. slab->list_lock
 *   1. slub_lock (Global Semaphore)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 *   The slab_lock protects operations on the object of a particular
 *   slab and its metadata in the page struct. If the slab lock
 *   has been taken then no allocations nor frees can be performed
 *   on the objects in the slab nor can the slab be added or removed
 *   from the partial or full lists since this would mean modifying
 *   the page_struct of the slab.
 *   slub_lock
 *
 *   The role of the slub_lock is to protect the list of all the slabs
 *   and to synchronize major metadata changes to slab cache structures.
 *
 *   The slab_lock is only used for debugging and on arches that do not
 *   have the ability to do a cmpxchg_double. It only protects the second
 *   double word in the page struct. Meaning
 *	A. page->freelist	-> List of objects free in a page
 *	B. page->counters	-> Counters of objects
 *	C. page->frozen		-> frozen state
 *
 *   If a slab is frozen then it is exempt from list management. It is not
 *   on any list. The processor that froze the slab is the one who can
 *   perform list operations on the page. Other processors may put objects
 *   onto the freelist but the processor that froze the slab is the only
 *   one that can retrieve the objects from the page's freelist.
 *
 *   The list_lock protects the partial and full list on each node and
 *   the partial slab counter. If taken then no new slabs may be added or
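
The frozen-slab protocol described in the comment above can be sketched in
plain C11 as a single atomic update of the counters word: the freezer claims
every object and sets the frozen bit in one step. The bit layout and names
below are assumptions made for this sketch, and a single-word CAS stands in
for the kernel's cmpxchg_double on freelist plus counters.

#include <stdatomic.h>
#include <stdbool.h>

union demo_counters {
	struct {
		unsigned int inuse:16;		/* objects currently handed out */
		unsigned int objects:15;	/* total objects in the slab */
		unsigned int frozen:1;		/* owned by one CPU, off the node lists */
	};
	unsigned int value;			/* the whole word, for the CAS */
};

static bool freeze_slab(_Atomic unsigned int *counters)
{
	union demo_counters old, new;

	do {
		old.value = atomic_load(counters);
		if (old.frozen)
			return false;		/* another CPU already froze this slab */
		new = old;
		new.frozen = 1;
		new.inuse = new.objects;	/* the freezer takes the whole freelist */
	} while (!atomic_compare_exchange_weak(counters, &old.value, new.value));

	return true;
}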
@@ -53,20 +66,6 @@
 *   slabs, operations can continue without any centralized lock. F.e.
 *   allocating a long series of objects that fill up slabs does not require
 *   the list lock.
 *
 *   The lock order is sometimes inverted when we are trying to get a slab
 *   off a list. We take the list_lock and then look for a page on the list
 *   to use. While we do that objects in the slabs may be freed. We can
 *   only operate on the slab if we have also taken the slab_lock. So we use
 *   a slab_trylock() on the slab. If trylock was successful then no frees
 *   can occur anymore and we can use the slab for allocations etc. If the
 *   slab_trylock() does not succeed then frees are in progress in the slab and
 *   we must stay away from it for a while since we may cause a bouncing
 *   cacheline if we try to acquire the lock. So go onto the next slab.
 *   If all pages are busy then we may allocate a new slab instead of reusing
 *   a partial slab. A new slab has no one operating on it and thus there is
 *   no danger of cacheline contention.
 *
 *   Interrupts are disabled during allocation and deallocation in order to
 *   make the slab allocator safe to use in the context of an irq. In addition
 *   interrupts are disabled to ensure that the processor does not change
@@ -342,6 +341,19 @@ static inline int oo_objects(struct kmem_cache_order_objects x)
	return x.x & OO_MASK;
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	__bit_spin_unlock(PG_locked, &page->flags);
}

static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
@@ -356,11 +368,14 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
	} else
#endif
	{
		slab_lock(page);
		if (page->freelist == freelist_old && page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			return 1;
		}
		slab_unlock(page);
	}

	cpu_relax();
@@ -377,7 +392,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
/*
 * Determine a map of object in use on a page.
 *
 * Slab lock or node listlock must be held to guarantee that the page does
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
@@ -808,10 +823,11 @@ static int check_slab(struct kmem_cache *s, struct page *page)
static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
{
	int nr = 0;
	void *fp = page->freelist;
	void *fp;
	void *object = NULL;
	unsigned long max_objects;

	fp = page->freelist;
	while (fp && nr <= page->objects) {
		if (fp == search)
			return 1;
@@ -1024,6 +1040,8 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *pa
static noinline int free_debug_processing(struct kmem_cache *s,
		 struct page *page, void *object, unsigned long addr)
{
	slab_lock(page);

	if (!check_slab(s, page))
		goto fail;

@@ -1059,10 +1077,12 @@ static noinline int free_debug_processing(struct kmem_cache *s,
		set_track(s, object, TRACK_FREE, addr);
	trace(s, page, object, 0);
	init_object(s, object, SLUB_RED_INACTIVE);
	slab_unlock(page);
	return 1;

fail:
	slab_fix(s, "Object at 0x%p not freed", object);
	slab_unlock(page);
	return 0;
}

@@ -1393,27 +1413,6 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
	free_slab(s, page);
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	__bit_spin_unlock(PG_locked, &page->flags);
}

static __always_inline int slab_trylock(struct page *page)
{
	int rc = 1;

	rc = bit_spin_trylock(PG_locked, &page->flags);
	return rc;
}

/*
 * Management of partially allocated slabs.
 *
@@ -1445,17 +1444,13 @@ static inline void remove_partial(struct kmem_cache_node *n,
 *
 * Must hold list_lock.
 */
static inline int lock_and_freeze_slab(struct kmem_cache *s,
static inline int acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page)
{
	void *freelist;
	unsigned long counters;
	struct page new;


	if (!slab_trylock(page))
		return 0;

	/*
	 * Zap the freelist and set the frozen bit.
	 * The old freelist is the list of objects for the
@@ -1491,7 +1486,6 @@ static inline int lock_and_freeze_slab(struct kmem_cache *s,
		 */
		printk(KERN_ERR "SLUB: %s : Page without available objects on"
			" partial list\n", s->name);
		slab_unlock(page);
		return 0;
	}
}
@@ -1515,7 +1509,7 @@ static struct page *get_partial_node(struct kmem_cache *s,

	spin_lock(&n->list_lock);
	list_for_each_entry(page, &n->partial, lru)
		if (lock_and_freeze_slab(s, n, page))
		if (acquire_slab(s, n, page))
			goto out;
	page = NULL;
out:
@@ -1804,8 +1798,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
				"unfreezing slab"))
		goto redo;

	slab_unlock(page);

	if (lock)
		spin_unlock(&n->list_lock);

@@ -1819,7 +1811,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
	stat(s, CPUSLAB_FLUSH);
	slab_lock(c->page);
	deactivate_slab(s, c);
}

@@ -1968,7 +1959,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	if (!page)
		goto new_slab;

	slab_lock(page);
	if (unlikely(!node_match(c, node)))
		goto another_slab;

@@ -1994,8 +1984,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,

	stat(s, ALLOC_REFILL);

	slab_unlock(page);

	c->freelist = get_freepointer(s, object);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
@@ -2031,7 +2019,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
		page->inuse = page->objects;

		stat(s, ALLOC_SLAB);
		slab_lock(page);
		c->node = page_to_nid(page);
		c->page = page;
		goto load_freelist;
@@ -2205,7 +2192,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
	unsigned long uninitialized_var(flags);

	local_irq_save(flags);
	slab_lock(page);
	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
@@ -2271,7 +2257,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
	spin_unlock(&n->list_lock);

out_unlock:
	slab_unlock(page);
	local_irq_restore(flags);
	return;

@@ -2285,7 +2270,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
	}

	spin_unlock(&n->list_lock);
	slab_unlock(page);
	local_irq_restore(flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
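
The effect of dropping slab_lock() from the free slow path, as in the hunk
above, amounts to pushing the object back onto the freelist with a CAS loop
instead of under a lock. This plain-C11 sketch uses made-up names and, unlike
the real __slab_free, does not update the counters in the same atomic
operation.

#include <stdatomic.h>

struct demo_object {
	struct demo_object *next;	/* freelist link stored inside the object */
};

static void freelist_push(_Atomic(struct demo_object *) *freelist,
			  struct demo_object *object)
{
	struct demo_object *head = atomic_load(freelist);

	do {
		object->next = head;	/* link in front of the current head */
	} while (!atomic_compare_exchange_weak(freelist, &head, object));
}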
@@ -3202,14 +3186,8 @@ int kmem_cache_shrink(struct kmem_cache *s)
		 * list_lock. page->inuse here is the upper limit.
		 */
		list_for_each_entry_safe(page, t, &n->partial, lru) {
			if (!page->inuse && slab_trylock(page)) {
				/*
				 * Must hold slab lock here because slab_free
				 * may have freed the last object and be
				 * waiting to release the slab.
				 */
			if (!page->inuse) {
				remove_partial(n, page);
				slab_unlock(page);
				discard_slab(s, page);
			} else {
				list_move(&page->lru,
@@ -3797,12 +3775,9 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
						unsigned long *map)
{
	if (slab_trylock(page)) {
	slab_lock(page);
	validate_slab(s, page, map);
	slab_unlock(page);
	} else
		printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
			s->name, page);
}

static int validate_slab_node(struct kmem_cache *s,