Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 676165a8 authored by Nick Piggin's avatar Nick Piggin Committed by Linus Torvalds
Browse files

[PATCH] Fix buddy list race that could lead to page lru list corruptions



Rohit found an obscure bug causing buddy list corruption.

page_is_buddy is using a non-atomic test (PagePrivate && page_count == 0)
to determine whether or not a free page's buddy is itself free and in the
buddy lists.

Each of the conjuncts may be true at different times due to unrelated
conditions, so the non-atomic page_is_buddy test may find each conjunct to
be true even if they were not both true at the same time (ie. the page was
not on the buddy lists).

Signed-off-by: default avatarMartin Bligh <mbligh@google.com>
Signed-off-by: default avatarRohit Seth <rohitseth@google.com>
Signed-off-by: default avatarNick Piggin <npiggin@suse.de>
Signed-off-by: default avatarKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent c3a9d654
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -229,10 +229,9 @@ struct page {
		unsigned long private;		/* Mapping-private opaque data:
					 	 * usually used for buffer_heads
						 * if PagePrivate set; used for
						 * swp_entry_t if PageSwapCache.
						 * When page is free, this
						 * swp_entry_t if PageSwapCache;
						 * indicates order in the buddy
						 * system.
						 * system if PG_buddy is set.
						 */
		struct address_space *mapping;	/* If low bit clear, points to
						 * inode address_space, or NULL.
+7 −1
Original line number Diff line number Diff line
@@ -74,7 +74,9 @@
#define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
#define PG_reclaim		17	/* To be reclaimed asap */
#define PG_nosave_free		18	/* Free, should not be written */
#define PG_uncached		19	/* Page has been mapped as uncached */
#define PG_buddy		19	/* Page is free, on buddy lists */

#define PG_uncached		20	/* Page has been mapped as uncached */

/*
 * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -317,6 +319,10 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
#define SetPageNosaveFree(page)	set_bit(PG_nosave_free, &(page)->flags)
#define ClearPageNosaveFree(page)		clear_bit(PG_nosave_free, &(page)->flags)

#define PageBuddy(page)		test_bit(PG_buddy, &(page)->flags)
#define __SetPageBuddy(page)	__set_bit(PG_buddy, &(page)->flags)
#define __ClearPageBuddy(page)	__clear_bit(PG_buddy, &(page)->flags)

#define PageMappedToDisk(page)	test_bit(PG_mappedtodisk, &(page)->flags)
#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
+18 −13
Original line number Diff line number Diff line
@@ -151,7 +151,8 @@ static void bad_page(struct page *page)
			1 << PG_reclaim |
			1 << PG_slab    |
			1 << PG_swapcache |
			1 << PG_writeback );
			1 << PG_writeback |
			1 << PG_buddy );
	set_page_count(page, 0);
	reset_page_mapcount(page);
	page->mapping = NULL;
@@ -236,12 +237,12 @@ static inline unsigned long page_order(struct page *page) {

static inline void set_page_order(struct page *page, int order) {
	set_page_private(page, order);
	__SetPagePrivate(page);
	__SetPageBuddy(page);
}

static inline void rmv_page_order(struct page *page)
{
	__ClearPagePrivate(page);
	__ClearPageBuddy(page);
	set_page_private(page, 0);
}

@@ -280,11 +281,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
 * This function checks whether a page is free && is the buddy
 * we can do coalesce a page and its buddy if
 * (a) the buddy is not in a hole &&
 * (b) the buddy is free &&
 * (c) the buddy is on the buddy system &&
 * (d) a page and its buddy have the same order.
 * for recording page's order, we use page_private(page) and PG_private.
 * (b) the buddy is in the buddy system &&
 * (c) a page and its buddy have the same order.
 *
 * For recording whether a page is in the buddy system, we use PG_buddy.
 * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
 *
 * For recording page's order, we use page_private(page).
 */
static inline int page_is_buddy(struct page *page, int order)
{
@@ -293,10 +296,10 @@ static inline int page_is_buddy(struct page *page, int order)
		return 0;
#endif

       if (PagePrivate(page)           &&
           (page_order(page) == order) &&
            page_count(page) == 0)
	if (PageBuddy(page) && page_order(page) == order) {
		BUG_ON(page_count(page) != 0);
               return 1;
	}
       return 0;
}

@@ -313,7 +316,7 @@ static inline int page_is_buddy(struct page *page, int order)
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep a list of pages, which are heads of continuous
 * free pages of length of (1 << order) and marked with PG_Private.Page's
 * free pages of length of (1 << order) and marked with PG_buddy. Page's
 * order is recorded in page_private(page) field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other.  That is, if we allocate a small block, and both were   
@@ -376,7 +379,8 @@ static inline int free_pages_check(struct page *page)
			1 << PG_slab	|
			1 << PG_swapcache |
			1 << PG_writeback |
			1 << PG_reserved ))))
			1 << PG_reserved |
			1 << PG_buddy ))))
		bad_page(page);
	if (PageDirty(page))
		__ClearPageDirty(page);
@@ -524,7 +528,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
			1 << PG_slab    |
			1 << PG_swapcache |
			1 << PG_writeback |
			1 << PG_reserved ))))
			1 << PG_reserved |
			1 << PG_buddy ))))
		bad_page(page);

	/*