
Commit e8c5c824 authored by Lee Schermerhorn, committed by Linus Torvalds

hugetlb: balance freeing of huge pages across nodes



Free huge pages from nodes in round-robin fashion in an attempt to keep
persistent (a.k.a. static) huge pages balanced across nodes.

New function free_pool_huge_page() is modeled on, and performs roughly the
inverse of, alloc_fresh_huge_page().  It replaces dequeue_huge_page(), which
now has no callers, so this patch removes it.

Helper function hstate_next_node_to_free() uses the new hstate member
next_nid_to_free to distribute "frees" across all nodes with huge pages.
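
For illustration, the round-robin freeing scheme described above can be sketched in a few lines of ordinary user-space C.  This is only a sketch, not the kernel code (which follows in the diff below): MAX_NODES, free_count[] and next_to_free are made-up stand-ins for the online node map, the per-node hugepage free lists and the new next_nid_to_free cursor.

/*
 * Illustrative user-space sketch only (not the kernel code below).
 */
#include <stdio.h>

#define MAX_NODES 4

static unsigned long free_count[MAX_NODES] = { 5, 1, 3, 0 };
static int next_to_free;

/* Advance the cursor, wrapping around -- cf. hstate_next_node_to_free(). */
static int next_node_to_free(void)
{
	next_to_free = (next_to_free + 1) % MAX_NODES;
	return next_to_free;
}

/*
 * Free one page from the next node that has any, giving up after one
 * full pass -- cf. free_pool_huge_page().  Returns 1 if a page was freed.
 */
static int free_one_page(void)
{
	int start = next_to_free;
	int nid = start;
	int ret = 0;

	do {
		if (free_count[nid]) {
			free_count[nid]--;
			ret = 1;
		}
		nid = next_node_to_free();
	} while (!ret && nid != start);

	return ret;
}

int main(void)
{
	int i, nid;

	for (i = 0; i < 4; i++)		/* free four pages round robin */
		free_one_page();
	for (nid = 0; nid < MAX_NODES; nid++)
		printf("node %d: %lu pages left\n", nid, free_count[nid]);
	return 0;
}

Each call frees at most one page, resumes at the node after the last one visited, and gives up after a full pass finds nothing to free; repeating the call drains the nodes more or less evenly, which is the balance the patch aims for.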

Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 55a4462a
include/linux/hugetlb.h  +2 −1
@@ -185,7 +185,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 #define HSTATE_NAME_LEN 32
 /* Defines one hugetlb page size */
 struct hstate {
-	int hugetlb_next_nid;
+	int next_nid_to_alloc;
+	int next_nid_to_free;
 	unsigned int order;
 	unsigned long mask;
 	unsigned long max_huge_pages;
mm/hugetlb.c  +86 −46
@@ -456,24 +456,6 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(struct hstate *h)
-{
-	int nid;
-	struct page *page = NULL;
-
-	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
-		if (!list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-			break;
-		}
-	}
-	return page;
-}
-
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
@@ -641,7 +623,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)

 /*
  * Use a helper variable to find the next node and then
- * copy it back to hugetlb_next_nid afterwards:
+ * copy it back to next_nid_to_alloc afterwards:
  * otherwise there's a window in which a racer might
  * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
  * But we don't need to use a spin_lock here: it really
@@ -650,13 +632,13 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
  * if we just successfully allocated a hugepage so that
  * the next caller gets hugepages on the next node.
  */
-static int hstate_next_node(struct hstate *h)
+static int hstate_next_node_to_alloc(struct hstate *h)
 {
 	int next_nid;
-	next_nid = next_node(h->hugetlb_next_nid, node_online_map);
+	next_nid = next_node(h->next_nid_to_alloc, node_online_map);
 	if (next_nid == MAX_NUMNODES)
 		next_nid = first_node(node_online_map);
-	h->hugetlb_next_nid = next_nid;
+	h->next_nid_to_alloc = next_nid;
 	return next_nid;
 }

@@ -667,14 +649,15 @@ static int alloc_fresh_huge_page(struct hstate *h)
 	int next_nid;
 	int ret = 0;
 
-	start_nid = h->hugetlb_next_nid;
+	start_nid = h->next_nid_to_alloc;
+	next_nid = start_nid;
 
 	do {
-		page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
+		page = alloc_fresh_huge_page_node(h, next_nid);
 		if (page)
 			ret = 1;
-		next_nid = hstate_next_node(h);
-	} while (!page && h->hugetlb_next_nid != start_nid);
+		next_nid = hstate_next_node_to_alloc(h);
+	} while (!page && next_nid != start_nid);
 
 	if (ret)
 		count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -684,6 +667,52 @@ static int alloc_fresh_huge_page(struct hstate *h)
 	return ret;
 }
 
+/*
+ * helper for free_pool_huge_page() - find next node
+ * from which to free a huge page
+ */
+static int hstate_next_node_to_free(struct hstate *h)
+{
+	int next_nid;
+	next_nid = next_node(h->next_nid_to_free, node_online_map);
+	if (next_nid == MAX_NUMNODES)
+		next_nid = first_node(node_online_map);
+	h->next_nid_to_free = next_nid;
+	return next_nid;
+}
+
+/*
+ * Free huge page from pool from next node to free.
+ * Attempt to keep persistent huge pages more or less
+ * balanced over allowed nodes.
+ * Called with hugetlb_lock locked.
+ */
+static int free_pool_huge_page(struct hstate *h)
+{
+	int start_nid;
+	int next_nid;
+	int ret = 0;
+
+	start_nid = h->next_nid_to_free;
+	next_nid = start_nid;
+
+	do {
+		if (!list_empty(&h->hugepage_freelists[next_nid])) {
+			struct page *page =
+				list_entry(h->hugepage_freelists[next_nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			h->free_huge_pages--;
+			h->free_huge_pages_node[next_nid]--;
+			update_and_free_page(h, page);
+			ret = 1;
+		}
+		next_nid = hstate_next_node_to_free(h);
+	} while (!ret && next_nid != start_nid);
+
+	return ret;
+}
+
 static struct page *alloc_buddy_huge_page(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long address)
 {
@@ -1008,7 +1037,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 		void *addr;
 
 		addr = __alloc_bootmem_node_nopanic(
-				NODE_DATA(h->hugetlb_next_nid),
+				NODE_DATA(h->next_nid_to_alloc),
 				huge_page_size(h), huge_page_size(h), 0);
 
 		if (addr) {
@@ -1020,7 +1049,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 			m = addr;
 			goto found;
 		}
-		hstate_next_node(h);
+		hstate_next_node_to_alloc(h);
 		nr_nodes--;
 	}
 	return 0;
@@ -1141,31 +1170,43 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
  */
 static int adjust_pool_surplus(struct hstate *h, int delta)
 {
-	static int prev_nid;
-	int nid = prev_nid;
+	int start_nid, next_nid;
 	int ret = 0;
 
 	VM_BUG_ON(delta != -1 && delta != 1);
-	do {
-		nid = next_node(nid, node_online_map);
-		if (nid == MAX_NUMNODES)
-			nid = first_node(node_online_map);
 
-		/* To shrink on this node, there must be a surplus page */
-		if (delta < 0 && !h->surplus_huge_pages_node[nid])
-			continue;
+	if (delta < 0)
+		start_nid = h->next_nid_to_alloc;
+	else
+		start_nid = h->next_nid_to_free;
+	next_nid = start_nid;
+
+	do {
+		int nid = next_nid;
+		if (delta < 0)  {
+			next_nid = hstate_next_node_to_alloc(h);
+			/*
+			 * To shrink on this node, there must be a surplus page
+			 */
+			if (!h->surplus_huge_pages_node[nid])
+				continue;
-		/* Surplus cannot exceed the total number of pages */
-		if (delta > 0 && h->surplus_huge_pages_node[nid] >=
+		}
+		if (delta > 0) {
+			next_nid = hstate_next_node_to_free(h);
+			/*
+			 * Surplus cannot exceed the total number of pages
+			 */
+			if (h->surplus_huge_pages_node[nid] >=
 						h->nr_huge_pages_node[nid])
-			continue;
+				continue;
+		}
 
 		h->surplus_huge_pages += delta;
 		h->surplus_huge_pages_node[nid] += delta;
 		ret = 1;
 		break;
-	} while (nid != prev_nid);
+	} while (next_nid != start_nid);
 
-	prev_nid = nid;
 	return ret;
 }

@@ -1227,10 +1268,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	min_count = max(count, min_count);
 	try_to_free_low(h, min_count);
 	while (min_count < persistent_huge_pages(h)) {
-		struct page *page = dequeue_huge_page(h);
-		if (!page)
+		if (!free_pool_huge_page(h))
 			break;
-		update_and_free_page(h, page);
 	}
 	while (count < persistent_huge_pages(h)) {
 		if (!adjust_pool_surplus(h, 1))
@@ -1442,7 +1481,8 @@ void __init hugetlb_add_hstate(unsigned order)
 	h->free_huge_pages = 0;
 	for (i = 0; i < MAX_NUMNODES; ++i)
 		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
-	h->hugetlb_next_nid = first_node(node_online_map);
+	h->next_nid_to_alloc = first_node(node_online_map);
+	h->next_nid_to_free = first_node(node_online_map);
 	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
 					huge_page_size(h)/1024);