
Commit 1cac8cd4 authored by K. Y. Srinivasan, committed by Greg Kroah-Hartman

Drivers: hv: balloon: Implement hot-add functionality



Implement the memory hot-add functionality. With this, Linux guests can fully
participate in the Dynamic Memory protocol implemented in Windows hosts.

In this version of the patch, based on Olaf Hering's feedback, I have gotten
rid of the module-level dependency on MEMORY_HOTPLUG. Instead, the code within
the driver that depends on MEMORY_HOTPLUG is guarded by the appropriate
compilation switches. This allows the driver to support pure ballooning in
cases where the kernel does not support memory hotplug.
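
A condensed view of that pattern, abridged from the new hot_add_req() in the
diff below (elided parts are marked with comments; this is a sketch, not the
complete function):

static void hot_add_req(struct work_struct *dummy)
{
	struct dm_hot_add_response resp;
#ifdef CONFIG_MEMORY_HOTPLUG
	unsigned long pg_start, pfn_cnt;
	unsigned long rg_start, rg_sz;
#endif
	struct hv_dynmem_device *dm = &dm_device;

	/* ... zero resp and fill in the DM_MEM_HOT_ADD_RESPONSE header ... */

#ifdef CONFIG_MEMORY_HOTPLUG
	/* Compiled only when the kernel itself supports memory hot-add. */
	/* ... fetch the page/region ranges from dm->ha_wrk ... */
	resp.page_count = process_hot_add(pg_start, pfn_cnt, rg_start, rg_sz);
#endif
	if (resp.page_count > 0)
		resp.result = 1;
	else
		resp.result = 0;
	/*
	 * Without CONFIG_MEMORY_HOTPLUG, page_count stays zero and the
	 * request is reported back as failed; ballooning keeps working.
	 */

	/* ... send resp back to the host over the VMBus channel ... */
}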

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 0cf40a3e
+387 −21
@@ -412,6 +412,27 @@ struct dm_info_msg {
  * End protocol definitions.
  */
 
+/*
+ * State to manage hot adding memory into the guest.
+ * The range start_pfn : end_pfn specifies the range
+ * that the host has asked us to hot add. The range
+ * start_pfn : ha_end_pfn specifies the range that we have
+ * currently hot added. We hot add in multiples of 128M
+ * chunks; it is possible that we may not be able to bring
+ * online all the pages in the region. The range
+ * covered_start_pfn : covered_end_pfn defines the pages that can
+ * be brought online.
+ */
+
+struct hv_hotadd_state {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long covered_start_pfn;
+	unsigned long covered_end_pfn;
+	unsigned long ha_end_pfn;
+	unsigned long end_pfn;
+};
+
 struct balloon_state {
 	__u32 num_pages;
 	struct work_struct wrk;
@@ -419,16 +440,17 @@ struct balloon_state {
 
 struct hot_add_wrk {
 	union dm_mem_page_range ha_page_range;
+	union dm_mem_page_range ha_region_range;
 	struct work_struct wrk;
 };
 
-static bool hot_add;
+static bool hot_add = true;
 static bool do_hot_add;
 /*
  * Delay reporting memory pressure by
  * the specified number of seconds.
  */
-static uint pressure_report_delay = 30;
+static uint pressure_report_delay = 45;
 
 module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
 MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
@@ -456,6 +478,7 @@ enum hv_dm_state {
 static __u8 recv_buffer[PAGE_SIZE];
 static __u8 *send_buffer;
 #define PAGES_IN_2M	512
+#define HA_CHUNK (32 * 1024)
 
 struct hv_dynmem_device {
 	struct hv_device *dev;
@@ -478,6 +501,17 @@ struct hv_dynmem_device {
 	 */
 	struct hot_add_wrk ha_wrk;
 
+	/*
+	 * This state tracks if the host has specified a hot-add
+	 * region.
+	 */
+	bool host_specified_ha_region;
+
+	/*
+	 * State to synchronize hot-add.
+	 */
+	struct completion  ol_waitevent;
+	bool ha_waiting;
 	/*
 	 * This thread handles hot-add
 	 * requests from the host as well as notifying
@@ -486,6 +520,11 @@ struct hv_dynmem_device {
 	 */
 	struct task_struct *thread;
 
+	/*
+	 * A list of hot-add regions.
+	 */
+	struct list_head ha_region_list;
+
 	/*
 	 * We start with the highest version we can support
 	 * and downgrade based on the host; we save here the
@@ -496,35 +535,329 @@
 
 static struct hv_dynmem_device dm_device;
 
-static void hot_add_req(struct work_struct *dummy)
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
 {
+	int i;
 
-	struct dm_hot_add_response resp;
+	for (i = 0; i < size; i++) {
+		struct page *pg;
+		pg = pfn_to_page(start_pfn + i);
+		__online_page_set_limits(pg);
+		__online_page_increment_counters(pg);
+		__online_page_free(pg);
+	}
+}
+
+static void hv_mem_hot_add(unsigned long start, unsigned long size,
+				unsigned long pfn_count,
+				struct hv_hotadd_state *has)
+{
+	int ret = 0;
+	int i, nid, t;
+	unsigned long start_pfn;
+	unsigned long processed_pfn;
+	unsigned long total_pfn = pfn_count;
+
+	for (i = 0; i < (size/HA_CHUNK); i++) {
+		start_pfn = start + (i * HA_CHUNK);
+		has->ha_end_pfn +=  HA_CHUNK;
+
+		if (total_pfn > HA_CHUNK) {
+			processed_pfn = HA_CHUNK;
+			total_pfn -= HA_CHUNK;
+		} else {
+			processed_pfn = total_pfn;
+			total_pfn = 0;
+		}
+
+		has->covered_end_pfn +=  processed_pfn;
+
+		init_completion(&dm_device.ol_waitevent);
+		dm_device.ha_waiting = true;
+
+		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
+		ret = add_memory(nid, PFN_PHYS((start_pfn)),
+				(HA_CHUNK << PAGE_SHIFT));
+
+		if (ret) {
+			pr_info("hot_add memory failed error is %d\n", ret);
+			has->ha_end_pfn -= HA_CHUNK;
+			has->covered_end_pfn -=  processed_pfn;
+			break;
+		}
+
+		/*
+		 * Wait for the memory block to be onlined.
+		 */
+		t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
+		if (t == 0) {
+			pr_info("hot_add memory timedout\n");
+			has->ha_end_pfn -= HA_CHUNK;
+			has->covered_end_pfn -=  processed_pfn;
+			break;
+		}
+
+		}
 
-	if (do_hot_add) {
+	}
 
-		pr_info("Memory hot add not supported\n");
+	return;
+}
 
+static void hv_online_page(struct page *pg)
+{
+	struct list_head *cur;
+	struct hv_hotadd_state *has;
+	unsigned long cur_start_pgp;
+	unsigned long cur_end_pgp;
+
+	if (dm_device.ha_waiting) {
+		dm_device.ha_waiting = false;
+		complete(&dm_device.ol_waitevent);
+	}
+
+	list_for_each(cur, &dm_device.ha_region_list) {
+		has = list_entry(cur, struct hv_hotadd_state, list);
+		cur_start_pgp = (unsigned long)
+				pfn_to_page(has->covered_start_pfn);
+		cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+		if (((unsigned long)pg >= cur_start_pgp) &&
+			((unsigned long)pg < cur_end_pgp)) {
 			/*
-		 * Currently we do not support hot add.
-		 * Just fail the request.
+			 * This frame is currently backed; online the
+			 * page.
 			 */
+			__online_page_set_limits(pg);
+			__online_page_increment_counters(pg);
+			__online_page_free(pg);
+			has->covered_start_pfn++;
+		}
 	}
+}
+
+static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+{
+	struct list_head *cur;
+	struct hv_hotadd_state *has;
+	unsigned long residual, new_inc;
+
+	if (list_empty(&dm_device.ha_region_list))
+		return false;
+
+	list_for_each(cur, &dm_device.ha_region_list) {
+		has = list_entry(cur, struct hv_hotadd_state, list);
+
+		/*
+		 * If the pfn range we are dealing with is not in the current
+		 * "hot add block", move on.
+		 */
+		if ((start_pfn >= has->end_pfn))
+			continue;
+		/*
+		 * If the current hot add-request extends beyond
+		 * our current limit; extend it.
+		 */
+		if ((start_pfn + pfn_cnt) > has->end_pfn) {
+			residual = (start_pfn + pfn_cnt - has->end_pfn);
+			/*
+			 * Extend the region by multiples of HA_CHUNK.
+			 */
+			new_inc = (residual / HA_CHUNK) * HA_CHUNK;
+			if (residual % HA_CHUNK)
+				new_inc += HA_CHUNK;
+
+			has->end_pfn += new_inc;
+		}
+
+		/*
+		 * If the current start pfn is not where the covered_end
+		 * is, update it.
+		 */
+
+		if (has->covered_end_pfn != start_pfn) {
+			has->covered_end_pfn = start_pfn;
+			has->covered_start_pfn = start_pfn;
+		}
+		return true;
+
+	}
+
+	return false;
+}
+
+static unsigned long handle_pg_range(unsigned long pg_start,
+					unsigned long pg_count)
+{
+	unsigned long start_pfn = pg_start;
+	unsigned long pfn_cnt = pg_count;
+	unsigned long size;
+	struct list_head *cur;
+	struct hv_hotadd_state *has;
+	unsigned long pgs_ol = 0;
+	unsigned long old_covered_state;
+
+	if (list_empty(&dm_device.ha_region_list))
+		return 0;
+
+	list_for_each(cur, &dm_device.ha_region_list) {
+		has = list_entry(cur, struct hv_hotadd_state, list);
+
+		/*
+		 * If the pfn range we are dealing with is not in the current
+		 * "hot add block", move on.
+		 */
+		if ((start_pfn >= has->end_pfn))
+			continue;
+
+		old_covered_state = has->covered_end_pfn;
+
+		if (start_pfn < has->ha_end_pfn) {
+			/*
+			 * This is the case where we are backing pages
+			 * in an already hot added region. Bring
+			 * these pages online first.
+			 */
+			pgs_ol = has->ha_end_pfn - start_pfn;
+			if (pgs_ol > pfn_cnt)
+				pgs_ol = pfn_cnt;
+			hv_bring_pgs_online(start_pfn, pgs_ol);
+			has->covered_end_pfn +=  pgs_ol;
+			has->covered_start_pfn +=  pgs_ol;
+			pfn_cnt -= pgs_ol;
+		}
+
+		if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
+			/*
+			 * We have some residual hot add range
+			 * that needs to be hot added; hot add
+			 * it now. Hot add a multiple of
+			 * of HA_CHUNK that fully covers the pages
+			 * we have.
+			 */
+			size = (has->end_pfn - has->ha_end_pfn);
+			if (pfn_cnt <= size) {
+				size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK);
+				if (pfn_cnt % HA_CHUNK)
+					size += HA_CHUNK;
+			} else {
+				pfn_cnt = size;
+			}
+			hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+		}
+		/*
+		 * If we managed to online any pages that were given to us,
+		 * we declare success.
+		 */
+		return has->covered_end_pfn - old_covered_state;
+
+	}
+
+	return 0;
+}
+
+static unsigned long process_hot_add(unsigned long pg_start,
+					unsigned long pfn_cnt,
+					unsigned long rg_start,
+					unsigned long rg_size)
+{
+	struct hv_hotadd_state *ha_region = NULL;
+
+	if (pfn_cnt == 0)
+		return 0;
+
+	if (!dm_device.host_specified_ha_region)
+		if (pfn_covered(pg_start, pfn_cnt))
+			goto do_pg_range;
+
+	/*
+	 * If the host has specified a hot-add range; deal with it first.
+	 */
+
+	if ((rg_size != 0) && (!dm_device.host_specified_ha_region)) {
+		ha_region = kzalloc(sizeof(struct hv_hotadd_state), GFP_KERNEL);
+		if (!ha_region)
+			return 0;
+
+		INIT_LIST_HEAD(&ha_region->list);
+
+		list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+		ha_region->start_pfn = rg_start;
+		ha_region->ha_end_pfn = rg_start;
+		ha_region->covered_start_pfn = pg_start;
+		ha_region->covered_end_pfn = pg_start;
+		ha_region->end_pfn = rg_start + rg_size;
+	}
+
+do_pg_range:
+	/*
+	 * Process the page range specified; bringing them
+	 * online if possible.
+	 */
+	return handle_pg_range(pg_start, pfn_cnt);
+}
+
+#endif
+
+static void hot_add_req(struct work_struct *dummy)
+{
+	struct dm_hot_add_response resp;
+#ifdef CONFIG_MEMORY_HOTPLUG
+	unsigned long pg_start, pfn_cnt;
+	unsigned long rg_start, rg_sz;
+#endif
+	struct hv_dynmem_device *dm = &dm_device;
 
 	memset(&resp, 0, sizeof(struct dm_hot_add_response));
 	resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
 	resp.hdr.size = sizeof(struct dm_hot_add_response);
 	resp.hdr.trans_id = atomic_inc_return(&trans_id);
 
-	resp.page_count = 0;
-	resp.result = 0;
+#ifdef CONFIG_MEMORY_HOTPLUG
+	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
+	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
+
+	rg_start = dm->ha_wrk.ha_region_range.finfo.start_page;
+	rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt;
+
+	if ((rg_start == 0) && (!dm->host_specified_ha_region)) {
+		unsigned long region_size;
+		unsigned long region_start;
+
+		/*
+		 * The host has not specified the hot-add region.
+		 * Based on the hot-add page range being specified,
+		 * compute a hot-add region that can cover the pages
+		 * that need to be hot-added while ensuring the alignment
+		 * and size requirements of Linux as it relates to hot-add.
+		 */
+		region_start = pg_start;
+		region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
+		if (pfn_cnt % HA_CHUNK)
+			region_size += HA_CHUNK;
+
+		region_start = (pg_start / HA_CHUNK) * HA_CHUNK;
+
+		rg_start = region_start;
+		rg_sz = region_size;
+	}
+
+	resp.page_count = process_hot_add(pg_start, pfn_cnt,
+					rg_start, rg_sz);
+#endif
+	if (resp.page_count > 0)
+		resp.result = 1;
+	else
+		resp.result = 0;
 
-	dm_device.state = DM_INITIALIZED;
-	vmbus_sendpacket(dm_device.dev->channel, &resp,
+	if (!do_hot_add || (resp.page_count == 0))
+		pr_info("Memory hot add failed\n");
+
+	dm->state = DM_INITIALIZED;
+	vmbus_sendpacket(dm->dev->channel, &resp,
 			sizeof(struct dm_hot_add_response),
 			(unsigned long)NULL,
 			VM_PKT_DATA_INBAND, 0);
-
 }
 
 static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
@@ -867,6 +1200,7 @@ static void balloon_onchannelcallback(void *context)
 	struct dm_balloon *bal_msg;
 	struct dm_hot_add *ha_msg;
 	union dm_mem_page_range *ha_pg_range;
+	union dm_mem_page_range *ha_region;
 
 	memset(recv_buffer, 0, sizeof(recv_buffer));
 	vmbus_recvpacket(dev->channel, recv_buffer,
@@ -907,8 +1241,26 @@ static void balloon_onchannelcallback(void *context)
 				pr_warn("Currently hot-adding\n");
 			dm->state = DM_HOT_ADD;
 			ha_msg = (struct dm_hot_add *)recv_buffer;
-			ha_pg_range = &ha_msg->range;
-			dm_device.ha_wrk.ha_page_range = *ha_pg_range;
+			if (ha_msg->hdr.size == sizeof(struct dm_hot_add)) {
+				/*
+				 * This is a normal hot-add request specifying
+				 * hot-add memory.
+				 */
+				ha_pg_range = &ha_msg->range;
+				dm->ha_wrk.ha_page_range = *ha_pg_range;
+				dm->ha_wrk.ha_region_range.page_range = 0;
+			} else {
+				/*
+				 * Host is specifying that we first hot-add
+				 * a region and then partially populate this
+				 * region.
+				 */
+				dm->host_specified_ha_region = true;
+				ha_pg_range = &ha_msg->range;
+				ha_region = &ha_pg_range[1];
+				dm->ha_wrk.ha_page_range = *ha_pg_range;
+				dm->ha_wrk.ha_region_range = *ha_region;
+			}
 			schedule_work(&dm_device.ha_wrk.wrk);
 			break;
 
@@ -952,8 +1304,10 @@ static int balloon_probe(struct hv_device *dev,
 	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
 	init_completion(&dm_device.host_event);
 	init_completion(&dm_device.config_event);
+	INIT_LIST_HEAD(&dm_device.ha_region_list);
 	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
 	INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+	dm_device.host_specified_ha_region = false;
 
 	dm_device.thread =
 		 kthread_run(dm_thread_func, &dm_device, "hv_balloon");
@@ -962,6 +1316,10 @@ static int balloon_probe(struct hv_device *dev,
 		goto probe_error1;
 	}
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+	set_online_page_callback(&hv_online_page);
+#endif
+
 	hv_set_drvdata(dev, &dm_device);
 	/*
 	 * Initiate the hand shake with the host and negotiate
@@ -1006,12 +1364,6 @@ static int balloon_probe(struct hv_device *dev,
 	cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
 
 	cap_msg.caps.cap_bits.balloon = 1;
-	/*
-	 * While we currently don't support hot-add,
-	 * we still advertise this capability since the
-	 * host requires that guests partcipating in the
-	 * dynamic memory protocol support hot add.
-	 */
 	cap_msg.caps.cap_bits.hot_add = 1;
 
 	/*
@@ -1049,6 +1401,9 @@ static int balloon_probe(struct hv_device *dev,
 	return 0;
 
 probe_error2:
+#ifdef CONFIG_MEMORY_HOTPLUG
+	restore_online_page_callback(&hv_online_page);
+#endif
 	kthread_stop(dm_device.thread);
 
 probe_error1:
@@ -1061,15 +1416,26 @@ static int balloon_probe(struct hv_device *dev,
 static int balloon_remove(struct hv_device *dev)
 {
 	struct hv_dynmem_device *dm = hv_get_drvdata(dev);
+	struct list_head *cur, *tmp;
+	struct hv_hotadd_state *has;
 
 	if (dm->num_pages_ballooned != 0)
 		pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
 
 	cancel_work_sync(&dm->balloon_wrk.wrk);
 	cancel_work_sync(&dm->ha_wrk.wrk);
+
 	vmbus_close(dev->channel);
 	kthread_stop(dm->thread);
 	kfree(send_buffer);
+#ifdef CONFIG_MEMORY_HOTPLUG
+	restore_online_page_callback(&hv_online_page);
+#endif
+	list_for_each_safe(cur, tmp, &dm->ha_region_list) {
+		has = list_entry(cur, struct hv_hotadd_state, list);
+		list_del(&has->list);
+		kfree(has);
+	}
 
 	return 0;
 }