Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ae1139ec authored by Dan Williams, committed by Dave Jiang
Browse files

mm, memory_failure: Collect mapping size in collect_procs()



In preparation for supporting memory_failure() for dax mappings, teach
collect_procs() to also determine the mapping size. Unlike typical
mappings the dax mapping size is determined by walking page-table
entries rather than using the compound-page accounting for THP pages.

Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
parent 23e7b5c2
Loading
Loading
Loading
Loading
+40 −41
Original line number Diff line number Diff line
@@ -173,23 +173,52 @@ int hwpoison_filter(struct page *p)

EXPORT_SYMBOL_GPL(hwpoison_filter);

/*
 * Kill all processes that have a poisoned page mapped and then isolate
 * the page.
 *
 * General strategy:
 * Find all processes having the page mapped and kill them.
 * But we keep a page reference around so that the page is not
 * actually freed yet.
 * Then stash the page away
 *
 * There's no convenient way to get back to mapped processes
 * from the VMAs. So do a brute-force search over all
 * running processes.
 *
 * Remember that machine checks are not common (or rather
 * if they are common you have other problems), so this shouldn't
 * be a performance issue.
 *
 * Also there are some races possible while we get from the
 * error detection to actually handle it.
 */

/*
 * Describes one task that should be signalled because it maps the
 * poisoned page. kill_proc() consumes an entry of this type to build
 * the SIGBUS it sends.
 */
struct to_kill {
	/* List linkage; presumably chains entries on the list handed to kill_procs(). */
	struct list_head nd;
	/* Task that will receive the signal. */
	struct task_struct *tsk;
	/*
	 * Address of the page within the task's VMA, as returned by
	 * page_address_in_vma(); reported as the fault address of the signal.
	 */
	unsigned long addr;
	/*
	 * Shift describing the mapping size: filled in by add_to_kill() as
	 * compound_order(compound_head(p)) + PAGE_SHIFT and passed by
	 * kill_proc() as the addr_lsb argument of the mceerr signal helpers.
	 */
	short size_shift;
	/*
	 * Set to 1 by add_to_kill() after addr has been filled in.
	 * NOTE(review): the path that clears it is not visible in this hunk.
	 */
	char addr_valid;
};

/*
 * Send all the processes who have the page mapped a signal.
 * ``action optional'' if they are not immediately affected by the error
 * ``action required'' if error happened in current execution context
 */
static int kill_proc(struct task_struct *t, unsigned long addr,
			unsigned long pfn, struct page *page, int flags)
static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
{
	short addr_lsb;
	struct task_struct *t = tk->tsk;
	short addr_lsb = tk->size_shift;
	int ret;

	pr_err("Memory failure: %#lx: Killing %s:%d due to hardware memory corruption\n",
		pfn, t->comm, t->pid);
	addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;

	if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
		ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)addr,
		ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr,
				       addr_lsb, current);
	} else {
		/*
@@ -198,7 +227,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr,
		 * This could cause a loop when the user sets SIGBUS
		 * to SIG_IGN, but hopefully no one will do that?
		 */
		ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)addr,
		ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
				      addr_lsb, t);  /* synchronous? */
	}
	if (ret < 0)
@@ -234,35 +263,6 @@ void shake_page(struct page *p, int access)
}
EXPORT_SYMBOL_GPL(shake_page);

/*
 * Kill all processes that have a poisoned page mapped and then isolate
 * the page.
 *
 * General strategy:
 * Find all processes having the page mapped and kill them.
 * But we keep a page reference around so that the page is not
 * actually freed yet.
 * Then stash the page away
 *
 * There's no convenient way to get back to mapped processes
 * from the VMAs. So do a brute-force search over all
 * running processes.
 *
 * Remember that machine checks are not common (or rather
 * if they are common you have other problems), so this shouldn't
 * be a performance issue.
 *
 * Also there are some races possible while we get from the
 * error detection to actually handle it.
 */

/*
 * Describes one task that should be signalled because it maps the
 * poisoned page.
 */
struct to_kill {
	/* List linkage; presumably chains entries on the list handed to kill_procs(). */
	struct list_head nd;
	/* Task that will receive the signal. */
	struct task_struct *tsk;
	/*
	 * Address of the page within the task's VMA, as returned by
	 * page_address_in_vma(); reported as the fault address of the signal.
	 */
	unsigned long addr;
	/*
	 * Set to 1 by add_to_kill() after addr has been filled in.
	 * NOTE(review): the path that clears it is not visible in this hunk.
	 */
	char addr_valid;
};

/*
 * Failure handling: if we can't find or can't kill a process there's
 * not much we can do.	We just print a message and ignore otherwise.
@@ -292,6 +292,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
	}
	tk->addr = page_address_in_vma(p, vma);
	tk->addr_valid = 1;
	tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;

	/*
	 * In theory we don't have to kill when the page was
@@ -317,9 +318,8 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 * Also when FAIL is set do a force kill because something went
 * wrong earlier.
 */
static void kill_procs(struct list_head *to_kill, int forcekill,
			  bool fail, struct page *page, unsigned long pfn,
			  int flags)
static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
		unsigned long pfn, int flags)
{
	struct to_kill *tk, *next;

@@ -342,8 +342,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill,
			 * check for that, but we need to tell the
			 * process anyways.
			 */
			else if (kill_proc(tk->tsk, tk->addr,
					      pfn, page, flags) < 0)
			else if (kill_proc(tk, pfn, flags) < 0)
				pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
				       pfn, tk->tsk->comm, tk->tsk->pid);
		}
@@ -1012,7 +1011,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
	 * any accesses to the poisoned memory.
	 */
	forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL);
	kill_procs(&tokill, forcekill, !unmap_success, p, pfn, flags);
	kill_procs(&tokill, forcekill, !unmap_success, pfn, flags);

	return unmap_success;
}