Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 095444fa authored by Thomas Gleixner's avatar Thomas Gleixner Committed by Greg Kroah-Hartman
Browse files

futex: Replace PF_EXITPIDONE with a state



commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream

The futex exit handling relies on PF_ flags. That's suboptimal as it
requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
the middle of do_exit() to enforce the observability of PF_EXITING in the
futex code.

Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
over to the new state. The PF_EXITING dependency will be cleaned up in a
later step.

This prepares for handling various futex exit issues later.

Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Reviewed-by: default avatarIngo Molnar <mingo@kernel.org>
Acked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de


Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 3fe0ed7b
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -53,6 +53,10 @@ union futex_key {
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }

#ifdef CONFIG_FUTEX
enum {
	FUTEX_STATE_OK,
	FUTEX_STATE_DEAD,
};

static inline void futex_init_task(struct task_struct *tsk)
{
@@ -62,6 +66,34 @@ static inline void futex_init_task(struct task_struct *tsk)
#endif
	INIT_LIST_HEAD(&tsk->pi_state_list);
	tsk->pi_state_cache = NULL;
	tsk->futex_state = FUTEX_STATE_OK;
}

/**
 * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
 * @tsk:	task to set the state on
 *
 * Set the futex exit state of the task lockless. The futex waiter code
 * observes that state when a task is exiting and loops until the task has
 * actually finished the futex cleanup. The worst case for this is that the
 * waiter runs through the wait loop until the state becomes visible.
 *
 * This has two callers:
 *
 * - futex_mm_release() after the futex exit cleanup has been done
 *
 * - do_exit() from the recursive fault handling path.
 *
 * In case of a recursive fault this is best effort. Either the futex exit
 * code has run already or not. If the OWNER_DIED bit has been set on the
 * futex then the waiter can take it over. If not, the problem is pushed
 * back to user space. If the futex exit code did not run yet, then an
 * already queued waiter might block forever, but there is nothing which
 * can be done about that.
 */
static inline void futex_exit_done(struct task_struct *tsk)
{
	tsk->futex_state = FUTEX_STATE_DEAD;
}

void futex_mm_release(struct task_struct *tsk);
@@ -71,6 +103,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
#else
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_mm_release(struct task_struct *tsk) { }
static inline void futex_exit_done(struct task_struct *tsk) { }
static inline long do_futex(u32 __user *uaddr, int op, u32 val,
			    ktime_t *timeout, u32 __user *uaddr2,
			    u32 val2, u32 val3)
+1 −1
Original line number Diff line number Diff line
@@ -996,6 +996,7 @@ struct task_struct {
#endif
	struct list_head		pi_state_list;
	struct futex_pi_state		*pi_state_cache;
	unsigned int			futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
@@ -1377,7 +1378,6 @@ extern struct pid *cad_pid;
 */
#define PF_IDLE			0x00000002	/* I am an IDLE thread */
#define PF_EXITING		0x00000004	/* Getting shut down */
#define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
#define PF_VCPU			0x00000010	/* I'm a virtual CPU */
#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
+2 −16
Original line number Diff line number Diff line
@@ -818,16 +818,7 @@ void __noreturn do_exit(long code)
	 */
	if (unlikely(tsk->flags & PF_EXITING)) {
		pr_alert("Fixing recursive fault but reboot is needed!\n");
		/*
		 * We can do this unlocked here. The futex code uses
		 * this flag just to verify whether the pi state
		 * cleanup has been done or not. In the worst case it
		 * loops once more. We pretend that the cleanup was
		 * done as there is no way to return. Either the
		 * OWNER_DIED bit is set by now or we push the blocked
		 * task into the wait for ever nirwana as well.
		 */
		tsk->flags |= PF_EXITPIDONE;
		futex_exit_done(tsk);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule();
	}
@@ -918,12 +909,7 @@ void __noreturn do_exit(long code)
	 * Make sure we are holding no locks:
	 */
	debug_check_no_locks_held();
	/*
	 * We can do this unlocked here. The futex code uses this flag
	 * just to verify whether the pi state cleanup has been done
	 * or not. In the worst case it loops once more.
	 */
	tsk->flags |= PF_EXITPIDONE;
	futex_exit_done(tsk);

	if (tsk->io_context)
		exit_io_context(tsk);
+13 −12
Original line number Diff line number Diff line
@@ -1182,9 +1182,10 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
	u32 uval2;

	/*
	 * If PF_EXITPIDONE is not yet set, then try again.
	 * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
	 * for it to finish.
	 */
	if (tsk && !(tsk->flags & PF_EXITPIDONE))
	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
		return -EAGAIN;

	/*
@@ -1203,8 +1204,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
	 *   }				     if (!tsk->flags & PF_EXITING) {
	 *  ...				       attach();
	 *  tsk->flags |= PF_EXITPIDONE;     } else {
	 *				       if (!(tsk->flags & PF_EXITPIDONE))
	 *  tsk->futex_state =               } else {
	 *	FUTEX_STATE_DEAD;              if (tsk->futex_state !=
	 *					  FUTEX_STATE_DEAD)
	 *				         return -EAGAIN;
	 *				       return -ESRCH; <--- FAIL
	 *				     }
@@ -1260,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
	}

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 * We need to look at the task state to figure out, whether the
	 * task is exiting. To protect against the change of the task state
	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 * The task is on the way out. When the futex state is
		 * FUTEX_STATE_DEAD, we know that the task has finished
		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);