Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eea1d585 authored by Eric Wong, committed by Linus Torvalds
Browse files

epoll: use RCU to protect wakeup_source in epitem



This prevents wakeup_source destruction when a user hits the item with
EPOLL_CTL_MOD while ep_poll_callback is running.

Tested with CONFIG_SPARSE_RCU_POINTER=y and "make fs/eventpoll.o C=2"

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: NeilBrown <neilb@suse.de>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 39732ca5
Loading
Loading
Loading
Loading
+71 −21
Original line number Original line Diff line number Diff line
@@ -160,7 +160,7 @@ struct epitem {
	struct list_head fllink;
	struct list_head fllink;


	/* wakeup_source used when EPOLLWAKEUP is set */
	/* wakeup_source used when EPOLLWAKEUP is set */
	struct wakeup_source *ws;
	struct wakeup_source __rcu *ws;


	/* The structure that describe the interested events and the source fd */
	/* The structure that describe the interested events and the source fd */
	struct epoll_event event;
	struct epoll_event event;
@@ -538,6 +538,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
	}
	}
}
}


/*
 * Return the wakeup_source pointer stored in epi->ws (which may be NULL).
 *
 * call only when ep->mtx is held: the access goes through
 * rcu_dereference_check() with lockdep_is_held(&epi->ep->mtx) as the
 * condition, i.e. holding the eventpoll mutex is what justifies reading
 * the __rcu pointer here rather than an RCU read-side critical section.
 */
static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi)
{
	return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx));
}

/*
 * Call __pm_stay_awake() on the item's wakeup_source, if it has one.
 *
 * call only when ep->mtx is held: the pointer is fetched through
 * ep_wakeup_source(), which checks for that mutex. An item without a
 * wakeup_source (epi->ws is NULL) is silently skipped.
 */
static inline void ep_pm_stay_awake(struct epitem *epi)
{
	struct wakeup_source *ws = ep_wakeup_source(epi);

	/* epi->ws is NULL when no wakeup_source is attached to this item */
	if (ws)
		__pm_stay_awake(ws);
}

/*
 * Report whether the item currently has a wakeup_source attached.
 *
 * Returns true when epi->ws is non-NULL, false otherwise. The pointer is
 * read with rcu_access_pointer() and only tested against NULL; it is
 * never dereferenced here.
 */
static inline bool ep_has_wakeup_source(struct epitem *epi)
{
	return rcu_access_pointer(epi->ws) ? true : false;
}

/*
 * Call __pm_stay_awake() on the item's wakeup_source, if it has one,
 * from a context that does not hold ep->mtx.
 *
 * call when ep->mtx cannot be held (ep_poll_callback): instead of relying
 * on the mutex, the pointer is read and used inside an
 * rcu_read_lock()/rcu_read_unlock() section. ep_destroy_wakeup_source()
 * clears epi->ws and calls synchronize_rcu() before unregistering the
 * wakeup_source, so that this section finishes before the source goes away.
 */
static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
{
	struct wakeup_source *ws;

	rcu_read_lock();
	ws = rcu_dereference(epi->ws);
	/* epi->ws is NULL when no wakeup_source is attached to this item */
	if (ws)
		__pm_stay_awake(ws);
	rcu_read_unlock();
}

/**
/**
 * ep_scan_ready_list - Scans the ready list in a way that makes possible for
 * ep_scan_ready_list - Scans the ready list in a way that makes possible for
 *                      the scan code, to call f_op->poll(). Also allows for
 *                      the scan code, to call f_op->poll(). Also allows for
@@ -601,7 +633,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
		 */
		 */
		if (!ep_is_linked(&epi->rdllink)) {
		if (!ep_is_linked(&epi->rdllink)) {
			list_add_tail(&epi->rdllink, &ep->rdllist);
			list_add_tail(&epi->rdllink, &ep->rdllist);
			__pm_stay_awake(epi->ws);
			ep_pm_stay_awake(epi);
		}
		}
	}
	}
	/*
	/*
@@ -670,7 +702,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
		list_del_init(&epi->rdllink);
		list_del_init(&epi->rdllink);
	spin_unlock_irqrestore(&ep->lock, flags);
	spin_unlock_irqrestore(&ep->lock, flags);


	wakeup_source_unregister(epi->ws);
	wakeup_source_unregister(ep_wakeup_source(epi));


	/* At this point it is safe to free the eventpoll item */
	/* At this point it is safe to free the eventpoll item */
	kmem_cache_free(epi_cache, epi);
	kmem_cache_free(epi_cache, epi);
@@ -754,7 +786,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
			 * callback, but it's not actually ready, as far as
			 * callback, but it's not actually ready, as far as
			 * caller requested events goes. We can remove it here.
			 * caller requested events goes. We can remove it here.
			 */
			 */
			__pm_relax(epi->ws);
			__pm_relax(ep_wakeup_source(epi));
			list_del_init(&epi->rdllink);
			list_del_init(&epi->rdllink);
		}
		}
	}
	}
@@ -986,7 +1018,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
	/* If this file is already in the ready list we exit soon */
	/* If this file is already in the ready list we exit soon */
	if (!ep_is_linked(&epi->rdllink)) {
	if (!ep_is_linked(&epi->rdllink)) {
		list_add_tail(&epi->rdllink, &ep->rdllist);
		list_add_tail(&epi->rdllink, &ep->rdllist);
		__pm_stay_awake(epi->ws);
		ep_pm_stay_awake_rcu(epi);
	}
	}


	/*
	/*
@@ -1148,6 +1180,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
static int ep_create_wakeup_source(struct epitem *epi)
{
{
	const char *name;
	const char *name;
	struct wakeup_source *ws;


	if (!epi->ep->ws) {
	if (!epi->ep->ws) {
		epi->ep->ws = wakeup_source_register("eventpoll");
		epi->ep->ws = wakeup_source_register("eventpoll");
@@ -1156,17 +1189,29 @@ static int ep_create_wakeup_source(struct epitem *epi)
	}
	}


	name = epi->ffd.file->f_path.dentry->d_name.name;
	name = epi->ffd.file->f_path.dentry->d_name.name;
	epi->ws = wakeup_source_register(name);
	ws = wakeup_source_register(name);
	if (!epi->ws)

	if (!ws)
		return -ENOMEM;
		return -ENOMEM;
	rcu_assign_pointer(epi->ws, ws);


	return 0;
	return 0;
}
}


static void ep_destroy_wakeup_source(struct epitem *epi)
/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */
static noinline void ep_destroy_wakeup_source(struct epitem *epi)
{
{
	wakeup_source_unregister(epi->ws);
	struct wakeup_source *ws = ep_wakeup_source(epi);
	epi->ws = NULL;

	rcu_assign_pointer(epi->ws, NULL);

	/*
	 * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is
	 * used internally by wakeup_source_remove, too (called by
	 * wakeup_source_unregister), so we cannot use call_rcu
	 */
	synchronize_rcu();
	wakeup_source_unregister(ws);
}
}


/*
/*
@@ -1201,7 +1246,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
		if (error)
		if (error)
			goto error_create_wakeup_source;
			goto error_create_wakeup_source;
	} else {
	} else {
		epi->ws = NULL;
		RCU_INIT_POINTER(epi->ws, NULL);
	}
	}


	/* Initialize the poll table using the queue callback */
	/* Initialize the poll table using the queue callback */
@@ -1249,7 +1294,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
	/* If the file is already "ready" we drop it inside the ready list */
	/* If the file is already "ready" we drop it inside the ready list */
	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
		list_add_tail(&epi->rdllink, &ep->rdllist);
		list_add_tail(&epi->rdllink, &ep->rdllist);
		__pm_stay_awake(epi->ws);
		ep_pm_stay_awake(epi);


		/* Notify waiting tasks that events are available */
		/* Notify waiting tasks that events are available */
		if (waitqueue_active(&ep->wq))
		if (waitqueue_active(&ep->wq))
@@ -1290,7 +1335,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
		list_del_init(&epi->rdllink);
		list_del_init(&epi->rdllink);
	spin_unlock_irqrestore(&ep->lock, flags);
	spin_unlock_irqrestore(&ep->lock, flags);


	wakeup_source_unregister(epi->ws);
	wakeup_source_unregister(ep_wakeup_source(epi));


error_create_wakeup_source:
error_create_wakeup_source:
	kmem_cache_free(epi_cache, epi);
	kmem_cache_free(epi_cache, epi);
@@ -1319,9 +1364,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
	pt._key = event->events;
	pt._key = event->events;
	epi->event.data = event->data; /* protected by mtx */
	epi->event.data = event->data; /* protected by mtx */
	if (epi->event.events & EPOLLWAKEUP) {
	if (epi->event.events & EPOLLWAKEUP) {
		if (!epi->ws)
		if (!ep_has_wakeup_source(epi))
			ep_create_wakeup_source(epi);
			ep_create_wakeup_source(epi);
	} else if (epi->ws) {
	} else if (ep_has_wakeup_source(epi)) {
		ep_destroy_wakeup_source(epi);
		ep_destroy_wakeup_source(epi);
	}
	}


@@ -1359,7 +1404,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
		spin_lock_irq(&ep->lock);
		spin_lock_irq(&ep->lock);
		if (!ep_is_linked(&epi->rdllink)) {
		if (!ep_is_linked(&epi->rdllink)) {
			list_add_tail(&epi->rdllink, &ep->rdllist);
			list_add_tail(&epi->rdllink, &ep->rdllist);
			__pm_stay_awake(epi->ws);
			ep_pm_stay_awake(epi);


			/* Notify waiting tasks that events are available */
			/* Notify waiting tasks that events are available */
			if (waitqueue_active(&ep->wq))
			if (waitqueue_active(&ep->wq))
@@ -1385,6 +1430,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
	unsigned int revents;
	unsigned int revents;
	struct epitem *epi;
	struct epitem *epi;
	struct epoll_event __user *uevent;
	struct epoll_event __user *uevent;
	struct wakeup_source *ws;
	poll_table pt;
	poll_table pt;


	init_poll_funcptr(&pt, NULL);
	init_poll_funcptr(&pt, NULL);
@@ -1407,9 +1453,13 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
		 * instead, but then epi->ws would temporarily be out of sync
		 * instead, but then epi->ws would temporarily be out of sync
		 * with ep_is_linked().
		 * with ep_is_linked().
		 */
		 */
		if (epi->ws && epi->ws->active)
		ws = ep_wakeup_source(epi);
		if (ws) {
			if (ws->active)
				__pm_stay_awake(ep->ws);
				__pm_stay_awake(ep->ws);
		__pm_relax(epi->ws);
			__pm_relax(ws);
		}

		list_del_init(&epi->rdllink);
		list_del_init(&epi->rdllink);


		pt._key = epi->event.events;
		pt._key = epi->event.events;
@@ -1426,7 +1476,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
			if (__put_user(revents, &uevent->events) ||
			if (__put_user(revents, &uevent->events) ||
			    __put_user(epi->event.data, &uevent->data)) {
			    __put_user(epi->event.data, &uevent->data)) {
				list_add(&epi->rdllink, head);
				list_add(&epi->rdllink, head);
				__pm_stay_awake(epi->ws);
				ep_pm_stay_awake(epi);
				return eventcnt ? eventcnt : -EFAULT;
				return eventcnt ? eventcnt : -EFAULT;
			}
			}
			eventcnt++;
			eventcnt++;
@@ -1446,7 +1496,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
				 * poll callback will queue them in ep->ovflist.
				 * poll callback will queue them in ep->ovflist.
				 */
				 */
				list_add_tail(&epi->rdllink, &ep->rdllist);
				list_add_tail(&epi->rdllink, &ep->rdllist);
				__pm_stay_awake(epi->ws);
				ep_pm_stay_awake(epi);
			}
			}
		}
		}
	}
	}