
Commit 36f55889 authored by Kent Overstreet, committed by Linus Torvalds

aio: refcounting cleanup



The usage of ctx->dead was fubar - it makes no sense to explicitly check
it all over the place, especially when we're already using RCU.

Now, ctx->dead only indicates whether we've dropped the initial
refcount. The new teardown sequence is:

  set ctx->dead
  hlist_del_rcu();
  synchronize_rcu();

Now we know no system calls can take a new ref, and it's safe to drop
the initial ref:

  put_ioctx();
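
A condensed C sketch of that path as implemented by kill_ioctx() in the diff
below (the real code defers the wakeup and the final put to kill_ioctx_work();
they are inlined here for brevity):

  static void kill_ioctx(struct kioctx *ctx)
  {
          /* only the first caller gets to tear the context down */
          if (!atomic_xchg(&ctx->dead, 1)) {
                  hlist_del_rcu(&ctx->list);  /* lookup_ioctx() can no longer find it */
                  synchronize_rcu();          /* wait out lookups already in flight */
                  wake_up_all(&ctx->wait);    /* kick any read_events() waiters */
                  put_ioctx(ctx);             /* drop the initial ref */
          }
  }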

We also need to ensure there are no more outstanding kiocbs.  This was
done incorrectly - it was being done in kill_ctx(), and before dropping
the initial refcount.  At this point, other syscalls may still be
submitting kiocbs!

Now, we cancel and wait for outstanding kiocbs in free_ioctx(), after
kioctx->users has dropped to 0 and we know no more iocbs could be
submitted.
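
In C, that cancel-and-wait now sits at the top of free_ioctx(); a sketch of just
that part (the ring teardown, aio_nr accounting and final call_rcu() appear in
the full version in the diff):

  static void free_ioctx(struct kioctx *ctx)
  {
          struct io_event res;
          struct kiocb *req;

          spin_lock_irq(&ctx->ctx_lock);

          /* cancel whatever is still on active_reqs */
          while (!list_empty(&ctx->active_reqs)) {
                  req = list_first_entry(&ctx->active_reqs, struct kiocb, ki_list);
                  list_del_init(&req->ki_list);
                  kiocb_cancel(ctx, req, &res);
          }

          spin_unlock_irq(&ctx->ctx_lock);

          /* completions drive reqs_active to 0; aio_complete() does the wakeup */
          wait_event(ctx->wait, !atomic_read(&ctx->reqs_active));
          ...
  }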

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Kent Overstreet <koverstreet@google.com>
Cc: Zach Brown <zab@redhat.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Asai Thambi S P <asamymuthupa@micron.com>
Cc: Selvan Mani <smani@micron.com>
Cc: Sam Bradshaw <sbradshaw@micron.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Reviewed-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 11599eba
+119 −153
@@ -79,7 +79,7 @@ static inline unsigned aio_ring_avail(struct aio_ring_info *info,

struct kioctx {
	atomic_t		users;
	int			dead;
	atomic_t		dead;

	/* This needs improving */
	unsigned long		user_id;
@@ -98,6 +98,7 @@ struct kioctx {
	struct aio_ring_info	ring_info;

	struct rcu_head		rcu_head;
	struct work_struct	rcu_work;
};

/*------ sysctl variables----*/
@@ -237,44 +238,6 @@ static int aio_setup_ring(struct kioctx *ctx)
	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
} while(0)

static void ctx_rcu_free(struct rcu_head *head)
{
	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
	kmem_cache_free(kioctx_cachep, ctx);
}

/* __put_ioctx
 *	Called when the last user of an aio context has gone away,
 *	and the struct needs to be freed.
 */
static void __put_ioctx(struct kioctx *ctx)
{
	unsigned nr_events = ctx->max_reqs;
	BUG_ON(atomic_read(&ctx->reqs_active));

	aio_free_ring(ctx);
	if (nr_events) {
		spin_lock(&aio_nr_lock);
		BUG_ON(aio_nr - nr_events > aio_nr);
		aio_nr -= nr_events;
		spin_unlock(&aio_nr_lock);
	}
	pr_debug("freeing %p\n", ctx);
	call_rcu(&ctx->rcu_head, ctx_rcu_free);
}

static inline int try_get_ioctx(struct kioctx *kioctx)
{
	return atomic_inc_not_zero(&kioctx->users);
}

static inline void put_ioctx(struct kioctx *kioctx)
{
	BUG_ON(atomic_read(&kioctx->users) <= 0);
	if (unlikely(atomic_dec_and_test(&kioctx->users)))
		__put_ioctx(kioctx);
}

static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
			struct io_event *res)
{
@@ -298,6 +261,61 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
	return ret;
}

static void free_ioctx_rcu(struct rcu_head *head)
{
	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
	kmem_cache_free(kioctx_cachep, ctx);
}

/*
 * When this function runs, the kioctx has been removed from the "hash table"
 * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
 * now it's safe to cancel any that need to be.
 */
static void free_ioctx(struct kioctx *ctx)
{
	struct io_event res;
	struct kiocb *req;

	spin_lock_irq(&ctx->ctx_lock);

	while (!list_empty(&ctx->active_reqs)) {
		req = list_first_entry(&ctx->active_reqs,
				       struct kiocb, ki_list);

		list_del_init(&req->ki_list);
		kiocb_cancel(ctx, req, &res);
	}

	spin_unlock_irq(&ctx->ctx_lock);

	wait_event(ctx->wait, !atomic_read(&ctx->reqs_active));

	aio_free_ring(ctx);

	spin_lock(&aio_nr_lock);
	BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
	aio_nr -= ctx->max_reqs;
	spin_unlock(&aio_nr_lock);

	pr_debug("freeing %p\n", ctx);

	/*
	 * Here the call_rcu() is between the wait_event() for reqs_active to
	 * hit 0, and freeing the ioctx.
	 *
	 * aio_complete() decrements reqs_active, but it has to touch the ioctx
	 * after to issue a wakeup so we use rcu.
	 */
	call_rcu(&ctx->rcu_head, free_ioctx_rcu);
}

static void put_ioctx(struct kioctx *ctx)
{
	if (unlikely(atomic_dec_and_test(&ctx->users)))
		free_ioctx(ctx);
}

/* ioctx_alloc
 *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
 */
@@ -324,6 +342,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
	ctx->max_reqs = nr_events;

	atomic_set(&ctx->users, 2);
	atomic_set(&ctx->dead, 0);
	spin_lock_init(&ctx->ctx_lock);
	spin_lock_init(&ctx->ring_info.ring_lock);
	init_waitqueue_head(&ctx->wait);
@@ -361,44 +380,43 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
	return ERR_PTR(err);
}

/* kill_ctx
 *	Cancels all outstanding aio requests on an aio context.  Used 
 *	when the processes owning a context have all exited to encourage 
 *	the rapid destruction of the kioctx.
 */
static void kill_ctx(struct kioctx *ctx)
static void kill_ioctx_work(struct work_struct *work)
{
	struct task_struct *tsk = current;
	DECLARE_WAITQUEUE(wait, tsk);
	struct io_event res;
	struct kiocb *req;
	struct kioctx *ctx = container_of(work, struct kioctx, rcu_work);

	spin_lock_irq(&ctx->ctx_lock);
	ctx->dead = 1;
	while (!list_empty(&ctx->active_reqs)) {
		req = list_first_entry(&ctx->active_reqs,
					struct kiocb, ki_list);

		list_del_init(&req->ki_list);
		kiocb_cancel(ctx, req, &res);
	wake_up_all(&ctx->wait);
	put_ioctx(ctx);
}

	if (!atomic_read(&ctx->reqs_active))
		goto out;
static void kill_ioctx_rcu(struct rcu_head *head)
{
	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);

	add_wait_queue(&ctx->wait, &wait);
	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
	while (atomic_read(&ctx->reqs_active)) {
		spin_unlock_irq(&ctx->ctx_lock);
		io_schedule();
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		spin_lock_irq(&ctx->ctx_lock);
	INIT_WORK(&ctx->rcu_work, kill_ioctx_work);
	schedule_work(&ctx->rcu_work);
}
	__set_task_state(tsk, TASK_RUNNING);
	remove_wait_queue(&ctx->wait, &wait);

out:
	spin_unlock_irq(&ctx->ctx_lock);
/* kill_ioctx
 *	Cancels all outstanding aio requests on an aio context.  Used
 *	when the processes owning a context have all exited to encourage
 *	the rapid destruction of the kioctx.
 */
static void kill_ioctx(struct kioctx *ctx)
{
	if (!atomic_xchg(&ctx->dead, 1)) {
		hlist_del_rcu(&ctx->list);
		/* Between hlist_del_rcu() and dropping the initial ref */
		synchronize_rcu();

		/*
		 * We can't punt to workqueue here because put_ioctx() ->
		 * free_ioctx() will unmap the ringbuffer, and that has to be
		 * done in the original process's context. kill_ioctx_rcu/work()
		 * exist for exit_aio(), as in that path free_ioctx() won't do
		 * the unmap.
		 */
		kill_ioctx_work(&ctx->rcu_work);
	}
}

/* wait_on_sync_kiocb:
@@ -417,27 +435,25 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
}
EXPORT_SYMBOL(wait_on_sync_kiocb);

/* exit_aio: called when the last user of mm goes away.  At this point, 
 * there is no way for any new requests to be submited or any of the 
 * io_* syscalls to be called on the context.  However, there may be 
 * outstanding requests which hold references to the context; as they 
 * go away, they will call put_ioctx and release any pinned memory
 * associated with the request (held via struct page * references).
/*
 * exit_aio: called when the last user of mm goes away.  At this point, there is
 * no way for any new requests to be submited or any of the io_* syscalls to be
 * called on the context.
 *
 * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
 * them.
 */
void exit_aio(struct mm_struct *mm)
{
	struct kioctx *ctx;
	struct hlist_node *n;

	while (!hlist_empty(&mm->ioctx_list)) {
		ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
		hlist_del_rcu(&ctx->list);

		kill_ctx(ctx);

	hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
		if (1 != atomic_read(&ctx->users))
			printk(KERN_DEBUG
				"exit_aio:ioctx still alive: %d %d %d\n",
				atomic_read(&ctx->users), ctx->dead,
				atomic_read(&ctx->users),
				atomic_read(&ctx->dead),
				atomic_read(&ctx->reqs_active));
		/*
		 * We don't need to bother with munmap() here -
@@ -448,7 +464,11 @@ void exit_aio(struct mm_struct *mm)
		 * place that uses ->mmap_size, so it's safe.
		 */
		ctx->ring_info.mmap_size = 0;
		put_ioctx(ctx);

		if (!atomic_xchg(&ctx->dead, 1)) {
			hlist_del_rcu(&ctx->list);
			call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
		}
	}
}

@@ -514,8 +534,6 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
		kmem_cache_free(kiocb_cachep, req);
		atomic_dec(&ctx->reqs_active);
	}
	if (unlikely(!atomic_read(&ctx->reqs_active) && ctx->dead))
		wake_up_all(&ctx->wait);
	spin_unlock_irq(&ctx->ctx_lock);
}

@@ -612,13 +630,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
	rcu_read_lock();

	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
		/*
		 * RCU protects us against accessing freed memory but
		 * we have to be careful not to get a reference when the
		 * reference count already dropped to 0 (ctx->dead test
		 * is unreliable because of races).
		 */
		if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
		if (ctx->user_id == ctx_id) {
			atomic_inc(&ctx->users);
			ret = ctx;
			break;
		}
@@ -657,12 +670,15 @@ void aio_complete(struct kiocb *iocb, long res, long res2)

	info = &ctx->ring_info;

	/* add a completion event to the ring buffer.
	 * must be done holding ctx->ctx_lock to prevent
	 * other code from messing with the tail
	 * pointer since we might be called from irq
	 * context.
	/*
	 * Add a completion event to the ring buffer. Must be done holding
	 * ctx->ctx_lock to prevent other code from messing with the tail
	 * pointer since we might be called from irq context.
	 *
	 * Take rcu_read_lock() in case the kioctx is being destroyed, as we
	 * need to issue a wakeup after decrementing reqs_active.
	 */
	rcu_read_lock();
	spin_lock_irqsave(&ctx->ctx_lock, flags);

	list_del(&iocb->ki_list); /* remove from active_reqs */
@@ -728,6 +744,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
		wake_up(&ctx->wait);

	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
	rcu_read_unlock();
}
EXPORT_SYMBOL(aio_complete);

@@ -871,7 +888,7 @@ static int read_events(struct kioctx *ctx,
				break;
			if (min_nr <= i)
				break;
			if (unlikely(ctx->dead)) {
			if (unlikely(atomic_read(&ctx->dead))) {
				ret = -EINVAL;
				break;
			}
@@ -914,35 +931,6 @@ static int read_events(struct kioctx *ctx,
	return i ? i : ret;
}

/* Take an ioctx and remove it from the list of ioctx's.  Protects 
 * against races with itself via ->dead.
 */
static void io_destroy(struct kioctx *ioctx)
{
	struct mm_struct *mm = current->mm;
	int was_dead;

	/* delete the entry from the list is someone else hasn't already */
	spin_lock(&mm->ioctx_lock);
	was_dead = ioctx->dead;
	ioctx->dead = 1;
	hlist_del_rcu(&ioctx->list);
	spin_unlock(&mm->ioctx_lock);

	pr_debug("(%p)\n", ioctx);
	if (likely(!was_dead))
		put_ioctx(ioctx);	/* twice for the list */

	kill_ctx(ioctx);

	/*
	 * Wake up any waiters.  The setting of ctx->dead must be seen
	 * by other CPUs at this point.  Right now, we rely on the
	 * locking done by the above calls to ensure this consistency.
	 */
	wake_up_all(&ioctx->wait);
}

/* sys_io_setup:
 *	Create an aio_context capable of receiving at least nr_events.
 *	ctxp must not point to an aio_context that already exists, and
@@ -978,7 +966,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
	if (!IS_ERR(ioctx)) {
		ret = put_user(ioctx->user_id, ctxp);
		if (ret)
			io_destroy(ioctx);
			kill_ioctx(ioctx);
		put_ioctx(ioctx);
	}

@@ -996,7 +984,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
	struct kioctx *ioctx = lookup_ioctx(ctx);
	if (likely(NULL != ioctx)) {
		io_destroy(ioctx);
		kill_ioctx(ioctx);
		put_ioctx(ioctx);
		return 0;
	}
@@ -1300,25 +1288,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,

	ret = aio_setup_iocb(req, compat);

	if (ret)
		goto out_put_req;

	spin_lock_irq(&ctx->ctx_lock);
	/*
	 * We could have raced with io_destroy() and are currently holding a
	 * reference to ctx which should be destroyed. We cannot submit IO
	 * since ctx gets freed as soon as io_submit() puts its reference.  The
	 * check here is reliable: io_destroy() sets ctx->dead before waiting
	 * for outstanding IO and the barrier between these two is realized by
	 * unlock of mm->ioctx_lock and lock of ctx->ctx_lock.  Analogously we
	 * increment ctx->reqs_active before checking for ctx->dead and the
	 * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
	 * don't see ctx->dead set here, io_destroy() waits for our IO to
	 * finish.
	 */
	if (ctx->dead)
		ret = -EINVAL;
	spin_unlock_irq(&ctx->ctx_lock);
	if (ret)
		goto out_put_req;

@@ -1348,9 +1317,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
	spin_unlock_irq(&ctx->ctx_lock);

	atomic_dec(&ctx->reqs_active);
	if (unlikely(!atomic_read(&ctx->reqs_active) && ctx->dead))
		wake_up_all(&ctx->wait);

	aio_put_req(req);	/* drop extra ref to req */
	aio_put_req(req);	/* drop i/o ref to req */
	return ret;