
Commit cbcfa130 authored by Eric Biggers, committed by Linus Torvalds

fs/userfaultfd.c: disable irqs for fault_pending and event locks

When IOCB_CMD_POLL is used on a userfaultfd, aio_poll() disables IRQs
and takes kioctx::ctx_lock, then userfaultfd_ctx::fd_wqh.lock.

This may have to wait for userfaultfd_ctx::fd_wqh.lock to be released by
userfaultfd_ctx_read(), which in turn can be waiting for
userfaultfd_ctx::fault_pending_wqh.lock or
userfaultfd_ctx::event_wqh.lock.

But elsewhere the fault_pending_wqh and event_wqh locks are taken with
IRQs enabled.  Since the IRQ handler may take kioctx::ctx_lock, lockdep
reports that a deadlock is possible.
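
Roughly, the cycle lockdep complains about looks like this (an
illustrative sketch of the acquisition orders described above, not the
literal call sites; variable names are placeholders):

	/* aio_poll(): IRQs off, kioctx::ctx_lock held (ctx_lock is also
	 * taken from IRQ context), then: */
	spin_lock(&ctx->fd_wqh.lock);

	/* userfaultfd_ctx_read(): fd_wqh.lock held, then: */
	spin_lock(&ctx->fault_pending_wqh.lock);

	/* handle_userfault() and friends, before this patch: the same lock
	 * taken with IRQs enabled, so an interrupt that needs
	 * kioctx::ctx_lock while it is held closes the cycle: */
	spin_lock(&ctx->fault_pending_wqh.lock);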

Fix it by always disabling IRQs when taking the fault_pending_wqh and
event_wqh locks.
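
In code, that means converting the plain lock/unlock pairs around these
wait queues to their IRQ-disabling variants, e.g. in handle_userfault()
(an abridged sketch of the hunk shown in the diff below):

	spin_lock_irq(&ctx->fault_pending_wqh.lock);	/* was spin_lock() */
	__add_wait_queue(&ctx->fault_pending_wqh, &uwq.wq);
	...
	set_current_state(blocking_state);
	spin_unlock_irq(&ctx->fault_pending_wqh.lock);	/* was spin_unlock() */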

Commit ae62c16e ("userfaultfd: disable irqs when taking the
waitqueue lock") didn't fix this because it only accounted for the
fd_wqh lock, not the other locks nested inside it.

Link: http://lkml.kernel.org/r/20190627075004.21259-1-ebiggers@kernel.org
Fixes: bfe4037e ("aio: implement IOCB_CMD_POLL")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reported-by: <syzbot+fab6de82892b6b9c6191@syzkaller.appspotmail.com>
Reported-by: <syzbot+53c0b767f7ca0dc0c451@syzkaller.appspotmail.com>
Reported-by: <syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>	[4.19+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b9705d87
fs/userfaultfd.c +26 −16
@@ -40,6 +40,16 @@ enum userfaultfd_state {
/*
 * Start with fault_pending_wqh and fault_wqh so they're more likely
 * to be in the same cacheline.
+ *
+ * Locking order:
+ *	fd_wqh.lock
+ *		fault_pending_wqh.lock
+ *			fault_wqh.lock
+ *		event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
 */
struct userfaultfd_ctx {
	/* waitqueue head for the pending (i.e. not read) userfaults */
@@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
			 TASK_KILLABLE;

-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	/*
	 * After the __add_wait_queue the uwq is visible to userland
	 * through poll/read().
@@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	 * __add_wait_queue.
	 */
	set_current_state(blocking_state);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);

	if (!is_vm_hugetlb_page(vmf->vma))
		must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	 * kernel stack can be released after the list_del_init.
	 */
	if (!list_empty_careful(&uwq.wq.entry)) {
-		spin_lock(&ctx->fault_pending_wqh.lock);
+		spin_lock_irq(&ctx->fault_pending_wqh.lock);
		/*
		 * No need of list_del_init(), the uwq on the stack
		 * will be freed shortly anyway.
		 */
		list_del(&uwq.wq.entry);
-		spin_unlock(&ctx->fault_pending_wqh.lock);
+		spin_unlock_irq(&ctx->fault_pending_wqh.lock);
	}

	/*
@@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
	init_waitqueue_entry(&ewq->wq, current);
	release_new_ctx = NULL;

-	spin_lock(&ctx->event_wqh.lock);
+	spin_lock_irq(&ctx->event_wqh.lock);
	/*
	 * After the __add_wait_queue the uwq is visible to userland
	 * through poll/read().
@@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
			break;
		}

-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock_irq(&ctx->event_wqh.lock);

		wake_up_poll(&ctx->fd_wqh, EPOLLIN);
		schedule();

-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock_irq(&ctx->event_wqh.lock);
	}
	__set_current_state(TASK_RUNNING);
-	spin_unlock(&ctx->event_wqh.lock);
+	spin_unlock_irq(&ctx->event_wqh.lock);

	if (release_new_ctx) {
		struct vm_area_struct *vma;
@@ -918,10 +928,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
	 * the last page faults that may have been already waiting on
	 * the fault_*wqh.
	 */
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
	__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);

	/* Flush pending events that may still wait on event_wqh */
	wake_up_all(&ctx->event_wqh);
@@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,

	if (!ret && msg->event == UFFD_EVENT_FORK) {
		ret = resolve_userfault_fork(ctx, fork_nctx, msg);
-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock_irq(&ctx->event_wqh.lock);
		if (!list_empty(&fork_event)) {
			/*
			 * The fork thread didn't abort, so we can
@@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
			if (ret)
				userfaultfd_ctx_put(fork_nctx);
		}
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock_irq(&ctx->event_wqh.lock);
	}

	return ret;
@@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
static void __wake_userfault(struct userfaultfd_ctx *ctx,
			     struct userfaultfd_wake_range *range)
{
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	/* wake all in the range and autoremove */
	if (waitqueue_active(&ctx->fault_pending_wqh))
		__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
				     range);
	if (waitqueue_active(&ctx->fault_wqh))
		__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}

static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
@@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
	wait_queue_entry_t *wq;
	unsigned long pending = 0, total = 0;

-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
		pending++;
		total++;
@@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
	list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
		total++;
	}
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);

	/*
	 * If more protocols will be added, there will be all shown