Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit aadd06e5 authored by Jens Axboe
Browse files

[PATCH] splice: fix problems with sys_tee()



Several issues noticed/fixed:

- We cannot reliably block in link_pipe() while holding both input and output
  mutexes. So do preparatory checks before locking down both mutexes and doing
  the link.

- The ipipe->nrbufs vs i check was bad, because we could have dropped the
  ipipe lock in-between. This causes us to potentially look at unknown
  buffers if we were racing with someone else reading this pipe.

Signed-off-by: Jens Axboe <axboe@suse.de>
parent b3cf2576
Loading
Loading
Loading
Loading
+133 −105
Original line number Original line Diff line number Diff line
@@ -1306,6 +1306,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
	return error;
	return error;
}
}


/*
 * Prepare the input side of a tee: ensure the pipe holds at least one
 * buffer, sleeping for data when that is permitted.
 *
 * @pipe:  pipe that will be read from
 * @flags: splice flags; only SPLICE_F_NONBLOCK is examined here
 *
 * Returns 0 when the pipe has buffers or when no writers remain,
 * -ERESTARTSYS when a signal is pending, and -EAGAIN when the pipe is
 * empty, no writer is waiting and SPLICE_F_NONBLOCK was requested.
 */
static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
{
	int err = 0;

	/*
	 * Unlocked fast path. The result of this function is only a hint
	 * to the caller, so a stale read of ->nrbufs is acceptable.
	 */
	if (pipe->nrbufs)
		return 0;

	mutex_lock(&pipe->inode->i_mutex);

	for (;;) {
		/* Data showed up, nothing more to wait for. */
		if (pipe->nrbufs)
			break;

		if (signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}

		/* Nobody left to fill the pipe: report success with no data. */
		if (!pipe->writers)
			break;

		/*
		 * Only refuse to sleep in non-blocking mode when no writer is
		 * already waiting on this pipe.
		 */
		if (!pipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) {
			err = -EAGAIN;
			break;
		}

		pipe_wait(pipe);
	}

	mutex_unlock(&pipe->inode->i_mutex);
	return err;
}

/*
 * Prepare the output side of a tee: ensure the pipe has a free buffer
 * slot, sleeping for room when that is permitted.
 *
 * @pipe:  pipe that will be written to
 * @flags: splice flags; only SPLICE_F_NONBLOCK is examined here
 *
 * Returns 0 when fewer than PIPE_BUFFERS slots are in use, -EPIPE (after
 * sending SIGPIPE to the current task) when the pipe has no readers,
 * -EAGAIN when the pipe is full and SPLICE_F_NONBLOCK was requested, and
 * -ERESTARTSYS when a signal is pending.
 */
static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
{
	int err = 0;

	/*
	 * Unlocked fast path. The result of this function is only a hint
	 * to the caller, so a stale read of ->nrbufs is acceptable.
	 */
	if (pipe->nrbufs < PIPE_BUFFERS)
		return 0;

	mutex_lock(&pipe->inode->i_mutex);

	for (;;) {
		/* A slot was freed, nothing more to wait for. */
		if (pipe->nrbufs < PIPE_BUFFERS)
			break;

		/* Writing to a pipe without readers raises SIGPIPE. */
		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			err = -EPIPE;
			break;
		}

		if (flags & SPLICE_F_NONBLOCK) {
			err = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}

		/*
		 * Advertise that a writer is blocked for the duration of the
		 * sleep; link_ipipe_prep() consults ->waiting_writers before
		 * giving up with -EAGAIN.
		 */
		pipe->waiting_writers++;
		pipe_wait(pipe);
		pipe->waiting_writers--;
	}

	mutex_unlock(&pipe->inode->i_mutex);
	return err;
}

/*
/*
 * Link contents of ipipe to opipe.
 * Link contents of ipipe to opipe.
 */
 */
@@ -1314,9 +1393,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
		     size_t len, unsigned int flags)
		     size_t len, unsigned int flags)
{
{
	struct pipe_buffer *ibuf, *obuf;
	struct pipe_buffer *ibuf, *obuf;
	int ret, do_wakeup, i, ipipe_first;
	int ret = 0, i = 0, nbuf;

	ret = do_wakeup = ipipe_first = 0;


	/*
	/*
	 * Potential ABBA deadlock, work around it by ordering lock
	 * Potential ABBA deadlock, work around it by ordering lock
@@ -1324,29 +1401,30 @@ static int link_pipe(struct pipe_inode_info *ipipe,
	 * could deadlock (one doing tee from A -> B, the other from B -> A).
	 * could deadlock (one doing tee from A -> B, the other from B -> A).
	 */
	 */
	if (ipipe->inode < opipe->inode) {
	if (ipipe->inode < opipe->inode) {
		ipipe_first = 1;
		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
		mutex_lock(&ipipe->inode->i_mutex);
		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
		mutex_lock(&opipe->inode->i_mutex);
	} else {
	} else {
		mutex_lock(&opipe->inode->i_mutex);
		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
		mutex_lock(&ipipe->inode->i_mutex);
		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
	}
	}


	for (i = 0;; i++) {
	do {
		if (!opipe->readers) {
		if (!opipe->readers) {
			send_sig(SIGPIPE, current, 0);
			send_sig(SIGPIPE, current, 0);
			if (!ret)
			if (!ret)
				ret = -EPIPE;
				ret = -EPIPE;
			break;
			break;
		}
		}
		if (ipipe->nrbufs - i) {
			ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));


		/*
		/*
			 * If we have room, fill this buffer
		 * If we have iterated all input buffers or ran out of
		 * output room, break.
		 */
		 */
			if (opipe->nrbufs < PIPE_BUFFERS) {
		if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
				int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
			break;

		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
		nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);


		/*
		/*
		 * Get a reference to this pipe buffer,
		 * Get a reference to this pipe buffer,
@@ -1367,83 +1445,18 @@ static int link_pipe(struct pipe_inode_info *ipipe,
			obuf->len = len;
			obuf->len = len;


		opipe->nrbufs++;
		opipe->nrbufs++;
				do_wakeup = 1;
		ret += obuf->len;
		ret += obuf->len;
		len -= obuf->len;
		len -= obuf->len;

		i++;
				if (!len)
	} while (len);
					break;
				if (opipe->nrbufs < PIPE_BUFFERS)
					continue;
			}

			/*
			 * We have input available, but no output room.
			 * If we already copied data, return that. If we
			 * need to drop the opipe lock, it must be ordered
			 * last to avoid deadlocks.
			 */
			if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
				if (!ret)
					ret = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				if (!ret)
					ret = -ERESTARTSYS;
				break;
			}
			if (do_wakeup) {
				smp_mb();
				if (waitqueue_active(&opipe->wait))
					wake_up_interruptible(&opipe->wait);
				kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
				do_wakeup = 0;
			}

			opipe->waiting_writers++;
			pipe_wait(opipe);
			opipe->waiting_writers--;
			continue;
		}

		/*
		 * No input buffers, do the usual checks for available
		 * writers and blocking and wait if necessary
		 */
		if (!ipipe->writers)
			break;
		if (!ipipe->waiting_writers) {
			if (ret)
				break;
		}
		/*
		 * pipe_wait() drops the ipipe mutex. To avoid deadlocks
		 * with another process, we can only safely do that if
		 * the ipipe lock is ordered last.
		 */
		if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}

		if (waitqueue_active(&ipipe->wait))
			wake_up_interruptible_sync(&ipipe->wait);
		kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);

		pipe_wait(ipipe);
	}


	mutex_unlock(&ipipe->inode->i_mutex);
	mutex_unlock(&ipipe->inode->i_mutex);
	mutex_unlock(&opipe->inode->i_mutex);
	mutex_unlock(&opipe->inode->i_mutex);


	if (do_wakeup) {
	/*
	 * If we put data in the output pipe, wakeup any potential readers.
	 */
	if (ret > 0) {
		smp_mb();
		smp_mb();
		if (waitqueue_active(&opipe->wait))
		if (waitqueue_active(&opipe->wait))
			wake_up_interruptible(&opipe->wait);
			wake_up_interruptible(&opipe->wait);
@@ -1464,14 +1477,29 @@ static long do_tee(struct file *in, struct file *out, size_t len,
{
{
	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
	int ret = -EINVAL;


	/*
	/*
	 * Link ipipe to the two output pipes, consuming as we go along.
	 * Duplicate the contents of ipipe to opipe without actually
	 * copying the data.
	 */
	if (ipipe && opipe && ipipe != opipe) {
		/*
		 * Keep going, unless we encounter an error. The ipipe/opipe
		 * ordering doesn't really matter.
		 */
		 */
	if (ipipe && opipe)
		ret = link_ipipe_prep(ipipe, flags);
		return link_pipe(ipipe, opipe, len, flags);
		if (!ret) {
			ret = link_opipe_prep(opipe, flags);
			if (!ret) {
				ret = link_pipe(ipipe, opipe, len, flags);
				if (!ret && (flags & SPLICE_F_NONBLOCK))
					ret = -EAGAIN;
			}
		}
	}


	return -EINVAL;
	return ret;
}
}


asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)