Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6b6ab4c3 authored by qctecmdr's avatar qctecmdr Committed by Gerrit - the friendly Code Review server
Browse files

Merge "af_unix: fix garbage collect vs MSG_PEEK"

parents 46c75750 843227ef
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@

void unix_inflight(struct user_struct *user, struct file *fp);
void unix_notinflight(struct user_struct *user, struct file *fp);
void unix_destruct_scm(struct sk_buff *skb);
void unix_gc(void);
void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
+1 −1
Original line number Diff line number Diff line
@@ -18,7 +18,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET)		+= ipv4/
obj-$(CONFIG_TLS)		+= tls/
obj-$(CONFIG_XFRM)		+= xfrm/
obj-$(CONFIG_UNIX)		+= unix/
obj-$(CONFIG_UNIX_SCM)		+= unix/
obj-$(CONFIG_NET)		+= ipv6/
obj-$(CONFIG_BPFILTER)		+= bpfilter/
obj-$(CONFIG_PACKET)		+= packet/
+5 −0
Original line number Diff line number Diff line
@@ -19,6 +19,11 @@ config UNIX

	  Say Y unless you know what you are doing.

config UNIX_SCM
	bool
	depends on UNIX
	default y

config UNIX_DIAG
	tristate "UNIX: socket monitoring interface"
	depends on UNIX
+2 −0
Original line number Diff line number Diff line
@@ -10,3 +10,5 @@ unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o

obj-$(CONFIG_UNIX_DIAG)	+= unix_diag.o
unix_diag-y		:= diag.o

obj-$(CONFIG_UNIX_SCM)	+= scm.o
+45 −57
Original line number Diff line number Diff line
@@ -119,6 +119,8 @@
#include <linux/freezer.h>
#include <linux/file.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
@@ -1514,65 +1516,51 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

/*
 * The "user->unix_inflight" variable is protected by the garbage
 * collection lock, and we just read it locklessly here. If you go
 * over the limit, there might be a tiny race in actually noticing
 * it across threads. Tough.
 */
static inline bool too_many_unix_fds(struct task_struct *p)
{
	struct user_struct *user = current_user();

	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
	return false;
}

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	if (too_many_unix_fds(current))
		return -ETOOMANYREFS;
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection.  Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs).  This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored.  While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operations that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd.  The following lock/unlock
	 * pair is to ensure serialization with garbage collection.  It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate.  If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_inflight(scm->fp->user, scm->fp->fp[i]);
	return 0;
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -2200,7 +2188,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
			unix_peek_fds(&scm, skb);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

@@ -2441,7 +2429,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

Loading