Loading include/net/af_unix.h +1 −0 Original line number Diff line number Diff line Loading @@ -10,6 +10,7 @@ void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); void unix_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); Loading net/Makefile +1 −1 Original line number Diff line number Diff line Loading @@ -18,7 +18,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_UNIX_SCM) += unix/ obj-$(CONFIG_NET) += ipv6/ obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ Loading net/unix/Kconfig +5 −0 Original line number Diff line number Diff line Loading @@ -19,6 +19,11 @@ config UNIX Say Y unless you know what you are doing. config UNIX_SCM bool depends on UNIX default y config UNIX_DIAG tristate "UNIX: socket monitoring interface" depends on UNIX Loading net/unix/Makefile +2 −0 Original line number Diff line number Diff line Loading @@ -10,3 +10,5 @@ unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o obj-$(CONFIG_UNIX_DIAG) += unix_diag.o unix_diag-y := diag.o obj-$(CONFIG_UNIX_SCM) += scm.o net/unix/af_unix.c +45 −57 Original line number Diff line number Diff line Loading @@ -119,6 +119,8 @@ #include <linux/freezer.h> #include <linux/file.h> #include "scm.h" struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); Loading Loading @@ -1514,65 +1516,51 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) return err; } static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; scm->fp = UNIXCB(skb).fp; UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) unix_notinflight(scm->fp->user, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) { struct 
scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); /* Alas, it calls VFS */ /* So fscking what? fput() had been SMP-safe since the last Summer */ scm_destroy(&scm); sock_wfree(skb); } /* * The "user->unix_inflight" variable is protected by the garbage * collection lock, and we just read it locklessly here. If you go * over the limit, there might be a tiny race in actually noticing * it across threads. Tough. */ static inline bool too_many_unix_fds(struct task_struct *p) { struct user_struct *user = current_user(); if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); return false; } static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; if (too_many_unix_fds(current)) return -ETOOMANYREFS; scm->fp = scm_fp_dup(UNIXCB(skb).fp); /* * Need to duplicate file references for the sake of garbage * collection. Otherwise a socket in the fps might become a * candidate for GC while the skb is not yet queued. * Garbage collection of unix sockets starts by selecting a set of * candidate sockets which have reference only from being in flight * (total_refs == inflight_refs). This condition is checked once during * the candidate collection phase, and candidates are marked as such, so * that non-candidates can later be ignored. While inflight_refs is * protected by unix_gc_lock, total_refs (file count) is not, hence this * is an instantaneous decision. * * Once a candidate, however, the socket must not be reinstalled into a * file descriptor while the garbage collection is in progress. * * If the above conditions are met, then the directed graph of * candidates (*) does not change while unix_gc_lock is held. 
* * Any operation that changes the file count through file descriptors * (dup, close, sendmsg) does not change the graph since candidates are * not installed in fds. * * Dequeuing a candidate via recvmsg would install it into an fd, but * that takes unix_gc_lock to decrement the inflight count, so it's * serialized with garbage collection. * * MSG_PEEK is special in that it does not change the inflight count, * yet does install the socket into an fd. The following lock/unlock * pair is to ensure serialization with garbage collection. It must be * done between incrementing the file count and installing the file into * an fd. * * If garbage collection starts after the barrier provided by the * lock/unlock, then it will see the elevated refcount and not mark this * as a candidate. If a garbage collection is already in progress * before the file count was incremented, then the lock/unlock pair will * ensure that garbage collection is finished before progressing to * installing the fd. * * (*) A -> B where B is on the queue of A or B is on the queue of C * which is on the queue of listening socket A.
*/ if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); Loading Loading
include/net/af_unix.h +1 −0 Original line number Diff line number Diff line Loading @@ -10,6 +10,7 @@ void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); void unix_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); Loading
net/Makefile +1 −1 Original line number Diff line number Diff line Loading @@ -18,7 +18,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_UNIX_SCM) += unix/ obj-$(CONFIG_NET) += ipv6/ obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ Loading
net/unix/Kconfig +5 −0 Original line number Diff line number Diff line Loading @@ -19,6 +19,11 @@ config UNIX Say Y unless you know what you are doing. config UNIX_SCM bool depends on UNIX default y config UNIX_DIAG tristate "UNIX: socket monitoring interface" depends on UNIX Loading
net/unix/Makefile +2 −0 Original line number Diff line number Diff line Loading @@ -10,3 +10,5 @@ unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o obj-$(CONFIG_UNIX_DIAG) += unix_diag.o unix_diag-y := diag.o obj-$(CONFIG_UNIX_SCM) += scm.o
net/unix/af_unix.c +45 −57 Original line number Diff line number Diff line Loading @@ -119,6 +119,8 @@ #include <linux/freezer.h> #include <linux/file.h> #include "scm.h" struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); Loading Loading @@ -1514,65 +1516,51 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) return err; } static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; scm->fp = UNIXCB(skb).fp; UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) unix_notinflight(scm->fp->user, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) { struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); /* Alas, it calls VFS */ /* So fscking what? fput() had been SMP-safe since the last Summer */ scm_destroy(&scm); sock_wfree(skb); } /* * The "user->unix_inflight" variable is protected by the garbage * collection lock, and we just read it locklessly here. If you go * over the limit, there might be a tiny race in actually noticing * it across threads. Tough. */ static inline bool too_many_unix_fds(struct task_struct *p) { struct user_struct *user = current_user(); if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); return false; } static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; if (too_many_unix_fds(current)) return -ETOOMANYREFS; scm->fp = scm_fp_dup(UNIXCB(skb).fp); /* * Need to duplicate file references for the sake of garbage * collection. Otherwise a socket in the fps might become a * candidate for GC while the skb is not yet queued. 
* Garbage collection of unix sockets starts by selecting a set of * candidate sockets which have references only from being in flight * (total_refs == inflight_refs). This condition is checked once during * the candidate collection phase, and candidates are marked as such, so * that non-candidates can later be ignored. While inflight_refs is * protected by unix_gc_lock, total_refs (file count) is not, hence this * is an instantaneous decision. * * Once a candidate, however, the socket must not be reinstalled into a * file descriptor while the garbage collection is in progress. * * If the above conditions are met, then the directed graph of * candidates (*) does not change while unix_gc_lock is held. * * Any operation that changes the file count through file descriptors * (dup, close, sendmsg) does not change the graph since candidates are * not installed in fds. * * Dequeuing a candidate via recvmsg would install it into an fd, but * that takes unix_gc_lock to decrement the inflight count, so it's * serialized with garbage collection. * * MSG_PEEK is special in that it does not change the inflight count, * yet does install the socket into an fd. The following lock/unlock * pair is to ensure serialization with garbage collection. It must be * done between incrementing the file count and installing the file into * an fd. * * If garbage collection starts after the barrier provided by the * lock/unlock, then it will see the elevated refcount and not mark this * as a candidate. If a garbage collection is already in progress * before the file count was incremented, then the lock/unlock pair will * ensure that garbage collection is finished before progressing to * installing the fd. * * (*) A -> B where B is on the queue of A or B is on the queue of C * which is on the queue of listening socket A.
*/ UNIXCB(skb).fp = scm_fp_dup(scm->fp); if (!UNIXCB(skb).fp) return -ENOMEM; for (i = scm->fp->count - 1; i >= 0; i--) unix_inflight(scm->fp->user, scm->fp->fp[i]); return 0; spin_lock(&unix_gc_lock); spin_unlock(&unix_gc_lock); } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) Loading Loading @@ -2200,7 +2188,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; Loading Loading @@ -2441,7 +2429,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); Loading