Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f6d8bd05 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller
Browse files

inet: add RCU protection to inet->opt



We lack proper synchronization to manipulate inet->opt ip_options

Problem is ip_make_skb() calls ip_setup_cork() and
ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options),
without any protection against another thread manipulating inet->opt.

Another thread can change inet->opt pointer and free old one under us.

Use RCU to protect inet->opt (changed to inet->inet_opt).

Instead of handling atomic refcounts, just copy ip_options when
necessary, to avoid cache line dirtying.

We cant insert an rcu_head in struct ip_options since its included in
skb->cb[], so this patch is large because I had to introduce a new
ip_options_rcu structure.

Signed-off-by: default avatarEric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 0a14842f
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -57,7 +57,15 @@ struct ip_options {
	unsigned char	__data[0];
};

#define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
struct ip_options_rcu {
	struct rcu_head rcu;
	struct ip_options opt;
};

struct ip_options_data {
	struct ip_options_rcu	opt;
	char			data[40];
};

struct inet_request_sock {
	struct request_sock	req;
@@ -78,7 +86,7 @@ struct inet_request_sock {
				acked	   : 1,
				no_srccheck: 1;
	kmemcheck_bitfield_end(flags);
	struct ip_options	*opt;
	struct ip_options_rcu	*opt;
};

static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -140,7 +148,7 @@ struct inet_sock {
	__be16			inet_sport;
	__u16			inet_id;

	struct ip_options	*opt;
	struct ip_options_rcu __rcu	*inet_opt;
	__u8			tos;
	__u8			min_ttl;
	__u8			mc_ttl;
+6 −5
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
struct ipcm_cookie {
	__be32			addr;
	int			oif;
	struct ip_options	*opt;
	struct ip_options_rcu	*opt;
	__u8			tx_flags;
};

@@ -92,7 +92,7 @@ extern int igmp_mc_proc_init(void);

extern int		ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
					      __be32 saddr, __be32 daddr,
					      struct ip_options *opt);
					      struct ip_options_rcu *opt);
extern int		ip_rcv(struct sk_buff *skb, struct net_device *dev,
			       struct packet_type *pt, struct net_device *orig_dev);
extern int		ip_local_deliver(struct sk_buff *skb);
@@ -416,14 +416,15 @@ extern int ip_forward(struct sk_buff *skb);
 *	Functions provided by ip_options.c
 */
 
extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag);
extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
			     __be32 daddr, struct rtable *rt, int is_frag);
extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
extern void ip_options_fragment(struct sk_buff *skb);
extern int ip_options_compile(struct net *net,
			      struct ip_options *opt, struct sk_buff *skb);
extern int ip_options_get(struct net *net, struct ip_options **optp,
extern int ip_options_get(struct net *net, struct ip_options_rcu **optp,
			  unsigned char *data, int optlen);
extern int ip_options_get_from_user(struct net *net, struct ip_options **optp,
extern int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
				    unsigned char __user *data, int optlen);
extern void ip_options_undo(struct ip_options * opt);
extern void ip_forward_options(struct sk_buff *skb);
+10 −6
Original line number Diff line number Diff line
@@ -48,6 +48,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
	struct flowi4 fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	dp->dccps_role = DCCP_ROLE_CLIENT;

@@ -58,10 +59,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt != NULL && inet->opt->srr) {

	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt != NULL && inet_opt->opt.srr) {
		if (daddr == 0)
			return -EINVAL;
		nexthop = inet->opt->faddr;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
@@ -78,7 +82,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
		return -ENETUNREACH;
	}

	if (inet->opt == NULL || !inet->opt->srr)
	if (inet_opt == NULL || !inet_opt->opt.srr)
		daddr = rt->rt_dst;

	if (inet->inet_saddr == 0)
@@ -89,8 +93,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt != NULL)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	/*
	 * Socket identity is still unknown (sport may be zero).
	 * However we set state to DCCP_REQUESTING and not releasing socket
@@ -405,7 +409,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
	newinet->inet_daddr	= ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr	= ireq->loc_addr;
	newinet->opt	   = ireq->opt;
	newinet->inet_opt	= ireq->opt;
	ireq->opt	   = NULL;
	newinet->mc_index  = inet_iif(skb);
	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
+1 −1
Original line number Diff line number Diff line
@@ -573,7 +573,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;
	newinet->inet_opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;
+12 −5
Original line number Diff line number Diff line
@@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk)
	WARN_ON(sk->sk_wmem_queued);
	WARN_ON(sk->sk_forward_alloc);

	kfree(inet->opt);
	kfree(rcu_dereference_protected(inet->inet_opt, 1));
	dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
	sk_refcnt_debug_dec(sk);
}
@@ -1106,9 +1106,12 @@ static int inet_sk_reselect_saddr(struct sock *sk)
	struct flowi4 fl4;
	struct rtable *rt;
	__be32 new_saddr;
	struct ip_options_rcu *inet_opt;

	if (inet->opt && inet->opt->srr)
		daddr = inet->opt->faddr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;

	/* Query new route. */
	rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk),
@@ -1148,6 +1151,7 @@ int inet_sk_rebuild_header(struct sock *sk)
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
	__be32 daddr;
	struct ip_options_rcu *inet_opt;
	int err;

	/* Route is OK, nothing to do. */
@@ -1155,9 +1159,12 @@ int inet_sk_rebuild_header(struct sock *sk)
		return 0;

	/* Reroute. */
	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	daddr = inet->inet_daddr;
	if (inet->opt && inet->opt->srr)
		daddr = inet->opt->faddr;
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	rcu_read_unlock();
	rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr,
				   inet->inet_dport, inet->inet_sport,
				   sk->sk_protocol, RT_CONN_FLAGS(sk),
Loading