Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0ed8ddf4 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller
Browse files

neigh: Protect neigh->ha[] with a seqlock



Add a seqlock in struct neighbour to protect neigh->ha[], and avoid
dirtying neighbour in stress situation (many different flows / dsts)

Dirtying takes place because of read_lock(&n->lock) and n->used writes.

Switching to a seqlock, and writing n->used only on jiffies changes
permits less dirtying.

Signed-off-by: default avatarEric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent d122179a
Loading
Loading
Loading
Loading
+15 −1
Original line number Diff line number Diff line
@@ -105,6 +105,7 @@ struct neighbour {
	atomic_t		refcnt;
	atomic_t		probes;
	rwlock_t		lock;
	seqlock_t		ha_lock;
	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
	struct hh_cache		*hh;
	int			(*output)(struct sk_buff *skb);
@@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh)

static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	neigh->used = jiffies;
	unsigned long now = ACCESS_ONCE(jiffies);
	
	if (neigh->used != now)
		neigh->used = now;
	if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
		return __neigh_event_send(neigh, skb);
	return 0;
@@ -373,4 +377,14 @@ struct neighbour_cb {

#define NEIGH_CB(skb)	((struct neighbour_cb *)(skb)->cb)

static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
				     const struct net_device *dev)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&n->ha_lock);
		memcpy(dst, n->ha, dev->addr_len);
	} while (read_seqretry(&n->ha_lock, seq));
}
#endif
+30 −17
Original line number Diff line number Diff line
@@ -294,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
@@ -1015,7 +1016,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
static void neigh_update_hhs(const struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1151,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
@@ -1214,6 +1217,7 @@ static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
{
	struct hh_cache *hh;

	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
	for (hh = n->hh; hh; hh = hh->hh_next) {
		if (hh->hh_type == protocol) {
			atomic_inc(&hh->hh_refcnt);
@@ -1248,8 +1252,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
		kfree(hh);
		return;
	}
	read_unlock(&n->lock);
	write_lock(&n->lock);

	write_lock_bh(&n->lock);

	/* must check if another thread already did the insert */
	if (neigh_hh_lookup(n, dst, protocol)) {
@@ -1263,13 +1267,13 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
		hh->hh_output = n->ops->output;

	hh->hh_next = n->hh;
	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
	n->hh	    = hh;

	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
		hh_cache_put(hh);
end:
	write_unlock(&n->lock);
	read_lock(&n->lock);
	write_unlock_bh(&n->lock);
}

/* This function can be used in contexts, where only old dev_queue_xmit
@@ -1308,16 +1312,18 @@ int neigh_resolve_output(struct sk_buff *skb)
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		read_lock_bh(&neigh->lock);
		if (dev->header_ops->cache &&
		    !dst->hh &&
		    !(dst->flags & DST_NOCACHE))
			neigh_hh_init(neigh, dst, dst->ops->protocol);

		do {
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		read_unlock_bh(&neigh->lock);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = neigh->ops->queue_xmit(skb);
@@ -1344,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
	struct dst_entry *dst = skb_dst(skb);
	struct neighbour *neigh = dst->neighbour;
	struct net_device *dev = neigh->dev;
	unsigned int seq;

	__skb_pull(skb, skb_network_offset(skb));

	read_lock_bh(&neigh->lock);
	do {
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	read_unlock_bh(&neigh->lock);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = neigh->ops->queue_xmit(skb);
	else {
@@ -2148,11 +2157,15 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if ((neigh->nud_state & NUD_VALID) &&
	    nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
+2 −4
Original line number Diff line number Diff line
@@ -503,9 +503,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
	if (n) {
		n->used = jiffies;
		if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
			read_lock_bh(&n->lock);
			memcpy(haddr, n->ha, dev->addr_len);
			read_unlock_bh(&n->lock);
			neigh_ha_snapshot(haddr, n, dev);
			neigh_release(n);
			return 0;
		}
+4 −4
Original line number Diff line number Diff line
@@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;
		char haddr[MAX_ADDR_LEN];

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);
		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
				      NULL, skb->len);

		if (err < 0) {
			neigh_release(n);