Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 152bff37 authored by David S. Miller
Browse files

Merge branch 'bridge-improve-cache-utilization'



Nikolay Aleksandrov says:

====================
bridge: improve cache utilization

This is the first set which begins to deal with the bad bridge cache
access patterns. The first patch rearranges the bridge and port structs
a little so the frequently (and closely) accessed members are in the same
cache line. The second patch then moves the garbage collection to a
workqueue trying to improve system responsiveness under load (many fdbs)
and more importantly removes the need to check if the matched entry is
expired in __br_fdb_get which was a major source of false-sharing.
The third patch is a preparation for the final one, which limits writes to
the fdb "used" and "updated" fields to at most once per jiffy.
If properly configured, i.e. ports bound to CPUs (thus updating "updated"
locally) then the bridge's HitM goes from 100% to 0%, but even without
binding we get a win because previously every lookup that iterated over
the hash chain caused false-sharing due to the first cache line being
used for both mac/vid and used/updated fields.

Some results from tests I've run:
(note that these were run in good conditions for the baseline, everything
 ran on a single NUMA node and there were only 3 fdbs)

1. baseline
100% Load HitM on the fdbs (between everyone who has done lookups and hit
                            one of the 3 hash chains of the communicating
                            src/dst fdbs)
Overall 5.06% Load HitM for the bridge, first place in the list

2. patched & ports bound to CPUs
0% Local load HitM, bridge is not even in the c2c report list
Also there's 3% consistent improvement in netperf tests.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 63dfef75 83a718d6
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -411,4 +411,5 @@ void br_dev_setup(struct net_device *dev)
	br_netfilter_rtable_init(br);
	br_netfilter_rtable_init(br);
	br_stp_timer_init(br);
	br_stp_timer_init(br);
	br_multicast_init(br);
	br_multicast_init(br);
	INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup);
}
}
+21 −13
Original line number Original line Diff line number Diff line
@@ -154,7 +154,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
	if (f->added_by_external_learn)
	if (f->added_by_external_learn)
		fdb_del_external_learn(f);
		fdb_del_external_learn(f);


	hlist_del_rcu(&f->hlist);
	hlist_del_init_rcu(&f->hlist);
	fdb_notify(br, f, RTM_DELNEIGH);
	fdb_notify(br, f, RTM_DELNEIGH);
	call_rcu(&f->rcu, fdb_rcu_free);
	call_rcu(&f->rcu, fdb_rcu_free);
}
}
@@ -290,34 +290,43 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
	spin_unlock_bh(&br->hash_lock);
	spin_unlock_bh(&br->hash_lock);
}
}


void br_fdb_cleanup(unsigned long _data)
void br_fdb_cleanup(struct work_struct *work)
{
{
	struct net_bridge *br = (struct net_bridge *)_data;
	struct net_bridge *br = container_of(work, struct net_bridge,
					     gc_work.work);
	unsigned long delay = hold_time(br);
	unsigned long delay = hold_time(br);
	unsigned long next_timer = jiffies + br->ageing_time;
	unsigned long work_delay = delay;
	unsigned long now = jiffies;
	int i;
	int i;


	spin_lock(&br->hash_lock);
	for (i = 0; i < BR_HASH_SIZE; i++) {
	for (i = 0; i < BR_HASH_SIZE; i++) {
		struct net_bridge_fdb_entry *f;
		struct net_bridge_fdb_entry *f;
		struct hlist_node *n;
		struct hlist_node *n;


		if (!br->hash[i].first)
			continue;

		spin_lock_bh(&br->hash_lock);
		hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
		hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
			unsigned long this_timer;
			unsigned long this_timer;

			if (f->is_static)
			if (f->is_static)
				continue;
				continue;
			if (f->added_by_external_learn)
			if (f->added_by_external_learn)
				continue;
				continue;
			this_timer = f->updated + delay;
			this_timer = f->updated + delay;
			if (time_before_eq(this_timer, jiffies))
			if (time_after(this_timer, now))
				work_delay = min(work_delay, this_timer - now);
			else
				fdb_delete(br, f);
				fdb_delete(br, f);
			else if (time_before(this_timer, next_timer))
				next_timer = this_timer;
		}
		}
		spin_unlock_bh(&br->hash_lock);
		cond_resched();
	}
	}
	spin_unlock(&br->hash_lock);


	mod_timer(&br->gc_timer, round_jiffies_up(next_timer));
	/* Cleanup minimum 10 milliseconds apart */
	work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
	mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
}
}


/* Completely flush all dynamic entries in forwarding database.*/
/* Completely flush all dynamic entries in forwarding database.*/
@@ -382,8 +391,6 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
				&br->hash[br_mac_hash(addr, vid)], hlist) {
				&br->hash[br_mac_hash(addr, vid)], hlist) {
		if (ether_addr_equal(fdb->addr.addr, addr) &&
		if (ether_addr_equal(fdb->addr.addr, addr) &&
		    fdb->vlan_id == vid) {
		    fdb->vlan_id == vid) {
			if (unlikely(has_expired(br, fdb)))
				break;
			return fdb;
			return fdb;
		}
		}
	}
	}
@@ -590,6 +597,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
				fdb->dst = source;
				fdb->dst = source;
				fdb_modified = true;
				fdb_modified = true;
			}
			}
			if (jiffies != fdb->updated)
				fdb->updated = jiffies;
				fdb->updated = jiffies;
			if (unlikely(added_by_user))
			if (unlikely(added_by_user))
				fdb->added_by_user = 1;
				fdb->added_by_user = 1;
+1 −1
Original line number Original line Diff line number Diff line
@@ -313,7 +313,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)


	br_vlan_flush(br);
	br_vlan_flush(br);
	br_multicast_dev_del(br);
	br_multicast_dev_del(br);
	del_timer_sync(&br->gc_timer);
	cancel_delayed_work_sync(&br->gc_work);


	br_sysfs_delbr(br->dev);
	br_sysfs_delbr(br->dev);
	unregister_netdevice_queue(br->dev, head);
	unregister_netdevice_queue(br->dev, head);
+2 −1
Original line number Original line Diff line number Diff line
@@ -198,6 +198,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
		if (dst->is_local)
		if (dst->is_local)
			return br_pass_frame_up(skb);
			return br_pass_frame_up(skb);


		if (jiffies != dst->used)
			dst->used = jiffies;
			dst->used = jiffies;
		br_forward(dst->dst, skb, local_rcv, false);
		br_forward(dst->dst, skb, local_rcv, false);
	} else {
	} else {
+1 −1
Original line number Original line Diff line number Diff line
@@ -149,7 +149,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
		b.hello_timer_value = br_timer_value(&br->hello_timer);
		b.hello_timer_value = br_timer_value(&br->hello_timer);
		b.tcn_timer_value = br_timer_value(&br->tcn_timer);
		b.tcn_timer_value = br_timer_value(&br->tcn_timer);
		b.topology_change_timer_value = br_timer_value(&br->topology_change_timer);
		b.topology_change_timer_value = br_timer_value(&br->topology_change_timer);
		b.gc_timer_value = br_timer_value(&br->gc_timer);
		b.gc_timer_value = br_timer_value(&br->gc_work.timer);
		rcu_read_unlock();
		rcu_read_unlock();


		if (copy_to_user((void __user *)args[1], &b, sizeof(b)))
		if (copy_to_user((void __user *)args[1], &b, sizeof(b)))
Loading