Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e4db5b61 authored by Florian Westphal's avatar Florian Westphal Committed by Steffen Klassert
Browse files

xfrm: policy: remove pcpu policy cache



Kristian Evensen says:
  In a project I am involved in, we are running ipsec (Strongswan) on
  different mt7621-based routers. Each router is configured as an
  initiator and has around ~30 tunnels to different responders (running
  on misc. devices). Before the flow cache was removed (kernel 4.9), we
  got a combined throughput of around 70Mbit/s for all tunnels on one
  router. However, we recently switched to kernel 4.14 (4.14.48), and
  the total throughput is somewhere around 57Mbit/s (best-case). I.e., a
  drop of around 20%. Reverting the flow cache removal restores, as
  expected, performance levels to that of kernel 4.9.

When pcpu xdst exists, it has to be validated first before it can be
used.

A negative hit thus increases cost vs. no-cache.

As number of tunnels increases, hit rate decreases so this pcpu caching
isn't a viable strategy.

Furthermore, the xdst cache also needs to run with BH off, so when
removing this the bh disable/enable pairs can be removed too.

Kristian tested a 4.14.y backport of this change and reported
increased performance:

  In our tests, the throughput reduction has been reduced from around -20%
  to -5%. We also see that the overall throughput is independent of the
  number of tunnels, while before the throughput was reduced as the number
  of tunnels increased.

Reported-by: default avatarKristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: default avatarFlorian Westphal <fw@strlen.de>
Signed-off-by: default avatarSteffen Klassert <steffen.klassert@secunet.com>
parent f203b76d
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -332,7 +332,6 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
void km_policy_notify(struct xfrm_policy *xp, int dir,
		      const struct km_event *c);
void xfrm_policy_cache_flush(void);
void km_state_notify(struct xfrm_state *x, const struct km_event *c);

struct xfrm_tmpl;
+0 −10
Original line number Diff line number Diff line
@@ -307,12 +307,6 @@ static int xfrm_dev_register(struct net_device *dev)
	return xfrm_api_check(dev);
}

static int xfrm_dev_unregister(struct net_device *dev)
{
	xfrm_policy_cache_flush();
	return NOTIFY_DONE;
}

static int xfrm_dev_feat_change(struct net_device *dev)
{
	return xfrm_api_check(dev);
@@ -323,7 +317,6 @@ static int xfrm_dev_down(struct net_device *dev)
	if (dev->features & NETIF_F_HW_ESP)
		xfrm_dev_state_flush(dev_net(dev), dev, true);

	xfrm_policy_cache_flush();
	return NOTIFY_DONE;
}

@@ -335,9 +328,6 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
	case NETDEV_REGISTER:
		return xfrm_dev_register(dev);

	case NETDEV_UNREGISTER:
		return xfrm_dev_unregister(dev);

	case NETDEV_FEAT_CHANGE:
		return xfrm_dev_feat_change(dev);

+1 −138
Original line number Diff line number Diff line
@@ -45,8 +45,6 @@ struct xfrm_flo {
	u8 flags;
};

static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
static struct work_struct *xfrm_pcpu_work __read_mostly;
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;

@@ -1732,108 +1730,6 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,

}

static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
{
	this_cpu_write(xfrm_last_dst, xdst);
	if (old)
		dst_release(&old->u.dst);
}

static void __xfrm_pcpu_work_fn(void)
{
	struct xfrm_dst *old;

	old = this_cpu_read(xfrm_last_dst);
	if (old && !xfrm_bundle_ok(old))
		xfrm_last_dst_update(NULL, old);
}

static void xfrm_pcpu_work_fn(struct work_struct *work)
{
	local_bh_disable();
	rcu_read_lock();
	__xfrm_pcpu_work_fn();
	rcu_read_unlock();
	local_bh_enable();
}

void xfrm_policy_cache_flush(void)
{
	struct xfrm_dst *old;
	bool found = false;
	int cpu;

	might_sleep();

	local_bh_disable();
	rcu_read_lock();
	for_each_possible_cpu(cpu) {
		old = per_cpu(xfrm_last_dst, cpu);
		if (old && !xfrm_bundle_ok(old)) {
			if (smp_processor_id() == cpu) {
				__xfrm_pcpu_work_fn();
				continue;
			}
			found = true;
			break;
		}
	}

	rcu_read_unlock();
	local_bh_enable();

	if (!found)
		return;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		bool bundle_release;

		rcu_read_lock();
		old = per_cpu(xfrm_last_dst, cpu);
		bundle_release = old && !xfrm_bundle_ok(old);
		rcu_read_unlock();

		if (!bundle_release)
			continue;

		if (cpu_online(cpu)) {
			schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
			continue;
		}

		rcu_read_lock();
		old = per_cpu(xfrm_last_dst, cpu);
		if (old && !xfrm_bundle_ok(old)) {
			per_cpu(xfrm_last_dst, cpu) = NULL;
			dst_release(&old->u.dst);
		}
		rcu_read_unlock();
	}

	put_online_cpus();
}

static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
				struct xfrm_state * const xfrm[],
				int num)
{
	const struct dst_entry *dst = &xdst->u.dst;
	int i;

	if (xdst->num_xfrms != num)
		return false;

	for (i = 0; i < num; i++) {
		if (!dst || dst->xfrm != xfrm[i])
			return false;
		dst = xfrm_dst_child(dst);
	}

	return xfrm_bundle_ok(xdst);
}

static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
@@ -1842,7 +1738,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
	struct xfrm_dst *xdst, *old;
	struct xfrm_dst *xdst;
	struct dst_entry *dst;
	int err;

@@ -1854,22 +1750,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
		return ERR_PTR(err);
	}

	xdst = this_cpu_read(xfrm_last_dst);
	if (xdst &&
	    xdst->u.dst.dev == dst_orig->dev &&
	    xdst->num_pols == num_pols &&
	    memcmp(xdst->pols, pols,
		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
		dst_hold(&xdst->u.dst);
		xfrm_pols_put(pols, num_pols);
		while (err > 0)
			xfrm_state_put(xfrm[--err]);
		return xdst;
	}

	old = xdst;

	dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1882,9 +1762,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	atomic_set(&xdst->u.dst.__refcnt, 2);
	xfrm_last_dst_update(xdst, old);

	return xdst;
}

@@ -2085,11 +1962,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
	if (num_xfrms <= 0)
		goto make_dummy_bundle;

	local_bh_disable();
	xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
					      xflo->dst_orig);
	local_bh_enable();

	if (IS_ERR(xdst)) {
		err = PTR_ERR(xdst);
		if (err == -EREMOTE) {
@@ -2181,11 +2055,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
				goto no_transform;
			}

			local_bh_disable();
			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			local_bh_enable();

			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
@@ -3035,15 +2907,6 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {

void __init xfrm_init(void)
{
	int i;

	xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
				       GFP_KERNEL);
	BUG_ON(!xfrm_pcpu_work);

	for (i = 0; i < NR_CPUS; i++)
		INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);

	register_pernet_subsys(&xfrm_net_ops);
	xfrm_dev_init();
	seqcount_init(&xfrm_policy_hash_generation);
+2 −3
Original line number Diff line number Diff line
@@ -735,10 +735,9 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
	}
out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	if (cnt) {
	if (cnt)
		err = 0;
		xfrm_policy_cache_flush();
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_flush);