Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9a95d9c6 authored by David S. Miller
Browse files

Merge branch 'Remove-rtnl-lock-dependency-from-all-action-implementations'



Vlad Buslov says:

====================
Remove rtnl lock dependency from all action implementations

Currently, all netlink protocol handlers for updating rules, actions and
qdiscs are protected with single global rtnl lock which removes any
possibility for parallelism. This patch set is a second step to remove
rtnl lock dependency from TC rules update path.

Recently, new rtnl registration flag RTNL_FLAG_DOIT_UNLOCKED was added.
Handlers registered with this flag are called without RTNL taken. End
goal is to have rule update handlers (RTM_NEWTFILTER, RTM_DELTFILTER,
etc.) to be registered with UNLOCKED flag to allow parallel execution.
However, there is no intention to completely remove or split rtnl lock
itself. This patch set addresses specific problems in implementation of
tc actions that prevent their control path from being executed
concurrently. Additional changes are required to refactor classifiers
API and individual classifiers for parallel execution. This patch set
lays groundwork to eventually register rule update handlers as
rtnl-unlocked.

Action API is already prepared for parallel execution with previous
patch set, which means that action ops that use action API for their
implementation do not require additional modifications. (delete, search,
etc.) Action API implements concurrency-safe reference counting and
guarantees that cleanup/delete is called only once, after last reference
to action is released.

The goal of this change is to update specific action APIs that access
action private state directly, in order to be independent from external
locking. General approach is to re-use existing tcf_lock spinlock (used
by some action implementations to synchronize control path with data
path) to protect action private state from concurrent modification. If
action has rcu-protected pointer, tcf spinlock is used to protect its
update code, instead of relying on rtnl lock.

Some actions need to determine rtnl mutex status in order to release it.
For example, ife action can load additional kernel modules (meta ops) and
must make sure that no locks are held during module load. In such cases
'rtnl_held' argument is used to conditionally release rtnl mutex.

Changes from V1 to V2:
- Patch 12:
  - new patch
- Patch 14:
  - refactor gen_new_estimator() to reuse stats_lock when re-assigning
    rate estimator statistics pointer
- Remove mirred and tunnel_key helper function changes. (to be submitted
  as a standalone patch)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 2b14e1ea e329bc42
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -101,6 +101,7 @@ struct tc_action_ops {
	void	(*stats_update)(struct tc_action *, u64, u32, u64);
	size_t  (*get_fill_size)(const struct tc_action *act);
	struct net_device *(*get_dev)(const struct tc_action *a);
	void	(*put_dev)(struct net_device *dev);
	int     (*delete)(struct net *net, u32 index);
};

+2 −2
Original line number Diff line number Diff line
@@ -59,13 +59,13 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
		      struct net_rate_estimator __rcu **rate_est,
		      spinlock_t *stats_lock,
		      spinlock_t *lock,
		      seqcount_t *running, struct nlattr *opt);
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
			  struct net_rate_estimator __rcu **ptr,
			  spinlock_t *stats_lock,
			  spinlock_t *lock,
			  seqcount_t *running, struct nlattr *opt);
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
+13 −8
Original line number Diff line number Diff line
@@ -112,7 +112,7 @@ static void est_timer(struct timer_list *t)
 * @bstats: basic statistics
 * @cpu_bstats: bstats per cpu
 * @rate_est: rate estimator statistics
 * @stats_lock: statistics lock
 * @lock: lock for statistics and control path
 * @running: qdisc running seqcount
 * @opt: rate estimator configuration TLV
 *
@@ -128,7 +128,7 @@ static void est_timer(struct timer_list *t)
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
		      struct net_rate_estimator __rcu **rate_est,
		      spinlock_t *stats_lock,
		      spinlock_t *lock,
		      seqcount_t *running,
		      struct nlattr *opt)
{
@@ -154,19 +154,22 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
	seqcount_init(&est->seq);
	intvl_log = parm->interval + 2;
	est->bstats = bstats;
	est->stats_lock = stats_lock;
	est->stats_lock = lock;
	est->running  = running;
	est->ewma_log = parm->ewma_log;
	est->intvl_log = intvl_log;
	est->cpu_bstats = cpu_bstats;

	if (stats_lock)
	if (lock)
		local_bh_disable();
	est_fetch_counters(est, &b);
	if (stats_lock)
	if (lock)
		local_bh_enable();
	est->last_bytes = b.bytes;
	est->last_packets = b.packets;

	if (lock)
		spin_lock_bh(lock);
	old = rcu_dereference_protected(*rate_est, 1);
	if (old) {
		del_timer_sync(&old->timer);
@@ -179,6 +182,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
	mod_timer(&est->timer, est->next_jiffies);

	rcu_assign_pointer(*rate_est, est);
	if (lock)
		spin_unlock_bh(lock);
	if (old)
		kfree_rcu(old, rcu);
	return 0;
@@ -209,7 +214,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
 * @bstats: basic statistics
 * @cpu_bstats: bstats per cpu
 * @rate_est: rate estimator statistics
 * @stats_lock: statistics lock
 * @lock: lock for statistics and control path
 * @running: qdisc running seqcount (might be NULL)
 * @opt: rate estimator configuration TLV
 *
@@ -221,11 +226,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
			  struct net_rate_estimator __rcu **rate_est,
			  spinlock_t *stats_lock,
			  spinlock_t *lock,
			  seqcount_t *running, struct nlattr *opt)
{
	return gen_new_estimator(bstats, cpu_bstats, rate_est,
				 stats_lock, running, opt);
				 lock, running, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);

+7 −3
Original line number Diff line number Diff line
@@ -143,11 +143,12 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
		.index   = prog->tcf_index,
		.refcnt  = refcount_read(&prog->tcf_refcnt) - ref,
		.bindcnt = atomic_read(&prog->tcf_bindcnt) - bind,
		.action  = prog->tcf_action,
	};
	struct tcf_t tm;
	int ret;

	spin_lock(&prog->tcf_lock);
	opt.action = prog->tcf_action;
	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

@@ -163,9 +164,11 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
			  TCA_ACT_BPF_PAD))
		goto nla_put_failure;

	spin_unlock(&prog->tcf_lock);
	return skb->len;

nla_put_failure:
	spin_unlock(&prog->tcf_lock);
	nlmsg_trim(skb, tp);
	return -1;
}
@@ -264,7 +267,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
{
	cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
	/* updates to prog->filter are prevented, since it's called either
	 * with rtnl lock or during final cleanup in rcu callback
	 * with tcf lock or during final cleanup in rcu callback
	 */
	cfg->filter = rcu_dereference_protected(prog->filter, 1);

@@ -336,8 +339,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
		goto out;

	prog = to_bpf(*act);
	ASSERT_RTNL();

	spin_lock(&prog->tcf_lock);
	if (res != ACT_P_CREATED)
		tcf_bpf_prog_fill_cfg(prog, &old);

@@ -349,6 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,

	prog->tcf_action = parm->action;
	rcu_assign_pointer(prog->filter, cfg.filter);
	spin_unlock(&prog->tcf_lock);

	if (res == ACT_P_CREATED) {
		tcf_idr_insert(tn, *act);
+15 −9
Original line number Diff line number Diff line
@@ -50,7 +50,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
			 struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, csum_net_id);
	struct tcf_csum_params *params_old, *params_new;
	struct tcf_csum_params *params_new;
	struct nlattr *tb[TCA_CSUM_MAX + 1];
	struct tc_csum *parm;
	struct tcf_csum *p;
@@ -88,20 +88,22 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
	}

	p = to_tcf_csum(*a);
	ASSERT_RTNL();

	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
	if (unlikely(!params_new)) {
		tcf_idr_release(*a, bind);
		return -ENOMEM;
	}
	params_old = rtnl_dereference(p->params);
	params_new->update_flags = parm->update_flags;

	spin_lock(&p->tcf_lock);
	p->tcf_action = parm->action;
	params_new->update_flags = parm->update_flags;
	rcu_assign_pointer(p->params, params_new);
	if (params_old)
		kfree_rcu(params_old, rcu);
	rcu_swap_protected(p->params, params_new,
			   lockdep_is_held(&p->tcf_lock));
	spin_unlock(&p->tcf_lock);

	if (params_new)
		kfree_rcu(params_new, rcu);

	if (ret == ACT_P_CREATED)
		tcf_idr_insert(tn, *a);
@@ -599,11 +601,13 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
		.index   = p->tcf_index,
		.refcnt  = refcount_read(&p->tcf_refcnt) - ref,
		.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
		.action  = p->tcf_action,
	};
	struct tcf_t t;

	params = rtnl_dereference(p->params);
	spin_lock(&p->tcf_lock);
	params = rcu_dereference_protected(p->params,
					   lockdep_is_held(&p->tcf_lock));
	opt.action = p->tcf_action;
	opt.update_flags = params->update_flags;

	if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
@@ -612,10 +616,12 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
	tcf_tm_dump(&t, &p->tcf_tm);
	if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD))
		goto nla_put_failure;
	spin_unlock(&p->tcf_lock);

	return skb->len;

nla_put_failure:
	spin_unlock(&p->tcf_lock);
	nlmsg_trim(skb, b);
	return -1;
}
Loading