Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2ae6b594 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlxsw-Improve-IPv6-route-insertion-rate'



Ido Schimmel says:

====================
mlxsw: Improve IPv6 route insertion rate

Unlike IPv4, an IPv6 multipath route in the kernel is composed from
multiple sibling routes, each representing a single nexthop.

Therefore, an addition of a multipath route with N nexthops translates
to N in-kernel notifications. This is inefficient for device drivers
that need to program the route to the underlying device. Each time a new
nexthop is appended, a new nexthop group needs to be constructed and the
old one deleted.

This patchset improves the situation by sending a single notification
for a multipath route addition / deletion instead of one per-nexthop.
When adding thousands of multipath routes with 16 nexthops, I measured
an improvement of about x10 in the insertion rate.

Patches #1-#3 add a flag that indicates that in-kernel notifications
need to be suppressed and extend the IPv6 FIB notification info with
information about the number of sibling routes that are being notified.

Patches #4-#5 adjust the two current listeners to these notifications to
ignore notifications about IPv6 multipath routes.

Patches #6-#7 adds add / delete notifications for IPv6 multipath routes.

Patches #8-#14 do the same for mlxsw.

Patch #15 finally removes the limitations added in patches #4-#5 and
stops the kernel from sending a notification for each added / deleted
nexthop.

Patch #16 adds test cases.

v2 (David Ahern):
* Remove patch adjusting netdevsim to consume resources for each
  fib6_info. Instead, consume one resource for the entire multipath
  route
* Remove 'multipath_rt' usage in patch #10
* Remove 'multipath_rt' from 'struct fib6_entry_notifier_info' in patch
  #15. The member is only removed in this patch to prevent drivers from
  processing multipath routes twice during the series
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 714a485a 12ee8220
Loading
Loading
Loading
Loading
+153 −56
Original line number Diff line number Diff line
@@ -5278,17 +5278,21 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info *rt)
				struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;
	int err, i;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6))
		return PTR_ERR(mlxsw_sp_rt6);
	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
		if (IS_ERR(mlxsw_sp_rt6)) {
			err = PTR_ERR(mlxsw_sp_rt6);
			goto err_rt6_create;
		}

		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
		fib6_entry->nrt6++;
	}

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	if (err)
@@ -5297,29 +5301,40 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
	return 0;

err_nexthop6_group_update:
	i = nrt6;
err_rt6_create:
	for (i--; i >= 0; i--) {
		fib6_entry->nrt6--;
		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
					       struct mlxsw_sp_rt6, list);
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
	return err;
}

static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info *rt)
				struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int i;

	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
	if (WARN_ON(!mlxsw_sp_rt6))
		return;
	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
							   rt_arr[i]);
		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
			continue;

		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}

	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
}

static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 const struct fib6_info *rt)
@@ -5358,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct fib6_info *rt)
			   struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;
	int err, i;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	INIT_LIST_HEAD(&fib6_entry->rt6_list);

	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
		if (IS_ERR(mlxsw_sp_rt6)) {
			err = PTR_ERR(mlxsw_sp_rt6);
			goto err_rt6_create;
		}
		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
		fib6_entry->nrt6++;
	}

	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);

	INIT_LIST_HEAD(&fib6_entry->rt6_list);
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6 = 1;
	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;
@@ -5390,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
	return fib6_entry;

err_nexthop6_group_get:
	i = nrt6;
err_rt6_create:
	for (i--; i >= 0; i--) {
		fib6_entry->nrt6--;
		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
					       struct mlxsw_sp_rt6, list);
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create:
	}
	kfree(fib6_entry);
	return ERR_PTR(err);
}
@@ -5435,16 +5459,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,

static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
			       bool replace)
			       bool *p_replace)
{
	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
	struct mlxsw_sp_fib6_entry *fib6_entry;

	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace);

	if (replace && WARN_ON(!fib6_entry))
		return -EINVAL;
	if (*p_replace && !fib6_entry)
		*p_replace = false;

	if (fib6_entry) {
		list_add_tail(&new6_entry->common.list,
@@ -5479,11 +5503,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)

static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib6_entry *fib6_entry,
					 bool replace)
					 bool *p_replace)
{
	int err;

	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace);
	if (err)
		return err;

@@ -5556,10 +5580,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
}

static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct fib6_info *rt, bool replace)
				    struct fib6_info **rt_arr,
				    unsigned int nrt6, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];
	int err;

	if (mlxsw_sp->router->aborted)
@@ -5584,19 +5610,21 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry,
						      rt_arr, nrt6);
		if (err)
			goto err_fib6_entry_nexthop_add;
		return 0;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
						nrt6);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace);
	if (err)
		goto err_fib6_node_entry_link;

@@ -5613,10 +5641,12 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
}

static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib6_info *rt)
				     struct fib6_info **rt_arr,
				     unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];

	if (mlxsw_sp->router->aborted)
		return;
@@ -5628,11 +5658,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
	if (WARN_ON(!fib6_entry))
		return;

	/* If route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	/* If not all the nexthops are deleted, then only reduce the nexthop
	 * group.
	 */
	if (!list_is_singular(&fib6_entry->rt6_list)) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
	if (nrt6 != fib6_entry->nrt6) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
						nrt6);
		return;
	}

@@ -5893,10 +5924,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}

struct mlxsw_sp_fib6_event_work {
	struct fib6_info **rt_arr;
	unsigned int nrt6;
};

struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct mlxsw_sp_fib6_event_work fib6_work;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
@@ -5907,6 +5943,54 @@ struct mlxsw_sp_fib_event_work {
	unsigned long event;
};

static int
mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
			       struct fib6_entry_notifier_info *fen6_info)
{
	struct fib6_info *rt = fen6_info->rt;
	struct fib6_info **rt_arr;
	struct fib6_info *iter;
	unsigned int nrt6;
	int i = 0;

	nrt6 = fen6_info->nsiblings + 1;

	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
	if (!rt_arr)
		return -ENOMEM;

	fib6_work->rt_arr = rt_arr;
	fib6_work->nrt6 = nrt6;

	rt_arr[0] = rt;
	fib6_info_hold(rt);

	if (!fen6_info->nsiblings)
		return 0;

	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
		if (i == fen6_info->nsiblings)
			break;

		rt_arr[i + 1] = iter;
		fib6_info_hold(iter);
		i++;
	}
	WARN_ON_ONCE(i != fen6_info->nsiblings);

	return 0;
}

static void
mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
{
	int i;

	for (i = 0; i < fib6_work->nrt6; i++)
		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
	kfree(fib6_work->rt_arr);
}

static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
@@ -5965,18 +6049,21 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace);
					       fib_work->fib6_work.rt_arr,
					       fib_work->fib6_work.nrt6,
					       replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		mlxsw_sp_router_fib6_del(mlxsw_sp,
					 fib_work->fib6_work.rt_arr,
					 fib_work->fib6_work.nrt6);
		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
@@ -6065,22 +6152,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
	}
}

static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
				      struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen6_info;
	int err;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen6_info = container_of(info, struct fib6_entry_notifier_info,
					 info);
		fib_work->fen6_info = *fen6_info;
		fib6_info_hold(fib_work->fen6_info.rt);
		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
						     fen6_info);
		if (err)
			return err;
		break;
	}

	return 0;
}

static void
@@ -6221,7 +6312,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		mlxsw_sp_router_fib6_event(fib_work, info);
		err = mlxsw_sp_router_fib6_event(fib_work, info);
		if (err)
			goto err_fib_event;
		break;
	case RTNL_FAMILY_IP6MR:
	case RTNL_FAMILY_IPMR:
@@ -6233,6 +6326,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;

err_fib_event:
	kfree(fib_work);
	return NOTIFY_BAD;
}

struct mlxsw_sp_rif *
+6 −0
Original line number Diff line number Diff line
@@ -377,6 +377,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_entry_notifier_info {
	struct fib_notifier_info info; /* must be first */
	struct fib6_info *rt;
	unsigned int nsiblings;
};

/*
@@ -450,6 +451,11 @@ int call_fib6_entry_notifiers(struct net *net,
			      enum fib_event_type event_type,
			      struct fib6_info *rt,
			      struct netlink_ext_ack *extack);
int call_fib6_multipath_entry_notifiers(struct net *net,
					enum fib_event_type event_type,
					struct fib6_info *rt,
					unsigned int nsiblings,
					struct netlink_ext_ack *extack);
void fib6_rt_update(struct net *net, struct fib6_info *rt,
		    struct nl_info *info);
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
+5 −1
Original line number Diff line number Diff line
@@ -378,13 +378,17 @@ struct nla_policy {
/**
 * struct nl_info - netlink source information
 * @nlh: Netlink message header of original request
 * @nl_net: Network namespace
 * @portid: Netlink PORTID of requesting application
 * @skip_notify: Skip netlink notifications to user space
 * @skip_notify_kernel: Skip selected in-kernel notifications
 */
struct nl_info {
	struct nlmsghdr		*nlh;
	struct net		*nl_net;
	u32			portid;
	bool			skip_notify;
	u8			skip_notify:1,
				skip_notify_kernel:1;
};

/**
+33 −11
Original line number Diff line number Diff line
@@ -381,6 +381,22 @@ int call_fib6_entry_notifiers(struct net *net,
	return call_fib6_notifiers(net, event_type, &info.info);
}

int call_fib6_multipath_entry_notifiers(struct net *net,
					enum fib_event_type event_type,
					struct fib6_info *rt,
					unsigned int nsiblings,
					struct netlink_ext_ack *extack)
{
	struct fib6_entry_notifier_info info = {
		.info.extack = extack,
		.rt = rt,
		.nsiblings = nsiblings,
	};

	rt->fib6_table->fib_seq++;
	return call_fib6_notifiers(net, event_type, &info.info);
}

struct fib6_dump_arg {
	struct net *net;
	struct notifier_block *nb;
@@ -1123,11 +1139,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add:
		nlflags |= NLM_F_CREATE;

		if (!info->skip_notify_kernel) {
			err = call_fib6_entry_notifiers(info->nl_net,
							FIB_EVENT_ENTRY_ADD,
							rt, extack);
			if (err)
				return err;
		}

		rcu_assign_pointer(rt->fib6_next, iter);
		fib6_info_hold(rt);
@@ -1152,11 +1170,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
			return -ENOENT;
		}

		if (!info->skip_notify_kernel) {
			err = call_fib6_entry_notifiers(info->nl_net,
							FIB_EVENT_ENTRY_REPLACE,
							rt, extack);
			if (err)
				return err;
		}

		fib6_info_hold(rt);
		rcu_assign_pointer(rt->fib6_node, fn);
@@ -1839,9 +1859,11 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,

	fib6_purge_rt(rt, fn, net);

	if (!info->skip_notify_kernel)
		call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
	if (!info->skip_notify)
		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);

	fib6_info_release(rt);
}

+21 −0
Original line number Diff line number Diff line
@@ -3718,6 +3718,12 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
				info->skip_notify = 1;
		}

		info->skip_notify_kernel = 1;
		call_fib6_multipath_entry_notifiers(net,
						    FIB_EVENT_ENTRY_DEL,
						    rt,
						    rt->fib6_nsiblings,
						    NULL);
		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings,
					 fib6_siblings) {
@@ -4965,6 +4971,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
{
	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	enum fib_event_type event_type;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct fib6_info *rt;
@@ -5042,6 +5049,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
	 */
	info->skip_notify = 1;

	/* For add and replace, send one notification with all nexthops. For
	 * append, send one notification with all appended nexthops.
	 */
	info->skip_notify_kernel = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->fib6_info, info, extack);
@@ -5078,6 +5090,15 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
		nhn++;
	}

	event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
	err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
						  rt_notif, nhn - 1, extack);
	if (err) {
		/* Delete all the siblings that were just added */
		err_nh = NULL;
		goto add_errout;
	}

	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;
Loading