Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2c59f06c authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'net-Kernel-side-filtering-for-route-dumps'



David Ahern says:

====================
net: Kernel side filtering for route dumps

Implement kernel side filtering of route dumps by protocol (e.g., which
routing daemon installed the route), route type (e.g., unicast), table
id and nexthop device.

iproute2 has been doing this filtering in userspace for years; pushing
the filters to the kernel side reduces the amount of data the kernel
sends and reduces wasted cycles on both sides processing unwanted data.
These initial options provide a huge improvement for efficiently
examining routes on large scale systems.

v2
- better handling of requests for a specific table. Rather than walking
  the hash of all tables, lookup the specific table and dump it
- refactor mr_rtm_dumproute moving the loop over the table into a
  helper that can be invoked directly
- add hook to return NLM_F_DUMP_FILTERED in DONE message to ensure
  it is returned even when the dump returns nothing
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents e8567951 e4e92fb1
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_notifier.h>
#include <net/ip_fib.h>

/**
 * struct vif_device - interface representor for multicast routing
@@ -283,6 +284,12 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);

int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
		   struct mr_mfc *c, struct rtmsg *rtm);
int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
		  struct netlink_callback *cb,
		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags),
		  spinlock_t *lock, struct fib_dump_filter *filter);
int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
		     struct mr_table *(*iter)(struct net *net,
					      struct mr_table *mrt),
@@ -290,7 +297,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
				 struct sk_buff *skb,
				 u32 portid, u32 seq, struct mr_mfc *c,
				 int cmd, int flags),
		     spinlock_t *lock);
		     spinlock_t *lock, struct fib_dump_filter *filter);

int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
	    int (*rules_dump)(struct net *net,
@@ -340,7 +347,7 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
			     struct sk_buff *skb,
			     u32 portid, u32 seq, struct mr_mfc *c,
			     int cmd, int flags),
		 spinlock_t *lock)
		 spinlock_t *lock, struct fib_dump_filter *filter)
{
	return -EINVAL;
}
+1 −0
Original line number Diff line number Diff line
@@ -180,6 +180,7 @@ struct netlink_callback {
	u16			family;
	u16			min_dump_alloc;
	bool			strict_check;
	u16			answer_flags;
	unsigned int		prev_seq, seq;
	long			args[6];
};
+1 −0
Original line number Diff line number Diff line
@@ -174,6 +174,7 @@ struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct net *net;
	struct fib_dump_filter filter;
};

int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
+14 −3
Original line number Diff line number Diff line
@@ -222,6 +222,16 @@ struct fib_table {
	unsigned long		__data[0];
};

struct fib_dump_filter {
	u32			table_id;
	/* filter_set is an optimization that an entry is set */
	bool			filter_set;
	unsigned char		protocol;
	unsigned char		rt_type;
	unsigned int		flags;
	struct net_device	*dev;
};

int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
		     struct fib_result *res, int fib_flags);
int fib_table_insert(struct net *, struct fib_table *, struct fib_config *,
@@ -229,7 +239,7 @@ int fib_table_insert(struct net *, struct fib_table *, struct fib_config *,
int fib_table_delete(struct net *, struct fib_table *, struct fib_config *,
		     struct netlink_ext_ack *extack);
int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
		   struct netlink_callback *cb);
		   struct netlink_callback *cb, struct fib_dump_filter *filter);
int fib_table_flush(struct net *net, struct fib_table *table);
struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
void fib_table_flush_external(struct fib_table *table);
@@ -453,6 +463,7 @@ static inline void fib_proc_exit(struct net *net)

u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);

int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack);
int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
			  struct fib_dump_filter *filter,
			  struct netlink_callback *cb);
#endif  /* _NET_FIB_H */
+65 −11
Original line number Diff line number Diff line
@@ -802,10 +802,16 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
	return err;
}

int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
			  struct fib_dump_filter *filter,
			  struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[RTA_MAX + 1];
	struct rtmsg *rtm;
	int err, i;

	ASSERT_RTNL();

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
@@ -814,8 +820,7 @@ int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,

	rtm = nlmsg_data(nlh);
	if (rtm->rtm_dst_len || rtm->rtm_src_len  || rtm->rtm_tos   ||
	    rtm->rtm_table   || rtm->rtm_protocol || rtm->rtm_scope ||
	    rtm->rtm_type) {
	    rtm->rtm_scope) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
		return -EINVAL;
	}
@@ -824,10 +829,43 @@ int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*rtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in FIB dump request");
	filter->flags    = rtm->rtm_flags;
	filter->protocol = rtm->rtm_protocol;
	filter->rt_type  = rtm->rtm_type;
	filter->table_id = rtm->rtm_table;

	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
				 rtm_ipv4_policy, extack);
	if (err < 0)
		return err;

	for (i = 0; i <= RTA_MAX; ++i) {
		int ifindex;

		if (!tb[i])
			continue;

		switch (i) {
		case RTA_TABLE:
			filter->table_id = nla_get_u32(tb[i]);
			break;
		case RTA_OIF:
			ifindex = nla_get_u32(tb[i]);
			filter->dev = __dev_get_by_index(net, ifindex);
			if (!filter->dev)
				return -ENODEV;
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	if (filter->flags || filter->protocol || filter->rt_type ||
	    filter->table_id || filter->dev) {
		filter->filter_set = 1;
		cb->answer_flags = NLM_F_DUMP_FILTERED;
	}

	return 0;
}
@@ -837,6 +875,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct fib_dump_filter filter = {};
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
@@ -844,15 +883,30 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
	int dumped = 0, err;

	if (cb->strict_check) {
		err = ip_valid_fib_dump_req(nlh, cb->extack);
		err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
		if (err < 0)
			return err;
	} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(nlh);

		filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
	}

	if (nlmsg_len(nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED)
	/* fib entries are never clones and ipv4 does not use prefix flag */
	if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED))
		return skb->len;

	if (filter.table_id) {
		tb = fib_get_table(net, filter.table_id);
		if (!tb) {
			NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
			return -ENOENT;
		}

		err = fib_table_dump(tb, skb, cb, &filter);
		return skb->len ? : err;
	}

	s_h = cb->args[0];
	s_e = cb->args[1];

@@ -867,7 +921,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			err = fib_table_dump(tb, skb, cb);
			err = fib_table_dump(tb, skb, cb, &filter);
			if (err < 0) {
				if (likely(skb->len))
					goto out;
Loading