Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8ca712c3 authored by David S. Miller
Browse files

Merge branch 'net-speedup-netns-create-delete-time'



Eric Dumazet says:

====================
net: speedup netns create/delete time

When the rate of netns creation/deletion is high enough,
we observe softlockups in cleanup_net() caused by a huge list
of netns and way too many rcu_barrier() calls.

This patch series does some optimizations in kobject,
and adds batching to tunnels so that netns dismantles are
less costly.

IPv6 addrlabels also get a per-netns list, and tcp_metrics
also benefits from batch flushing.

This gives me one order of magnitude gain.
(~50 ms -> ~5 ms for one netns create/delete pair)

Tested:

for i in `seq 1 40`
do
 (for j in `seq 1 100` ; do  unshare -n /bin/true >/dev/null ; done) &
done
wait ; grep net_namespace /proc/slabinfo

Before patch series :

$ time ./add_del_unshare.sh
net_namespace        116    258   5504    1    2 : tunables    8    4    0 : slabdata    116    258      0

real	3m24.910s
user	0m0.747s
sys	0m43.162s

After :
$ time ./add_del_unshare.sh
net_namespace        135    291   5504    1    2 : tunables    8    4    0 : slabdata    135    291      0

real	0m22.117s
user	0m0.728s
sys	0m35.328s
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 752fbcc3 64bc1781
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -258,7 +258,8 @@ int ip_tunnel_get_iflink(const struct net_device *dev);
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname);

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
			   struct rtnl_link_ops *ops);

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol);
+5 −0
Original line number Diff line number Diff line
@@ -89,6 +89,11 @@ struct netns_ipv6 {
	atomic_t		fib6_sernum;
	struct seg6_pernet_data *seg6_data;
	struct fib_notifier_ops	*notifier_ops;
	struct {
		struct hlist_head head;
		spinlock_t	lock;
		u32		seq;
	} ip6addrlbl_table;
};

#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+51 −43
Original line number Diff line number Diff line
@@ -294,6 +294,55 @@ static void cleanup_uevent_env(struct subprocess_info *info)
}
#endif

/*
 * Broadcast a uevent to every uevent netlink socket that has listeners.
 *
 * The payload is the string "<action_string>@<devpath>" including its NUL
 * terminator, followed by env->buflen bytes copied from env->buf. It is
 * built once, on the first socket found with listeners, and the same skb
 * is then reused for every later socket.
 *
 * Returns 0 when CONFIG_NET is not set or when no socket had listeners.
 * Otherwise returns the status left by the last socket that had listeners:
 * -ENOMEM if the skb could not be allocated, else the result of
 * netlink_broadcast_filtered() with -ENOBUFS and -ESRCH mapped to 0.
 */
static int kobject_uevent_net_broadcast(struct kobject *kobj,
					struct kobj_uevent_env *env,
					const char *action_string,
					const char *devpath)
{
	int retval = 0;
#if defined(CONFIG_NET)
	struct uevent_sock *entry;
	struct sk_buff *msg = NULL;

	list_for_each_entry(entry, &uevent_sock_list, list) {
		struct sock *nlsk = entry->sk;

		if (!netlink_has_listeners(nlsk, 1))
			continue;

		if (msg == NULL) {
			/* header size: both strings, plus '@' and the NUL */
			size_t hdr_len = strlen(action_string) + 1 +
					 strlen(devpath) + 1;
			char *hdr;

			/* sized for the header plus the whole env buffer */
			msg = alloc_skb(hdr_len + env->buflen, GFP_KERNEL);
			if (msg == NULL) {
				/*
				 * Report the failure, but keep walking: the
				 * allocation is attempted again for the next
				 * socket that has listeners.
				 */
				retval = -ENOMEM;
				continue;
			}

			hdr = skb_put(msg, hdr_len);
			sprintf(hdr, "%s@%s", action_string, devpath);

			skb_put_data(msg, env->buf, env->buflen);

			NETLINK_CB(msg).dst_group = 1;
		}

		/*
		 * skb_get() hands each send its own reference, so msg stays
		 * valid for the following sockets (presumably the broadcast
		 * consumes the reference it is given -- confirm).
		 */
		retval = netlink_broadcast_filtered(nlsk, skb_get(msg),
						    0, 1, GFP_KERNEL,
						    kobj_bcast_filter,
						    kobj);
		switch (retval) {
		case -ENOBUFS:	/* ENOBUFS should be handled in userspace */
		case -ESRCH:
			retval = 0;
			break;
		default:
			break;
		}
	}
	/* drop the reference held by this function (msg may still be NULL) */
	consume_skb(msg);
#endif
	return retval;
}

/**
 * kobject_uevent_env - send an uevent with environmental data
 *
@@ -316,9 +365,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
	const struct kset_uevent_ops *uevent_ops;
	int i = 0;
	int retval = 0;
#ifdef CONFIG_NET
	struct uevent_sock *ue_sk;
#endif

	pr_debug("kobject: '%s' (%p): %s\n",
		 kobject_name(kobj), kobj, __func__);
@@ -427,46 +473,8 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
		mutex_unlock(&uevent_sock_mutex);
		goto exit;
	}

#if defined(CONFIG_NET)
	/* send netlink message */
	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
		struct sock *uevent_sock = ue_sk->sk;
		struct sk_buff *skb;
		size_t len;

		if (!netlink_has_listeners(uevent_sock, 1))
			continue;

		/* allocate message with the maximum possible size */
		len = strlen(action_string) + strlen(devpath) + 2;
		skb = alloc_skb(len + env->buflen, GFP_KERNEL);
		if (skb) {
			char *scratch;

			/* add header */
			scratch = skb_put(skb, len);
			sprintf(scratch, "%s@%s", action_string, devpath);

			/* copy keys to our continuous event payload buffer */
			for (i = 0; i < env->envp_idx; i++) {
				len = strlen(env->envp[i]) + 1;
				scratch = skb_put(skb, len);
				strcpy(scratch, env->envp[i]);
			}

			NETLINK_CB(skb).dst_group = 1;
			retval = netlink_broadcast_filtered(uevent_sock, skb,
							    0, 1, GFP_KERNEL,
							    kobj_bcast_filter,
							    kobj);
			/* ENOBUFS should be handled in userspace */
			if (retval == -ENOBUFS || retval == -ESRCH)
				retval = 0;
		} else
			retval = -ENOMEM;
	}
#endif
	retval = kobject_uevent_net_broadcast(kobj, env, action_string,
					      devpath);
	mutex_unlock(&uevent_sock_mutex);

#ifdef CONFIG_UEVENT_HELPER
+9 −13
Original line number Diff line number Diff line
@@ -1013,15 +1013,14 @@ static int __net_init ipgre_init_net(struct net *net)
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
	ip_tunnel_delete_net(itn, &ipgre_link_ops);
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
@@ -1540,15 +1539,14 @@ static int __net_init ipgre_tap_init_net(struct net *net)
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
@@ -1559,16 +1557,14 @@ static int __net_init erspan_init_net(struct net *net)
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_net(struct net *net)
static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);

	ip_tunnel_delete_net(itn, &erspan_link_ops);
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit = erspan_exit_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
+9 −3
Original line number Diff line number Diff line
@@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
/**
 * ip_tunnel_delete_nets - tear down IP tunnel state for a batch of netns
 * @net_list: list of struct net entries, linked through their exit_list member
 * @id: pernet id passed to net_generic() to look up each ip_tunnel_net
 * @ops: link ops forwarded unchanged to ip_tunnel_destroy()
 *
 * Walks every netns on @net_list inside a single rtnl_lock()/rtnl_unlock()
 * section, calls ip_tunnel_destroy() for each one with the same local list
 * head, and then issues one unregister_netdevice_many() for that list.
 */
void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);	/* local list shared by all netns in the batch */

	/* RTNL is taken once for the whole batch, not once per netns */
	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		/*
		 * NOTE(review): presumably this queues the netns' tunnel
		 * devices onto 'list' without unregistering them yet --
		 * confirm against ip_tunnel_destroy().
		 */
		ip_tunnel_destroy(itn, &list, ops);
	}
	/* single unregister call covering everything collected above */
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
Loading