Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 30723673 authored by David S. Miller's avatar David S. Miller
Browse files

pkt_sched: Manage qdisc list inside of root qdisc.



Idea is from Patrick McHardy.

Instead of managing the list of qdiscs on the device level, manage it
in the root qdisc of a netdev_queue.  This solves all kinds of
visibility issues during qdisc destruction.

The way to iterate over all qdiscs of a netdev_queue is to visit
the netdev_queue->qdisc, and then traverse it's list.

The only special case is to ignore builting qdiscs at the root when
dumping or doing a qdisc_lookup().  That was not needed previously
because builtin qdiscs were not added to the device's qdisc_list.

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 72b25a91
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -636,8 +636,6 @@ struct net_device
	unsigned int		real_num_tx_queues;

	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
	spinlock_t		qdisc_list_lock;
	struct list_head	qdisc_list;

/*
 * One part is mostly used on xmit path (device)
+0 −2
Original line number Diff line number Diff line
@@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev)
	net = dev_net(dev);

	spin_lock_init(&dev->addr_list_lock);
	spin_lock_init(&dev->qdisc_list_lock);
	INIT_LIST_HEAD(&dev->qdisc_list);
	netdev_init_queue_locks(dev);

	dev->iflink = -1;
+131 −44
Original line number Diff line number Diff line
@@ -185,12 +185,21 @@ EXPORT_SYMBOL(unregister_qdisc);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		struct Qdisc *q, *txq_root = txq->qdisc;

	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (!(txq_root->flags & TCQ_F_BUILTIN) &&
		    txq_root->handle == handle)
			return txq_root;

		list_for_each_entry(q, &txq_root->list, list) {
			if (q->handle == handle)
				return q;
		}
	}
	return NULL;
}

@@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
				goto err_out3;
			}
		}
		spin_lock_bh(&dev->qdisc_list_lock);
		list_add_tail(&sch->list, &dev->qdisc_list);
		spin_unlock_bh(&dev->qdisc_list_lock);
		if (parent)
			list_add_tail(&sch->list, &dev_queue->qdisc->list);

		return sch;
	}
@@ -1037,13 +1045,57 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
	return -EINVAL;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
	return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) && 
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	struct Qdisc *q;

	if (net != &init_net)
		return 0;
@@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(&init_net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;
		list_for_each_entry(q, &dev->qdisc_list, list) {
			if (q_idx < s_q_idx) {
				q_idx++;
				continue;
			}
			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)

		dev_queue = netdev_get_tx_queue(dev, 0);
		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;
			q_idx++;
		}

		dev_queue = &dev->rx_queue;
		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}
@@ -1285,35 +1338,20 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct net *net = sock_net(skb->sk);
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (t < s_t || !q->ops->cl_ops ||
	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
			t++;
			continue;
		(*t_p)++;
		return 0;
	}
		if (t > s_t)
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
@@ -1324,10 +1362,59 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
			break;
		t++;
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	list_for_each_entry(q, &root->list, list) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = &dev->rx_queue;
	if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
+2 −8
Original line number Diff line number Diff line
@@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head)

void qdisc_destroy(struct Qdisc *qdisc)
{
	struct net_device *dev = qdisc_dev(qdisc);

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	spin_lock_bh(&dev->qdisc_list_lock);
	if (qdisc->parent)
		list_del(&qdisc->list);
	spin_unlock_bh(&dev->qdisc_list_lock);

	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
@@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev,
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
		spin_lock_bh(&dev->qdisc_list_lock);
		list_add_tail(&qdisc->list, &dev->qdisc_list);
		spin_unlock_bh(&dev->qdisc_list_lock);
	} else {
		qdisc =  &noqueue_qdisc;
	}