Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a88eb6be authored by David S. Miller
Browse files

Merge branch 'tipc-link-starvation'



Jon Maloy says:

====================
tipc: improve interaction socket-link

We fix a very real starvation problem that may occur when a link
encounters send buffer congestion. At the same time we make the
interaction between the socket and link layer simpler and more
consistent.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents aa276dd7 365ad353
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -174,7 +174,7 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq)
 *                    and to identified node local sockets
 * @net: the applicable net namespace
 * @list: chain of buffers containing message
 * Consumes the buffer chain, except when returning -ELINKCONG
 * Consumes the buffer chain.
 * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
 */
int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list)
@@ -197,7 +197,7 @@ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list)
	tipc_bcast_unlock(net);

	/* Don't send to local node if adding to link failed */
	if (unlikely(rc)) {
	if (unlikely(rc && (rc != -ELINKCONG))) {
		__skb_queue_purge(&rcvq);
		return rc;
	}
@@ -206,7 +206,7 @@ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list)
	tipc_bcbase_xmit(net, &xmitq);
	tipc_sk_mcast_rcv(net, &rcvq, &inputq);
	__skb_queue_purge(list);
	return 0;
	return rc;
}

/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link
+32 −43
Original line number Diff line number Diff line
@@ -776,60 +776,47 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)

/**
 * link_schedule_user - schedule a message sender for wakeup after congestion
 * @link: congested link
 * @list: message that was attempted sent
 * @l: congested link
 * @hdr: header of message that is being sent
 * Create pseudo msg to send back to user when congestion abates
 * Does not consume buffer list
 */
static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
{
	struct tipc_msg *msg = buf_msg(skb_peek(list));
	int imp = msg_importance(msg);
	u32 oport = msg_origport(msg);
	u32 addr = tipc_own_addr(link->net);
	u32 dnode = tipc_own_addr(l->net);
	u32 dport = msg_origport(hdr);
	struct sk_buff *skb;

	/* This really cannot happen...  */
	if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) {
		pr_warn("%s<%s>, send queue full", link_rst_msg, link->name);
		return -ENOBUFS;
	}
	/* Non-blocking sender: */
	if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending)
		return -ELINKCONG;

	/* Create and schedule wakeup pseudo message */
	skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
			      addr, addr, oport, 0, 0);
			      dnode, l->addr, dport, 0, 0);
	if (!skb)
		return -ENOBUFS;
	TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list);
	TIPC_SKB_CB(skb)->chain_imp = imp;
	skb_queue_tail(&link->wakeupq, skb);
	link->stats.link_congs++;
	msg_set_dest_droppable(buf_msg(skb), true);
	TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
	skb_queue_tail(&l->wakeupq, skb);
	l->stats.link_congs++;
	return -ELINKCONG;
}

/**
 * link_prepare_wakeup - prepare users for wakeup after congestion
 * @link: congested link
 * Move a number of waiting users, as permitted by available space in
 * the send queue, from link wait queue to node wait queue for wakeup
 * @l: congested link
 * Wake up a number of waiting users, as permitted by available space
 * in the send queue
 */
void link_prepare_wakeup(struct tipc_link *l)
{
	int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,};
	int imp, lim;
	struct sk_buff *skb, *tmp;
	int imp, i = 0;

	skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
		imp = TIPC_SKB_CB(skb)->chain_imp;
		lim = l->backlog[imp].limit;
		pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
		if ((pnd[imp] + l->backlog[imp].len) >= lim)
			break;
		if (l->backlog[imp].len < l->backlog[imp].limit) {
			skb_unlink(skb, &l->wakeupq);
			skb_queue_tail(l->inputq, skb);
		} else if (i++ > 10) {
			break;
		}
	}
}

@@ -869,8 +856,7 @@ void tipc_link_reset(struct tipc_link *l)
 * @list: chain of buffers containing message
 * @xmitq: returned list of packets to be sent by caller
 *
 * Consumes the buffer chain, except when returning -ELINKCONG,
 * since the caller then may want to make more send attempts.
 * Consumes the buffer chain.
 * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
 * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
 */
@@ -879,7 +865,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
{
	struct tipc_msg *hdr = buf_msg(skb_peek(list));
	unsigned int maxwin = l->window;
	unsigned int i, imp = msg_importance(hdr);
	int imp = msg_importance(hdr);
	unsigned int mtu = l->mtu;
	u16 ack = l->rcv_nxt - 1;
	u16 seqno = l->snd_nxt;
@@ -888,19 +874,22 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
	struct sk_buff_head *backlogq = &l->backlogq;
	struct sk_buff *skb, *_skb, *bskb;
	int pkt_cnt = skb_queue_len(list);
	int rc = 0;

	/* Match msg importance against this and all higher backlog limits: */
	if (!skb_queue_empty(backlogq)) {
		for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
			if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
				return link_schedule_user(l, list);
		}
	}
	if (unlikely(msg_size(hdr) > mtu)) {
		skb_queue_purge(list);
		return -EMSGSIZE;
	}

	/* Allow oversubscription of one data msg per source at congestion */
	if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
		if (imp == TIPC_SYSTEM_IMPORTANCE) {
			pr_warn("%s<%s>, link overflow", link_rst_msg, l->name);
			return -ENOBUFS;
		}
		rc = link_schedule_user(l, hdr);
	}

	if (pkt_cnt > 1) {
		l->stats.sent_fragmented++;
		l->stats.sent_fragments += pkt_cnt;
@@ -946,7 +935,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
		skb_queue_splice_tail_init(list, backlogq);
	}
	l->snd_nxt = seqno;
	return 0;
	return rc;
}

void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
+0 −2
Original line number Diff line number Diff line
@@ -98,8 +98,6 @@ struct tipc_skb_cb {
	u32 bytes_read;
	struct sk_buff *tail;
	bool validated;
	bool wakeup_pending;
	u16 chain_sz;
	u16 chain_imp;
	u16 ackers;
};
+72 −28
Original line number Diff line number Diff line
@@ -608,7 +608,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 * Returns non-zero if any off-node ports overlap
 */
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
			      u32 limit, struct tipc_plist *dports)
			      u32 limit, struct list_head *dports)
{
	struct name_seq *seq;
	struct sub_seq *sseq;
@@ -633,7 +633,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
		info = sseq->info;
		list_for_each_entry(publ, &info->node_list, node_list) {
			if (publ->scope <= limit)
				tipc_plist_push(dports, publ->ref);
				u32_push(dports, publ->ref);
		}

		if (info->cluster_list_size != info->node_list_size)
@@ -1022,40 +1022,84 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
	return skb->len;
}

void tipc_plist_push(struct tipc_plist *pl, u32 port)
/* One element of a linked list of u32 values, as handled by the u32_*()
 * list helpers in this file.
 */
struct u32_item {
	struct list_head list;	/* linkage into the list that holds this item */
	u32 value;		/* the u32 value carried by this item */
};

bool u32_find(struct list_head *l, u32 value)
{
	struct tipc_plist *nl;
	struct u32_item *item;

	if (likely(!pl->port)) {
		pl->port = port;
		return;
	list_for_each_entry(item, l, list) {
		if (item->value == value)
			return true;
	}
	if (pl->port == port)
		return;
	list_for_each_entry(nl, &pl->list, list) {
		if (nl->port == port)
			return;
	return false;
}

bool u32_push(struct list_head *l, u32 value)
{
	struct u32_item *item;

	list_for_each_entry(item, l, list) {
		if (item->value == value)
			return false;
	}
	nl = kmalloc(sizeof(*nl), GFP_ATOMIC);
	if (nl) {
		nl->port = port;
		list_add(&nl->list, &pl->list);
	item = kmalloc(sizeof(*item), GFP_ATOMIC);
	if (unlikely(!item))
		return false;

	item->value = value;
	list_add(&item->list, l);
	return true;
}

/* u32_pop - detach and free the first item of a u32 list
 * @l: list head
 *
 * Returns the value held by the removed item, or 0 if the list is empty.
 * A stored value of 0 can therefore not be told apart from "empty" by
 * the return value alone.
 */
u32 u32_pop(struct list_head *l)
{
	struct u32_item *first;
	u32 popped;

	if (list_empty(l))
		return 0;

	first = list_first_entry(l, typeof(*first), list);
	popped = first->value;

	/* Unlink before freeing so the list never references freed memory */
	list_del(&first->list);
	kfree(first);

	return popped;
}

u32 tipc_plist_pop(struct tipc_plist *pl)
bool u32_del(struct list_head *l, u32 value)
{
	struct tipc_plist *nl;
	u32 port = 0;

	if (likely(list_empty(&pl->list))) {
		port = pl->port;
		pl->port = 0;
		return port;
	}
	nl = list_first_entry(&pl->list, typeof(*nl), list);
	port = nl->port;
	list_del(&nl->list);
	kfree(nl);
	return port;
	struct u32_item *item, *tmp;

	list_for_each_entry_safe(item, tmp, l, list) {
		if (item->value != value)
			continue;
		list_del(&item->list);
		kfree(item);
		return true;
	}
	return false;
}

/* u32_list_purge - unlink and free every item of a u32 list
 * @l: list head
 *
 * The _safe iterator is required because the current entry is freed
 * inside the loop body.
 */
void u32_list_purge(struct list_head *l)
{
	struct u32_item *cur, *next;

	list_for_each_entry_safe(cur, next, l, list) {
		list_del(&cur->list);
		kfree(cur);
	}
}

int u32_list_len(struct list_head *l)
{
	struct u32_item *item;
	int i = 0;

	list_for_each_entry(item, l, list) {
		i++;
	}
	return i;
}
+7 −14
Original line number Diff line number Diff line
@@ -99,7 +99,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);

u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
			      u32 limit, struct tipc_plist *dports);
			      u32 limit, struct list_head *dports);
struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
					 u32 upper, u32 scope, u32 port_ref,
					 u32 key);
@@ -116,18 +116,11 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);

struct tipc_plist {
	struct list_head list;
	u32 port;
};

static inline void tipc_plist_init(struct tipc_plist *pl)
{
	INIT_LIST_HEAD(&pl->list);
	pl->port = 0;
}

void tipc_plist_push(struct tipc_plist *pl, u32 port);
u32 tipc_plist_pop(struct tipc_plist *pl);
/* Add @value to list @l unless it is already there. Returns true if an item
 * was added, false if @value was already present or allocation failed.
 */
bool u32_push(struct list_head *l, u32 value);
/* Remove and free the first item of @l; returns its value, or 0 if empty */
u32 u32_pop(struct list_head *l);
/* Returns true if some item in @l holds @value */
bool u32_find(struct list_head *l, u32 value);
/* Remove and free the first item holding @value; true if one was found */
bool u32_del(struct list_head *l, u32 value);
/* Remove and free all items of @l */
void u32_list_purge(struct list_head *l);
/* Returns the number of items in @l, counted by walking the list */
int u32_list_len(struct list_head *l);

#endif
Loading