Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 06021292 authored by Eliezer Tamir, committed by David S. Miller
Browse files

net: add low latency socket poll



Adds an ndo_ll_poll method and the code that supports it.
This method can be used by low latency applications to busy-poll
Ethernet device queues directly from the socket code.
sysctl_net_ll_poll controls how many microseconds to poll.
Default is zero (disabled).
Individual protocol support will be added by subsequent patches.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Tested-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent af12fa6e
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -50,6 +50,13 @@ The maximum number of packets that kernel can handle on a NAPI interrupt,
it's a Per-CPU variable.
Default: 64

low_latency_poll
----------------
Low latency busy poll timeout. (needs CONFIG_NET_LL_RX_POLL)
Approximate time in us to spin waiting for packets on the device queue.
Recommended value is 50. May increase power usage.
Default: 0 (off)

rmem_default
------------

+3 −0
Original line number Diff line number Diff line
@@ -971,6 +971,9 @@ struct net_device_ops {
						     struct netpoll_info *info,
						     gfp_t gfp);
	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
#endif
#ifdef CONFIG_NET_LL_RX_POLL
	int			(*ndo_ll_poll)(struct napi_struct *dev);
#endif
	int			(*ndo_set_vf_mac)(struct net_device *dev,
						  int queue, u8 *mac);
+6 −2
Original line number Diff line number Diff line
@@ -386,6 +386,7 @@ typedef unsigned char *sk_buff_data_t;
 *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
 *	@dma_cookie: a cookie to one of several possible DMA operations
 *		done by skb DMA functions
  *	@napi_id: id of the NAPI struct this skb came from
 *	@secmark: security marking
 *	@mark: Generic packet mark
 *	@dropcount: total number of sk_receive_queue overflows
@@ -500,8 +501,11 @@ struct sk_buff {
	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
	kmemcheck_bitfield_end(flags2);

#ifdef CONFIG_NET_DMA
#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL
	union {
		unsigned int	napi_id;
		dma_cookie_t	dma_cookie;
	};
#endif
#ifdef CONFIG_NETWORK_SECMARK
	__u32			secmark;

include/net/ll_poll.h

0 → 100644
+148 −0
Original line number Diff line number Diff line
/*
 * Low Latency Sockets
 * Copyright(c) 2013 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Author: Eliezer Tamir
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 */

/*
 * For now this depends on CONFIG_X86_TSC
 */

#ifndef _LINUX_NET_LL_POLL_H
#define _LINUX_NET_LL_POLL_H

#include <linux/netdevice.h>
#include <net/ip.h>

#ifdef CONFIG_NET_LL_RX_POLL

struct napi_struct;
/*
 * Busy-poll budget knob.  ll_end_time() multiplies it by TSC_MHZ to get a
 * cycle budget, and sk_valid_ll() refuses to poll while it is zero.
 */
extern unsigned long sysctl_net_ll_poll __read_mostly;

/*
 * return values from ndo_ll_poll
 *
 * sk_poll_ll() treats a positive return as a count to add to
 * LINUX_MIB_LOWLATENCYRXPACKETS and stops polling on LL_FLUSH_FAILED.
 * LL_FLUSH_BUSY is not special-cased there; polling simply continues.
 */
#define LL_FLUSH_FAILED		-1
#define LL_FLUSH_BUSY		-2

/*
 * we don't mind a ~2.5% imprecision
 *
 * The shift divides tsc_khz by 1024 rather than 1000, which is where the
 * imprecision comes from.
 */
#define TSC_MHZ (tsc_khz >> 10)

/*
 * Compute the cycle-counter value at which a busy-poll pass should give up.
 *
 * The sysctl value is sampled exactly once (ACCESS_ONCE), scaled by TSC_MHZ
 * to get a cycle budget, and added to the current cycle count.
 *
 * Returns the deadline, in the same units as get_cycles().
 */
static inline cycles_t ll_end_time(void)
{
	unsigned long poll_setting = ACCESS_ONCE(sysctl_net_ll_poll);
	cycles_t budget = TSC_MHZ * poll_setting;

	return budget + get_cycles();
}

static inline bool sk_valid_ll(struct sock *sk)
{
	return sysctl_net_ll_poll && sk->sk_napi_id &&
	       !need_resched() && !signal_pending(current);
}

/*
 * Check whether the busy-poll deadline @end_time has not yet passed.
 *
 * Both the current cycle count and @end_time are narrowed to unsigned long
 * and compared with time_after().
 *
 * Returns true while the current cycle count is not after @end_time.
 */
static inline bool can_poll_ll(cycles_t end_time)
{
	unsigned long now = (unsigned long)get_cycles();
	unsigned long deadline = (unsigned long)end_time;

	return !time_after(now, deadline);
}

/*
 * Busy-poll the device queue associated with @sk.
 *
 * @sk:       socket whose sk_napi_id selects the NAPI context to poll
 * @nonblock: when non-zero, at most one poll pass is made
 *
 * The deadline is taken from ll_end_time() before any lock is acquired.
 * The device's ndo_ll_poll is then called repeatedly until one of:
 *  - it returns LL_FLUSH_FAILED,
 *  - the socket receive queue becomes non-empty,
 *  - the deadline passes (can_poll_ll() returns false),
 *  - @nonblock is set.
 *
 * Returns true if sk_receive_queue is non-empty once polling stops.
 * Returns false without polling if no NAPI context matches sk_napi_id or
 * the device provides no ndo_ll_poll.
 */
static inline bool sk_poll_ll(struct sock *sk, int nonblock)
{
	cycles_t deadline = ll_end_time();
	const struct net_device_ops *dev_ops;
	struct napi_struct *napi_ctx;
	bool have_data = false;
	int polled;

	/*
	 * rcu read lock for napi hash
	 * bh so we don't race with net_rx_action
	 */
	rcu_read_lock_bh();

	napi_ctx = napi_by_id(sk->sk_napi_id);
	if (!napi_ctx)
		goto unlock;

	dev_ops = napi_ctx->dev->netdev_ops;
	if (!dev_ops->ndo_ll_poll)
		goto unlock;

	for (;;) {
		polled = dev_ops->ndo_ll_poll(napi_ctx);

		/* permanent failure */
		if (polled == LL_FLUSH_FAILED)
			break;

		if (polled > 0) {
			/* local bh are disabled so it is ok to use _BH */
			NET_ADD_STATS_BH(sock_net(sk),
					 LINUX_MIB_LOWLATENCYRXPACKETS, polled);
		}

		/* exit checks, in the same order as the original condition */
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		if (!can_poll_ll(deadline))
			break;
		if (nonblock)
			break;
	}

	have_data = !skb_queue_empty(&sk->sk_receive_queue);
unlock:
	rcu_read_unlock_bh();
	return have_data;
}

/*
 * used in the NIC receive handler to mark the skb
 *
 * Copies the napi_id of @napi into @skb->napi_id.
 */
static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi)
{
	skb->napi_id = napi->napi_id;
}

/*
 * used in the protocol handler to propagate the napi_id to the socket
 *
 * Copies @skb->napi_id into @sk->sk_napi_id, the field that sk_valid_ll()
 * tests and sk_poll_ll() uses to look up the NAPI context.
 */
static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
{
	sk->sk_napi_id = skb->napi_id;
}

#else /* CONFIG_NET_LL_RX_POLL */

/* Stub for builds without CONFIG_NET_LL_RX_POLL: always returns 0. */
static inline cycles_t ll_end_time(void)
{
	return 0;
}

/* Stub for builds without CONFIG_NET_LL_RX_POLL: always returns false. */
static inline bool sk_valid_ll(struct sock *sk)
{
	return false;
}

/* Stub for builds without CONFIG_NET_LL_RX_POLL: does nothing, returns false. */
static inline bool sk_poll_ll(struct sock *sk, int nonblock)
{
	return false;
}

/* Stub for builds without CONFIG_NET_LL_RX_POLL: intentionally a no-op. */
static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi)
{
}

/* Stub for builds without CONFIG_NET_LL_RX_POLL: intentionally a no-op. */
static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
{
}

/* Stub for builds without CONFIG_NET_LL_RX_POLL: always returns false. */
static inline bool can_poll_ll(cycles_t end_time)
{
	return false;
}

#endif /* CONFIG_NET_LL_RX_POLL */
#endif /* _LINUX_NET_LL_POLL_H */
+4 −0
Original line number Diff line number Diff line
@@ -229,6 +229,7 @@ struct cg_proto;
  *	@sk_omem_alloc: "o" is "option" or "other"
  *	@sk_wmem_queued: persistent queue size
  *	@sk_forward_alloc: space allocated forward
  *	@sk_napi_id: id of the last napi context to receive data for sk
  *	@sk_allocation: allocation mode
  *	@sk_sndbuf: size of send buffer in bytes
  *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
@@ -324,6 +325,9 @@ struct sock {
	int			sk_forward_alloc;
#ifdef CONFIG_RPS
	__u32			sk_rxhash;
#endif
#ifdef CONFIG_NET_LL_RX_POLL
	unsigned int		sk_napi_id;
#endif
	atomic_t		sk_drops;
	int			sk_rcvbuf;
Loading