Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c445477d authored by Ben Hutchings's avatar Ben Hutchings Committed by David S. Miller
Browse files

net: RPS: Enable hardware acceleration of RFS



Allow drivers for multiqueue hardware with flow filter tables to
accelerate RFS.  The driver must:

1. Set net_device::rx_cpu_rmap to a cpu_rmap of the RX completion
IRQs (in queue order).  This will provide a mapping from CPUs to the
queues for which completions are handled nearest to them.

2. Implement net_device_ops::ndo_rx_flow_steer.  This operation adds
or replaces a filter steering the given flow to the given RX queue, if
possible.

3. Periodically remove filters for which rps_may_expire_flow() returns
true.

Signed-off-by: default avatarBen Hutchings <bhutchings@solarflare.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent c39649c3
Loading
Loading
Loading
Loading
+30 −3
Original line number Diff line number Diff line
@@ -554,14 +554,16 @@ struct rps_map {
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))

/*
 * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
 * tail pointer for that CPU's input queue at the time of last enqueue.
 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
 * tail pointer for that CPU's input queue at the time of last enqueue, and
 * a hardware filter index.
 */
struct rps_dev_flow {
	u16 cpu;
	u16 fill;
	u16 filter;
	unsigned int last_qtail;
};
#define RPS_NO_FILTER 0xffff

/*
 * The rps_dev_flow_table structure contains a table of flow mappings.
@@ -611,6 +613,11 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,

extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;

#ifdef CONFIG_RFS_ACCEL
extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
				u32 flow_id, u16 filter_id);
#endif

/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
	struct rps_map __rcu		*rps_map;
@@ -769,6 +776,13 @@ struct netdev_tc_txq {
 * 	is always called from the stack with the rtnl lock held and netif tx
 * 	queues stopped. This allows the netdevice to perform queue management
 * 	safely.
 *
 *	RFS acceleration.
 * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
 *			    u16 rxq_index, u32 flow_id);
 *	Set hardware filter for RFS.  rxq_index is the target queue index;
 *	flow_id is a flow ID to be passed to rps_may_expire_flow() later.
 *	Return the filter ID on success, or a negative error code.
 */
#define HAVE_NET_DEVICE_OPS
struct net_device_ops {
@@ -842,6 +856,12 @@ struct net_device_ops {
	int			(*ndo_fcoe_get_wwn)(struct net_device *dev,
						    u64 *wwn, int type);
#endif
#ifdef CONFIG_RFS_ACCEL
	int			(*ndo_rx_flow_steer)(struct net_device *dev,
						     const struct sk_buff *skb,
						     u16 rxq_index,
						     u32 flow_id);
#endif
};

/*
@@ -1056,6 +1076,13 @@ struct net_device {

	/* Number of RX queues currently active in device */
	unsigned int		real_num_rx_queues;

#ifdef CONFIG_RFS_ACCEL
	/* CPU reverse-mapping for RX completion interrupts, indexed
	 * by RX queue number.  Assigned by driver.  This must only be
	 * set if the ndo_rx_flow_steer operation is defined. */
	struct cpu_rmap		*rx_cpu_rmap;
#endif
#endif

	rx_handler_func_t __rcu	*rx_handler;
+6 −0
Original line number Diff line number Diff line
@@ -221,6 +221,12 @@ config RPS
	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
	default y

config RFS_ACCEL
	boolean
	depends on RPS && GENERIC_HARDIRQS
	select CPU_RMAP
	default y

config XPS
	boolean
	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+91 −6
Original line number Diff line number Diff line
@@ -132,6 +132,7 @@
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>

#include "net-sysfs.h"

@@ -2588,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash);
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);

static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
	    struct rps_dev_flow *rflow, u16 next_cpu)
{
	u16 tcpu;

	tcpu = rflow->cpu = next_cpu;
	if (tcpu != RPS_NO_CPU) {
#ifdef CONFIG_RFS_ACCEL
		struct netdev_rx_queue *rxqueue;
		struct rps_dev_flow_table *flow_table;
		struct rps_dev_flow *old_rflow;
		u32 flow_id;
		u16 rxq_index;
		int rc;

		/* Should we steer this flow to a different hardware queue? */
		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap)
			goto out;
		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
		if (rxq_index == skb_get_rx_queue(skb))
			goto out;

		rxqueue = dev->_rx + rxq_index;
		flow_table = rcu_dereference(rxqueue->rps_flow_table);
		if (!flow_table)
			goto out;
		flow_id = skb->rxhash & flow_table->mask;
		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
							rxq_index, flow_id);
		if (rc < 0)
			goto out;
		old_rflow = rflow;
		rflow = &flow_table->flows[flow_id];
		rflow->cpu = next_cpu;
		rflow->filter = rc;
		if (old_rflow->filter == rflow->filter)
			old_rflow->filter = RPS_NO_FILTER;
	out:
#endif
		rflow->last_qtail =
			per_cpu(softnet_data, tcpu).input_queue_head;
	}

	return rflow;
}

/*
 * get_rps_cpu is called from netif_receive_skb and returns the target
 * CPU from the RPS map of the receiving queue for a given skb.
@@ -2658,12 +2706,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
		if (unlikely(tcpu != next_cpu) &&
		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
		      rflow->last_qtail)) >= 0)) {
			tcpu = rflow->cpu = next_cpu;
			if (tcpu != RPS_NO_CPU)
				rflow->last_qtail = per_cpu(softnet_data,
				    tcpu).input_queue_head;
		}
		      rflow->last_qtail)) >= 0))
			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);

		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
			*rflowp = rflow;
			cpu = tcpu;
@@ -2684,6 +2729,46 @@ done:
	return cpu;
}

#ifdef CONFIG_RFS_ACCEL

/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call
 * this function for each installed filter and remove the filters for
 * which it returns %true.
 */
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
			 u32 flow_id, u16 filter_id)
{
	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
	struct rps_dev_flow_table *flow_table;
	struct rps_dev_flow *rflow;
	bool expire = true;
	int cpu;

	rcu_read_lock();
	flow_table = rcu_dereference(rxqueue->rps_flow_table);
	if (flow_table && flow_id <= flow_table->mask) {
		rflow = &flow_table->flows[flow_id];
		cpu = ACCESS_ONCE(rflow->cpu);
		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
			   rflow->last_qtail) <
		     (int)(10 * flow_table->mask)))
			expire = false;
	}
	rcu_read_unlock();
	return expire;
}
EXPORT_SYMBOL(rps_may_expire_flow);

#endif /* CONFIG_RFS_ACCEL */

/* Called from hardirq (IPI) context */
static void rps_trigger_softirq(void *data)
{