Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a5fedd43 authored by Florian Westphal's avatar Florian Westphal Committed by Pablo Neira Ayuso
Browse files

netfilter: move skb_gso_segment into nfnetlink_queue module



skb_gso_segment is expensive, so it would be nice if we could
avoid it in the future. However, userspace needs to be prepared
to receive larger-than-mtu-packets (which will also have incorrect
l3/l4 checksums), so we cannot simply remove it.

The plan is to add a per-queue feature flag that userspace can
set when binding the queue.

The problem is that in nf_queue, we only have a queue number,
not the queue context/configuration settings.

This patch should have no impact other than the skb_gso_segment
call now being in a function that has access to the queue config
data.

A new size attribute in nf_queue_entry is needed so
nfnetlink_queue can duplicate the entry of the gso skb
when segmenting the skb while also copying the route key.

The follow up patch adds switch to disable skb_gso_segment when
queue config says so.

Signed-off-by: default avatarFlorian Westphal <fw@strlen.de>
Signed-off-by: default avatarPablo Neira Ayuso <pablo@netfilter.org>
parent 4bd60443
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -9,10 +9,13 @@ struct nf_queue_entry {

	struct nf_hook_ops	*elem;
	u_int8_t		pf;
	u16			size; /* sizeof(entry) + saved route keys */
	unsigned int		hook;
	struct net_device	*indev;
	struct net_device	*outdev;
	int			(*okfn)(struct sk_buff *);

	/* extra space to store route keys */
};

#define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry))
@@ -27,4 +30,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);

bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);

#endif /* _NF_QUEUE_H */
+9 −87
Original line number Diff line number Diff line
@@ -45,7 +45,7 @@ void nf_unregister_queue_handler(void)
}
EXPORT_SYMBOL(nf_unregister_queue_handler);

static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
	/* Release those devices we held, or Alexey will kill me. */
	if (entry->indev)
@@ -65,9 +65,10 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
	/* Drop reference to owner of hook which queued us. */
	module_put(entry->elem->owner);
}
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);

/* Bump dev refs so they don't vanish while packet is out */
static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
	if (!try_module_get(entry->elem->owner))
		return false;
@@ -92,12 +93,13 @@ static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)

	return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);

/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
static int __nf_queue(struct sk_buff *skb,
int nf_queue(struct sk_buff *skb,
		      struct nf_hook_ops *elem,
		      u_int8_t pf, unsigned int hook,
		      struct net_device *indev,
@@ -137,6 +139,7 @@ static int __nf_queue(struct sk_buff *skb,
		.indev	= indev,
		.outdev	= outdev,
		.okfn	= okfn,
		.size	= sizeof(*entry) + afinfo->route_key_size,
	};

	if (!nf_queue_entry_get_refs(entry)) {
@@ -163,87 +166,6 @@ static int __nf_queue(struct sk_buff *skb,
	return status;
}

#ifdef CONFIG_BRIDGE_NETFILTER
/* When called from bridge netfilter, skb->data must point to MAC header
 * before calling skb_gso_segment(). Else, original MAC header is lost
 * and segmented skbs will be sent to wrong destination.
 */
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
	if (skb->nf_bridge)
		__skb_push(skb, skb->network_header - skb->mac_header);
}

static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
	if (skb->nf_bridge)
		__skb_pull(skb, skb->network_header - skb->mac_header);
}
#else
#define nf_bridge_adjust_skb_data(s) do {} while (0)
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
#endif

int nf_queue(struct sk_buff *skb,
	     struct nf_hook_ops *elem,
	     u_int8_t pf, unsigned int hook,
	     struct net_device *indev,
	     struct net_device *outdev,
	     int (*okfn)(struct sk_buff *),
	     unsigned int queuenum)
{
	struct sk_buff *segs;
	int err = -EINVAL;
	unsigned int queued;

	if (!skb_is_gso(skb))
		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
				  queuenum);

	switch (pf) {
	case NFPROTO_IPV4:
		skb->protocol = htons(ETH_P_IP);
		break;
	case NFPROTO_IPV6:
		skb->protocol = htons(ETH_P_IPV6);
		break;
	}

	nf_bridge_adjust_skb_data(skb);
	segs = skb_gso_segment(skb, 0);
	/* Does not use PTR_ERR to limit the number of error codes that can be
	 * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
	 * 'ignore this hook'.
	 */
	if (IS_ERR(segs))
		goto out_err;
	queued = 0;
	err = 0;
	do {
		struct sk_buff *nskb = segs->next;

		segs->next = NULL;
		if (err == 0) {
			nf_bridge_adjust_segmented_data(segs);
			err = __nf_queue(segs, elem, pf, hook, indev,
					   outdev, okfn, queuenum);
		}
		if (err == 0)
			queued++;
		else
			kfree_skb(segs);
		segs = nskb;
	} while (segs);

	if (queued) {
		kfree_skb(skb);
		return 0;
	}
  out_err:
	nf_bridge_adjust_segmented_data(skb);
	return err;
}

void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
	struct sk_buff *skb = entry->skb;
@@ -283,7 +205,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
		local_bh_enable();
		break;
	case NF_QUEUE:
		err = __nf_queue(skb, elem, entry->pf, entry->hook,
		err = nf_queue(skb, elem, entry->pf, entry->hook,
				entry->indev, entry->outdev, entry->okfn,
				verdict >> NF_VERDICT_QBITS);
		if (err < 0) {
+137 −17
Original line number Diff line number Diff line
@@ -477,28 +477,13 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
}

static int
nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
			struct nf_queue_entry *entry)
{
	struct sk_buff *nskb;
	struct nfqnl_instance *queue;
	int err = -ENOBUFS;
	__be32 *packet_id_ptr;
	int failopen = 0;
	struct net *net = dev_net(entry->indev ?
				  entry->indev : entry->outdev);
	struct nfnl_queue_net *q = nfnl_queue_pernet(net);

	/* rcu_read_lock()ed by nf_hook_slow() */
	queue = instance_lookup(q, queuenum);
	if (!queue) {
		err = -ESRCH;
		goto err_out;
	}

	if (queue->copy_mode == NFQNL_COPY_NONE) {
		err = -EINVAL;
		goto err_out;
	}

	nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
	if (nskb == NULL) {
@@ -547,6 +532,141 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
	return err;
}

static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
	struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
	if (entry) {
		if (nf_queue_entry_get_refs(entry))
			return entry;
		kfree(entry);
	}
	return NULL;
}

#ifdef CONFIG_BRIDGE_NETFILTER
/* When called from bridge netfilter, skb->data must point to MAC header
 * before calling skb_gso_segment(). Else, original MAC header is lost
 * and segmented skbs will be sent to wrong destination.
 */
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
	if (skb->nf_bridge)
		__skb_push(skb, skb->network_header - skb->mac_header);
}

static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
	if (skb->nf_bridge)
		__skb_pull(skb, skb->network_header - skb->mac_header);
}
#else
#define nf_bridge_adjust_skb_data(s) do {} while (0)
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
#endif

static void free_entry(struct nf_queue_entry *entry)
{
	nf_queue_entry_release_refs(entry);
	kfree(entry);
}

static int
__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
			   struct sk_buff *skb, struct nf_queue_entry *entry)
{
	int ret = -ENOMEM;
	struct nf_queue_entry *entry_seg;

	nf_bridge_adjust_segmented_data(skb);

	if (skb->next == NULL) { /* last packet, no need to copy entry */
		struct sk_buff *gso_skb = entry->skb;
		entry->skb = skb;
		ret = __nfqnl_enqueue_packet(net, queue, entry);
		if (ret)
			entry->skb = gso_skb;
		return ret;
	}

	skb->next = NULL;

	entry_seg = nf_queue_entry_dup(entry);
	if (entry_seg) {
		entry_seg->skb = skb;
		ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
		if (ret)
			free_entry(entry_seg);
	}
	return ret;
}

static int
nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
	unsigned int queued;
	struct nfqnl_instance *queue;
	struct sk_buff *skb, *segs;
	int err = -ENOBUFS;
	struct net *net = dev_net(entry->indev ?
				  entry->indev : entry->outdev);
	struct nfnl_queue_net *q = nfnl_queue_pernet(net);

	/* rcu_read_lock()ed by nf_hook_slow() */
	queue = instance_lookup(q, queuenum);
	if (!queue)
		return -ESRCH;

	if (queue->copy_mode == NFQNL_COPY_NONE)
		return -EINVAL;

	if (!skb_is_gso(entry->skb))
		return __nfqnl_enqueue_packet(net, queue, entry);

	skb = entry->skb;

	switch (entry->pf) {
	case NFPROTO_IPV4:
		skb->protocol = htons(ETH_P_IP);
		break;
	case NFPROTO_IPV6:
		skb->protocol = htons(ETH_P_IPV6);
		break;
	}

	nf_bridge_adjust_skb_data(skb);
	segs = skb_gso_segment(skb, 0);
	/* Does not use PTR_ERR to limit the number of error codes that can be
	 * returned by nf_queue.  For instance, callers rely on -ECANCELED to
	 * mean 'ignore this hook'.
	 */
	if (IS_ERR(segs))
		goto out_err;
	queued = 0;
	err = 0;
	do {
		struct sk_buff *nskb = segs->next;
		if (err == 0)
			err = __nfqnl_enqueue_packet_gso(net, queue,
							segs, entry);
		if (err == 0)
			queued++;
		else
			kfree_skb(segs);
		segs = nskb;
	} while (segs);

	if (queued) {
		if (err) /* some segments are already queued */
			free_entry(entry);
		kfree_skb(skb);
		return 0;
	}
 out_err:
	nf_bridge_adjust_segmented_data(skb);
	return err;
}

static int
nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
{