
Commit 9aa28f2b authored by David S. Miller


Pablo Neira Ayuso <pablo@netfilter.org> says:

====================
nftables updates for net-next

The following patchset contains nftables updates for your net-next tree;
they are:

* Add a set operation to the meta expression by means of the select_ops()
  infrastructure; this allows us to set the packet mark, among other things.
  From Arturo Borrero Gonzalez.

* Fix a wrong sscanf() format string in nf_tables_set_alloc_name(), from
  Daniel Borkmann.

* Add a new queue expression to nf_tables. This comes with two preparatory
  patches, one to add a mask in nf_tables_core so that the queue verdict is
  evaluated appropriately, and another to refactor common code with
  xt_NFQUEUE, from Eric Leblond.

* Do not hide nftables from Kconfig if nfnetlink is not enabled, also from
  Eric Leblond.

* Add the reject expression to nf_tables; this adds the missing TCP RST
  support. It comes with an initial patch to refactor common code with the
  iptables REJECT target, again from Eric Leblond.

* Remove an unused variable assignment in nf_tables_dump_set(), from Michal
  Nazarewicz.

* Remove the nft_meta_target code, now that Arturo added the set operation
  to the meta expression, from me.

* Add help information for nf_tables to Kconfig, also from me.

* Allow dumping all sets by specifying NFPROTO_UNSPEC; a similar feature is
  available for other nf_tables objects. Requested by Arturo, from me.

* Expose the table usage counter, so we can know how many chains are using
  this table without dumping the list of chains, from Tomasz Bursztyka.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 6a8c4796 c9c8e485
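
The select_ops() mechanism mentioned in the first bullet lets one expression
type expose several ops variants. A minimal sketch of the idea, assuming get
and set ops structures like those used by the meta expression (an
illustration, not the patch itself); the NFTA_META_* attributes it keys on
appear in the uapi diff below:

/*
 * Sketch: pick "get" ops when a destination register is given, "set"
 * ops when a source register is given. nft_meta_get_ops and
 * nft_meta_set_ops are assumed to be defined by the expression module.
 */
static const struct nft_expr_ops *
meta_select_ops_sketch(const struct nft_ctx *ctx,
		       const struct nlattr * const tb[])
{
	if (tb[NFTA_META_KEY] == NULL)
		return ERR_PTR(-EINVAL);

	if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
		return ERR_PTR(-EINVAL);	/* get or set, not both */

	if (tb[NFTA_META_DREG])
		return &nft_meta_get_ops;	/* load metadata into a register */

	if (tb[NFTA_META_SREG])
		return &nft_meta_set_ops;	/* e.g. set skb->mark from a register */

	return ERR_PTR(-EINVAL);
}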
+128 −0
#ifndef _IPV4_NF_REJECT_H
#define _IPV4_NF_REJECT_H

#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/dst.h>

static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
{
	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}

/* Send RST reply */
static void nf_send_reset(struct sk_buff *oldskb, int hook)
{
	struct sk_buff *nskb;
	const struct iphdr *oiph;
	struct iphdr *niph;
	const struct tcphdr *oth;
	struct tcphdr _otcph, *tcph;

	/* IP header checks: fragment. */
	if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
		return;

	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
				 sizeof(_otcph), &_otcph);
	if (oth == NULL)
		return;

	/* No RST for RST. */
	if (oth->rst)
		return;

	if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		return;

	/* Check checksum */
	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
		return;
	oiph = ip_hdr(oldskb);

	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
			 LL_MAX_HEADER, GFP_ATOMIC);
	if (!nskb)
		return;

	skb_reserve(nskb, LL_MAX_HEADER);

	skb_reset_network_header(nskb);
	niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
	niph->version	= 4;
	niph->ihl	= sizeof(struct iphdr) / 4;
	niph->tos	= 0;
	niph->id	= 0;
	niph->frag_off	= htons(IP_DF);
	niph->protocol	= IPPROTO_TCP;
	niph->check	= 0;
	niph->saddr	= oiph->daddr;
	niph->daddr	= oiph->saddr;

	skb_reset_transport_header(nskb);
	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
	memset(tcph, 0, sizeof(*tcph));
	tcph->source	= oth->dest;
	tcph->dest	= oth->source;
	tcph->doff	= sizeof(struct tcphdr) / 4;

	if (oth->ack)
		tcph->seq = oth->ack_seq;
	else {
		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
				      oldskb->len - ip_hdrlen(oldskb) -
				      (oth->doff << 2));
		tcph->ack = 1;
	}

	tcph->rst	= 1;
	tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
				    niph->daddr, 0);
	nskb->ip_summed = CHECKSUM_PARTIAL;
	nskb->csum_start = (unsigned char *)tcph - nskb->head;
	nskb->csum_offset = offsetof(struct tcphdr, check);

	/* ip_route_me_harder expects skb->dst to be set */
	skb_dst_set_noref(nskb, skb_dst(oldskb));

	nskb->protocol = htons(ETH_P_IP);
	if (ip_route_me_harder(nskb, RTN_UNSPEC))
		goto free_nskb;

	niph->ttl	= ip4_dst_hoplimit(skb_dst(nskb));

	/* "Never happens" */
	if (nskb->len > dst_mtu(skb_dst(nskb)))
		goto free_nskb;

	nf_ct_attach(nskb, oldskb);

#ifdef CONFIG_BRIDGE_NETFILTER
	/* If we use ip_local_out for bridged traffic, the MAC source on
	 * the RST will be ours, instead of the destination's.  This confuses
	 * some routers/firewalls, and they drop the packet.  So we need to
	 * build the eth header using the original destination's MAC as the
	 * source, and send the RST packet directly.
	 */
	if (oldskb->nf_bridge) {
		struct ethhdr *oeth = eth_hdr(oldskb);
		nskb->dev = oldskb->nf_bridge->physindev;
		niph->tot_len = htons(nskb->len);
		ip_send_check(niph);
		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
				    oeth->h_source, oeth->h_dest, nskb->len) < 0)
			goto free_nskb;
		dev_queue_xmit(nskb);
	} else
#endif
		ip_local_out(nskb);

	return;

 free_nskb:
	kfree_skb(nskb);
}


#endif /* _IPV4_NF_REJECT_H */
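
The header above gathers the IPv4 ICMP-unreachable and TCP RST helpers so
that iptables targets and the new nft reject expression can share them. As a
hedged sketch (not the actual nft_reject code), an eval path might dispatch
on the configured reject type like this, with the surrounding expression
boilerplate assumed:

static void reject_eval_sketch(struct sk_buff *skb, int hooknum,
			       enum nft_reject_types type, u8 icmp_code)
{
	switch (type) {
	case NFT_REJECT_ICMP_UNREACH:
		nf_send_unreach(skb, icmp_code);	/* ICMP destination unreachable */
		break;
	case NFT_REJECT_TCP_RST:
		nf_send_reset(skb, hooknum);		/* forged TCP RST, built above */
		break;
	}
}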
+171 −0
#ifndef _IPV6_NF_REJECT_H
#define _IPV6_NF_REJECT_H

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_fib.h>
#include <net/ip6_checksum.h>
#include <linux/netfilter_ipv6.h>

static inline void
nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
	     unsigned int hooknum)
{
	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
		skb_in->dev = net->loopback_dev;

	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
}

/* Send RST reply */
static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
{
	struct sk_buff *nskb;
	struct tcphdr otcph, *tcph;
	unsigned int otcplen, hh_len;
	int tcphoff, needs_ack;
	const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
	struct ipv6hdr *ip6h;
#define DEFAULT_TOS_VALUE	0x0U
	const __u8 tclass = DEFAULT_TOS_VALUE;
	struct dst_entry *dst = NULL;
	u8 proto;
	__be16 frag_off;
	struct flowi6 fl6;

	if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
	    (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
		pr_debug("addr is not unicast.\n");
		return;
	}

	proto = oip6h->nexthdr;
	tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);

	if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
		pr_debug("Cannot get TCP header.\n");
		return;
	}

	otcplen = oldskb->len - tcphoff;

	/* IP header checks: fragment, too short. */
	if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
		pr_debug("proto(%d) != IPPROTO_TCP, "
			 "or too short. otcplen = %d\n",
			 proto, otcplen);
		return;
	}

	if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
		BUG();

	/* No RST for RST. */
	if (otcph.rst) {
		pr_debug("RST is set\n");
		return;
	}

	/* Check checksum. */
	if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
		pr_debug("TCP checksum is invalid\n");
		return;
	}

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.saddr = oip6h->daddr;
	fl6.daddr = oip6h->saddr;
	fl6.fl6_sport = otcph.dest;
	fl6.fl6_dport = otcph.source;
	security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
	dst = ip6_route_output(net, NULL, &fl6);
	if (dst == NULL || dst->error) {
		dst_release(dst);
		return;
	}
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
	if (IS_ERR(dst))
		return;

	hh_len = (dst->dev->hard_header_len + 15)&~15;
	nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
			 + sizeof(struct tcphdr) + dst->trailer_len,
			 GFP_ATOMIC);

	if (!nskb) {
		net_dbg_ratelimited("cannot alloc skb\n");
		dst_release(dst);
		return;
	}

	skb_dst_set(nskb, dst);

	skb_reserve(nskb, hh_len + dst->header_len);

	skb_put(nskb, sizeof(struct ipv6hdr));
	skb_reset_network_header(nskb);
	ip6h = ipv6_hdr(nskb);
	ip6_flow_hdr(ip6h, tclass, 0);
	ip6h->hop_limit = ip6_dst_hoplimit(dst);
	ip6h->nexthdr = IPPROTO_TCP;
	ip6h->saddr = oip6h->daddr;
	ip6h->daddr = oip6h->saddr;

	skb_reset_transport_header(nskb);
	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
	/* Truncate to length (no data) */
	tcph->doff = sizeof(struct tcphdr)/4;
	tcph->source = otcph.dest;
	tcph->dest = otcph.source;

	if (otcph.ack) {
		needs_ack = 0;
		tcph->seq = otcph.ack_seq;
		tcph->ack_seq = 0;
	} else {
		needs_ack = 1;
		tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
				      + otcplen - (otcph.doff<<2));
		tcph->seq = 0;
	}

	/* Reset flags */
	((u_int8_t *)tcph)[13] = 0;
	tcph->rst = 1;
	tcph->ack = needs_ack;
	tcph->window = 0;
	tcph->urg_ptr = 0;
	tcph->check = 0;

	/* Adjust TCP checksum */
	tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
				      &ipv6_hdr(nskb)->daddr,
				      sizeof(struct tcphdr), IPPROTO_TCP,
				      csum_partial(tcph,
						   sizeof(struct tcphdr), 0));

	nf_ct_attach(nskb, oldskb);

#ifdef CONFIG_BRIDGE_NETFILTER
	/* If we use ip6_local_out for bridged traffic, the MAC source on
	 * the RST will be ours, instead of the destination's.  This confuses
	 * some routers/firewalls, and they drop the packet.  So we need to
	 * build the eth header using the original destination's MAC as the
	 * source, and send the RST packet directly.
	 */
	if (oldskb->nf_bridge) {
		struct ethhdr *oeth = eth_hdr(oldskb);
		nskb->dev = oldskb->nf_bridge->physindev;
		nskb->protocol = htons(ETH_P_IPV6);
		ip6h->payload_len = htons(sizeof(struct tcphdr));
		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
				    oeth->h_source, oeth->h_dest, nskb->len) < 0)
			return;
		dev_queue_xmit(nskb);
	} else
#endif
		ip6_local_out(nskb);
}

#endif /* _IPV6_NF_REJECT_H */
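
Both nf_send_reset() and nf_send_reset6() apply the same RFC 793 rule when
choosing sequence numbers for the forged RST: if the offending segment
carried an ACK, the RST reuses its acknowledgment number as the sequence
number; otherwise the RST acknowledges everything the segment consumed in
sequence space (SYN and FIN each count as one octet). A standalone
illustration in plain C (not kernel code):

#include <stdint.h>

struct seg {
	uint32_t seq, ack_seq;	/* host byte order for simplicity */
	int syn, fin, ack;	/* flag bits, 0 or 1 */
	uint32_t datalen;	/* TCP payload length */
};

static void rst_numbers(const struct seg *in, struct seg *rst)
{
	if (in->ack) {
		rst->seq = in->ack_seq;
		rst->ack_seq = 0;
		rst->ack = 0;
	} else {
		rst->seq = 0;
		rst->ack_seq = in->seq + in->syn + in->fin + in->datalen;
		rst->ack = 1;
	}
}
/* Example: a bare SYN with seq = 1000 draws an RST/ACK with ack_seq = 1001. */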
+62 −0
#ifndef _NF_QUEUE_H
#define _NF_QUEUE_H

#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>

/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
	struct list_head	list;
@@ -33,4 +37,62 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);

static inline void init_hashrandom(u32 *jhash_initval)
{
	while (*jhash_initval == 0)
		*jhash_initval = prandom_u32();
}

static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval)
{
	const struct iphdr *iph = ip_hdr(skb);

	/* packets in either direction go into same queue */
	if ((__force u32)iph->saddr < (__force u32)iph->daddr)
		return jhash_3words((__force u32)iph->saddr,
			(__force u32)iph->daddr, iph->protocol, jhash_initval);

	return jhash_3words((__force u32)iph->daddr,
			(__force u32)iph->saddr, iph->protocol, jhash_initval);
}

#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	u32 a, b, c;

	if ((__force u32)ip6h->saddr.s6_addr32[3] <
	    (__force u32)ip6h->daddr.s6_addr32[3]) {
		a = (__force u32) ip6h->saddr.s6_addr32[3];
		b = (__force u32) ip6h->daddr.s6_addr32[3];
	} else {
		b = (__force u32) ip6h->saddr.s6_addr32[3];
		a = (__force u32) ip6h->daddr.s6_addr32[3];
	}

	if ((__force u32)ip6h->saddr.s6_addr32[1] <
	    (__force u32)ip6h->daddr.s6_addr32[1])
		c = (__force u32) ip6h->saddr.s6_addr32[1];
	else
		c = (__force u32) ip6h->daddr.s6_addr32[1];

	return jhash_3words(a, b, c, jhash_initval);
}
#endif

static inline u32
nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
	     u32 jhash_initval)
{
	if (family == NFPROTO_IPV4)
		queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32;
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
	else if (family == NFPROTO_IPV6)
		queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32;
#endif

	return queue;
}

#endif /* _NF_QUEUE_H */
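
nfqueue_hash() spreads flows over a contiguous block of queues with a
multiply-shift instead of a modulo, and hash_v4()/hash_v6() order the
addresses before hashing so both directions of a flow land in the same
queue. A standalone demonstration of the scaling arithmetic (plain
userspace C, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Map a 32-bit hash uniformly onto [base, base + queues_total). */
static uint16_t pick_queue(uint32_t hash, uint16_t base, uint16_t queues_total)
{
	return base + (uint16_t)(((uint64_t)hash * queues_total) >> 32);
}

int main(void)
{
	/* A flow hashing to 0x80000000 lands halfway into a 16-queue block. */
	printf("%u\n", (unsigned)pick_queue(0x80000000u, 100, 16)); /* prints 108 */
	return 0;
}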
+24 −0
@@ -110,11 +110,13 @@ enum nft_table_flags {
 *
 * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
 * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
 * @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
 */
enum nft_table_attributes {
	NFTA_TABLE_UNSPEC,
	NFTA_TABLE_NAME,
	NFTA_TABLE_FLAGS,
	NFTA_TABLE_USE,
	__NFTA_TABLE_MAX
};
#define NFTA_TABLE_MAX		(__NFTA_TABLE_MAX - 1)
@@ -553,11 +555,13 @@ enum nft_meta_keys {
 *
 * @NFTA_META_DREG: destination register (NLA_U32)
 * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
 * @NFTA_META_SREG: source register (NLA_U32)
 */
enum nft_meta_attributes {
	NFTA_META_UNSPEC,
	NFTA_META_DREG,
	NFTA_META_KEY,
	NFTA_META_SREG,
	__NFTA_META_MAX
};
#define NFTA_META_MAX		(__NFTA_META_MAX - 1)
@@ -657,6 +661,26 @@ enum nft_log_attributes {
};
#define NFTA_LOG_MAX		(__NFTA_LOG_MAX - 1)

/**
 * enum nft_queue_attributes - nf_tables queue expression netlink attributes
 *
 * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
 * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
 * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
 */
enum nft_queue_attributes {
	NFTA_QUEUE_UNSPEC,
	NFTA_QUEUE_NUM,
	NFTA_QUEUE_TOTAL,
	NFTA_QUEUE_FLAGS,
	__NFTA_QUEUE_MAX
};
#define NFTA_QUEUE_MAX		(__NFTA_QUEUE_MAX - 1)

#define NFT_QUEUE_FLAG_BYPASS		0x01 /* for compatibility with v2 */
#define NFT_QUEUE_FLAG_CPU_FANOUT	0x02 /* use current CPU (no hashing) */
#define NFT_QUEUE_FLAG_MASK		0x03
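
As a hedged sketch of the kernel side, the queue expression's init could
parse these attributes roughly as follows (nf_tables u16 attributes travel
in network byte order; the surrounding expression boilerplate and policy
checks are assumed):

static int queue_init_sketch(const struct nlattr * const tb[],
			     u16 *queuenum, u16 *queues_total, u16 *flags)
{
	if (tb[NFTA_QUEUE_NUM] == NULL)
		return -EINVAL;
	*queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));

	*queues_total = 1;
	if (tb[NFTA_QUEUE_TOTAL])
		*queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));

	*flags = 0;
	if (tb[NFTA_QUEUE_FLAGS]) {
		*flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
		if (*flags & ~NFT_QUEUE_FLAG_MASK)
			return -EINVAL;	/* reject unknown flag bits */
	}
	return 0;
}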

/**
 * enum nft_reject_types - nf_tables reject expression reject types
 *
+14 −4
@@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT
config NF_TABLES_IPV4
	depends on NF_TABLES
	tristate "IPv4 nf_tables support"
	help
	  This option enables the IPv4 support for nf_tables.

config NFT_REJECT_IPV4
	depends on NF_TABLES_IPV4
	tristate "nf_tables IPv4 reject support"

config NFT_CHAIN_ROUTE_IPV4
	depends on NF_TABLES_IPV4
	tristate "IPv4 nf_tables route chain support"
	help
	  This option enables the "route" chain for IPv4 in nf_tables. This
	  chain type is used to force packet re-routing after mangling header
	  fields such as the source, destination, type of service and
	  the packet mark.

config NFT_CHAIN_NAT_IPV4
	depends on NF_TABLES_IPV4
	depends on NF_NAT_IPV4 && NFT_NAT
	tristate "IPv4 nf_tables nat chain support"
	help
	  This option enables the "nat" chain for IPv4 in nf_tables. This
	  chain type is used to perform Network Address Translation (NAT)
	  packet transformations such as the source, destination address and
	  source and destination ports.

config NF_TABLES_ARP
	depends on NF_TABLES
	tristate "ARP nf_tables support"
	help
	  This option enables the ARP support for nf_tables.

config IP_NF_IPTABLES
	tristate "IP tables support (required for filtering/masq/NAT)"