Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ca69d710 authored by David S. Miller
Browse files


Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next tree.
They are:

* nf_tables set timeout infrastructure from Patrick McHardy.

1) Add set timeout support.

2) Add support for set element timeouts using the new set extension
   infrastructure.

3) Add garbage collection helper functions to get rid of stale elements.
   Elements are accumulated in a batch that is asynchronously released
   via RCU when the batch is full.

4) Add garbage collection synchronization helpers. This introduces a new
   element busy bit to address concurrent access from the netlink API and the
   garbage collector.

5) Add timeout support for the nft_hash set implementation. The garbage
   collector periodically checks for stale elements from the workqueue.

* iptables/nftables cgroup fixes:

6) Ignore non full-socket objects from the input path, otherwise cgroup
   match may crash, from Daniel Borkmann.

7) Fix cgroup in nf_tables.

8) Save some cycles from xt_socket by skipping packet header parsing when
   skb->sk is already set because of early demux. Also from Daniel.

* br_netfilter updates from Florian Westphal.

9) Save frag_max_size and restore it from the forward path too.

10) Use a per-cpu area to restore the original source MAC address when traffic
    is DNAT'ed.

11) Add helper functions to access physical devices.

12) Use these new physdev helper functions from xt_physdev.

13) Add another nf_bridge_info_get() helper function to fetch the br_netfilter
    state information.

14) Annotate original layer 2 protocol number in nf_bridge info, instead of
    using kludgy flags.

15) Also annotate the pkttype mangling when the packet travels back and forth
    from the IP to the bridge layer, instead of using a flag.

* More nf_tables set enhancements from Patrick:

16) Fix possible usage of set variant that doesn't support timeouts.

17) Avoid spurious "set is full" errors from Netlink API when there are pending
    stale elements scheduled to be released.

18) Restrict loop checks to set maps.

19) Add support for dynamic set updates from the packet path.

20) Add support to store optional user data (eg. comments) per set element.

BTW, I have also pulled net-next into nf-next to anticipate the conflict
resolution between your okfn() signature changes and Florian's br_netfilter
updates.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3ab1a30f aadd51aa
Loading
Loading
Loading
Loading
+23 −5
Original line number Original line Diff line number Diff line
@@ -2,7 +2,7 @@
#define __LINUX_BRIDGE_NETFILTER_H
#define __LINUX_BRIDGE_NETFILTER_H


#include <uapi/linux/netfilter_bridge.h>
#include <uapi/linux/netfilter_bridge.h>

#include <linux/skbuff.h>


enum nf_br_hook_priorities {
enum nf_br_hook_priorities {
	NF_BR_PRI_FIRST = INT_MIN,
	NF_BR_PRI_FIRST = INT_MIN,
@@ -17,15 +17,12 @@ enum nf_br_hook_priorities {


#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)


#define BRNF_PKT_TYPE			0x01
#define BRNF_BRIDGED_DNAT		0x02
#define BRNF_BRIDGED_DNAT		0x02
#define BRNF_NF_BRIDGE_PREROUTING	0x08
#define BRNF_NF_BRIDGE_PREROUTING	0x08
#define BRNF_8021Q			0x10
#define BRNF_PPPoE			0x20


static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
{
	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
	if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
		return PPPOE_SES_HLEN;
		return PPPOE_SES_HLEN;
	return 0;
	return 0;
}
}
@@ -40,6 +37,27 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
		skb_dst_drop(skb);
		skb_dst_drop(skb);
}
}


/**
 *	nf_bridge_get_physinif - ifindex of the physical input device
 *	@skb: packet to inspect
 *
 *	Returns the ifindex of skb->nf_bridge->physindev, or 0 when the skb
 *	carries no nf_bridge state or no physical input device is recorded.
 */
static inline int nf_bridge_get_physinif(const struct sk_buff *skb)
{
	/*
	 * Guard the device pointer as well as nf_bridge itself: bridge state
	 * can be attached to an skb while physindev is still NULL, and
	 * dereferencing it unconditionally would oops.
	 */
	if (!skb->nf_bridge || !skb->nf_bridge->physindev)
		return 0;

	return skb->nf_bridge->physindev->ifindex;
}

/**
 *	nf_bridge_get_physoutif - ifindex of the physical output device
 *	@skb: packet to inspect
 *
 *	Returns the ifindex of skb->nf_bridge->physoutdev, or 0 when the skb
 *	carries no nf_bridge state or no physical output device is recorded.
 */
static inline int nf_bridge_get_physoutif(const struct sk_buff *skb)
{
	/*
	 * Guard the device pointer as well as nf_bridge itself: bridge state
	 * can be attached to an skb before an output device has been chosen,
	 * in which case physoutdev is NULL and must not be dereferenced.
	 */
	if (!skb->nf_bridge || !skb->nf_bridge->physoutdev)
		return 0;

	return skb->nf_bridge->physoutdev->ifindex;
}

/*
 * Physical input device stored in the nf_bridge state of @skb, or NULL when
 * the skb has no nf_bridge state attached.
 */
static inline struct net_device *
nf_bridge_get_physindev(const struct sk_buff *skb)
{
	if (!skb->nf_bridge)
		return NULL;

	return skb->nf_bridge->physindev;
}

/*
 * Physical output device stored in the nf_bridge state of @skb, or NULL when
 * the skb has no nf_bridge state attached.
 */
static inline struct net_device *
nf_bridge_get_physoutdev(const struct sk_buff *skb)
{
	if (!skb->nf_bridge)
		return NULL;

	return skb->nf_bridge->physoutdev;
}
#else
#else
#define br_drop_fake_rtable(skb)	        do { } while (0)
#define br_drop_fake_rtable(skb)	        do { } while (0)
#endif /* CONFIG_BRIDGE_NETFILTER */
#endif /* CONFIG_BRIDGE_NETFILTER */
+7 −1
Original line number Original line Diff line number Diff line
@@ -166,10 +166,16 @@ struct nf_conntrack {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
struct nf_bridge_info {
	atomic_t		use;
	atomic_t		use;
	enum {
		BRNF_PROTO_UNCHANGED,
		BRNF_PROTO_8021Q,
		BRNF_PROTO_PPPOE
	} orig_proto;
	bool			pkt_otherhost;
	unsigned int		mask;
	unsigned int		mask;
	struct net_device	*physindev;
	struct net_device	*physindev;
	struct net_device	*physoutdev;
	struct net_device	*physoutdev;
	unsigned long		data[32 / sizeof(unsigned long)];
	char			neigh_header[8];
};
};
#endif
#endif


+154 −1
Original line number Original line Diff line number Diff line
@@ -195,6 +195,7 @@ struct nft_set_estimate {
};
};


struct nft_set_ext;
struct nft_set_ext;
struct nft_expr;


/**
/**
 *	struct nft_set_ops - nf_tables set operations
 *	struct nft_set_ops - nf_tables set operations
@@ -217,6 +218,15 @@ struct nft_set_ops {
	bool				(*lookup)(const struct nft_set *set,
	bool				(*lookup)(const struct nft_set *set,
						  const struct nft_data *key,
						  const struct nft_data *key,
						  const struct nft_set_ext **ext);
						  const struct nft_set_ext **ext);
	bool				(*update)(struct nft_set *set,
						  const struct nft_data *key,
						  void *(*new)(struct nft_set *,
							       const struct nft_expr *,
							       struct nft_data []),
						  const struct nft_expr *expr,
						  struct nft_data data[],
						  const struct nft_set_ext **ext);

	int				(*insert)(const struct nft_set *set,
	int				(*insert)(const struct nft_set *set,
						  const struct nft_set_elem *elem);
						  const struct nft_set_elem *elem);
	void				(*activate)(const struct nft_set *set,
	void				(*activate)(const struct nft_set *set,
@@ -257,6 +267,9 @@ void nft_unregister_set(struct nft_set_ops *ops);
 * 	@dtype: data type (verdict or numeric type defined by userspace)
 * 	@dtype: data type (verdict or numeric type defined by userspace)
 * 	@size: maximum set size
 * 	@size: maximum set size
 * 	@nelems: number of elements
 * 	@nelems: number of elements
 * 	@ndeact: number of deactivated elements queued for removal
 * 	@timeout: default timeout value in msecs
 * 	@gc_int: garbage collection interval in msecs
 *	@policy: set parameterization (see enum nft_set_policies)
 *	@policy: set parameterization (see enum nft_set_policies)
 * 	@ops: set ops
 * 	@ops: set ops
 * 	@pnet: network namespace
 * 	@pnet: network namespace
@@ -272,7 +285,10 @@ struct nft_set {
	u32				ktype;
	u32				ktype;
	u32				dtype;
	u32				dtype;
	u32				size;
	u32				size;
	u32				nelems;
	atomic_t			nelems;
	u32				ndeact;
	u64				timeout;
	u32				gc_int;
	u16				policy;
	u16				policy;
	/* runtime data below here */
	/* runtime data below here */
	const struct nft_set_ops	*ops ____cacheline_aligned;
	const struct nft_set_ops	*ops ____cacheline_aligned;
@@ -289,16 +305,27 @@ static inline void *nft_set_priv(const struct nft_set *set)
	return (void *)set->data;
	return (void *)set->data;
}
}


/*
 * Inverse of nft_set_priv(): given a pointer to a set's private data area
 * (the ->data member), step back by that member's offset to recover the
 * enclosing struct nft_set.
 */
static inline struct nft_set *nft_set_container_of(const void *priv)
{
	const size_t data_off = offsetof(struct nft_set, data);

	return (void *)priv - data_off;
}

struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
				     const struct nlattr *nla);
				     const struct nlattr *nla);
struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
					  const struct nlattr *nla);
					  const struct nlattr *nla);


/*
 * Garbage collection interval of @set in jiffies: the configured gc_int
 * (msecs) converted with msecs_to_jiffies(), or HZ when gc_int is zero.
 */
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
	if (!set->gc_int)
		return HZ;

	return msecs_to_jiffies(set->gc_int);
}

/**
/**
 *	struct nft_set_binding - nf_tables set binding
 *	struct nft_set_binding - nf_tables set binding
 *
 *
 *	@list: set bindings list node
 *	@list: set bindings list node
 *	@chain: chain containing the rule bound to the set
 *	@chain: chain containing the rule bound to the set
 *	@flags: set action flags
 *
 *
 *	A set binding contains all information necessary for validation
 *	A set binding contains all information necessary for validation
 *	of new elements added to a bound set.
 *	of new elements added to a bound set.
@@ -306,6 +333,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
struct nft_set_binding {
struct nft_set_binding {
	struct list_head		list;
	struct list_head		list;
	const struct nft_chain		*chain;
	const struct nft_chain		*chain;
	u32				flags;
};
};


int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
@@ -319,12 +347,18 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 *	@NFT_SET_EXT_KEY: element key
 *	@NFT_SET_EXT_KEY: element key
 *	@NFT_SET_EXT_DATA: mapping data
 *	@NFT_SET_EXT_DATA: mapping data
 *	@NFT_SET_EXT_FLAGS: element flags
 *	@NFT_SET_EXT_FLAGS: element flags
 *	@NFT_SET_EXT_TIMEOUT: element timeout
 *	@NFT_SET_EXT_EXPIRATION: element expiration time
 *	@NFT_SET_EXT_USERDATA: user data associated with the element
 *	@NFT_SET_EXT_NUM: number of extension types
 *	@NFT_SET_EXT_NUM: number of extension types
 */
 */
enum nft_set_extensions {
enum nft_set_extensions {
	NFT_SET_EXT_KEY,
	NFT_SET_EXT_KEY,
	NFT_SET_EXT_DATA,
	NFT_SET_EXT_DATA,
	NFT_SET_EXT_FLAGS,
	NFT_SET_EXT_FLAGS,
	NFT_SET_EXT_TIMEOUT,
	NFT_SET_EXT_EXPIRATION,
	NFT_SET_EXT_USERDATA,
	NFT_SET_EXT_NUM
	NFT_SET_EXT_NUM
};
};


@@ -421,14 +455,96 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
	return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
	return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
}
}


/*
 * Typed accessor for the NFT_SET_EXT_TIMEOUT extension of @ext.
 * Performs no presence check itself; presumably callers test
 * nft_set_ext_exists() first -- TODO confirm.
 */
static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
{
	return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
}

/*
 * Typed accessor for the NFT_SET_EXT_EXPIRATION extension of @ext.
 * nft_set_elem_expired() compares the pointed-to value against jiffies.
 * Performs no presence check itself.
 */
static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext)
{
	return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
}

/*
 * Typed accessor for the NFT_SET_EXT_USERDATA extension of @ext.
 * Performs no presence check itself; presumably callers test
 * nft_set_ext_exists() first -- TODO confirm.
 */
static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext)
{
	return nft_set_ext(ext, NFT_SET_EXT_USERDATA);
}

/*
 * True when @ext carries an expiration extension whose jiffies timestamp is
 * now or already in the past.  Elements without that extension are never
 * reported as expired.
 */
static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
{
	if (!nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
		return false;

	return time_is_before_eq_jiffies(*nft_set_ext_expiration(ext));
}

static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
						   void *elem)
						   void *elem)
{
{
	return elem + set->ops->elemsize;
	return elem + set->ops->elemsize;
}
}


/*
 * Set element constructor; the definition lives outside this header.
 * NOTE(review): presumably allocates an element for @set laid out according
 * to @tmpl, fills in @key, optional @data and @timeout, and allocates with
 * @gfp -- confirm against the definition.
 */
void *nft_set_elem_init(const struct nft_set *set,
			const struct nft_set_ext_tmpl *tmpl,
			const struct nft_data *key,
			const struct nft_data *data,
			u64 timeout, gfp_t gfp);
void nft_set_elem_destroy(const struct nft_set *set, void *elem);
void nft_set_elem_destroy(const struct nft_set *set, void *elem);


/**
 *	struct nft_set_gc_batch_head - nf_tables set garbage collection batch
 *
 *	@rcu: rcu head, passed to call_rcu() when the batch is completed
 *	@set: set the elements belong to
 *	@cnt: count of elements currently stored in the batch
 *
 *	The size of this header is subtracted from PAGE_SIZE when computing
 *	NFT_SET_GC_BATCH_SIZE, so adding members reduces the batch capacity.
 */
struct nft_set_gc_batch_head {
	struct rcu_head			rcu;
	const struct nft_set		*set;
	unsigned int			cnt;
};

/*
 * Number of element pointers per batch: whatever fits in PAGE_SIZE bytes
 * after the batch head, so a whole struct nft_set_gc_batch does not exceed
 * PAGE_SIZE.
 */
#define NFT_SET_GC_BATCH_SIZE	((PAGE_SIZE -				  \
				  sizeof(struct nft_set_gc_batch_head)) / \
				 sizeof(void *))

/**
 *	struct nft_set_gc_batch - nf_tables set garbage collection batch
 *
 *	@head: GC batch head
 *	@elems: garbage collection elements; slots [0, head.cnt) are in use
 */
struct nft_set_gc_batch {
	struct nft_set_gc_batch_head	head;
	void				*elems[NFT_SET_GC_BATCH_SIZE];
};

/*
 * Batch allocator used by nft_set_gc_batch_check(); defined outside this
 * header.  NOTE(review): presumably returns NULL on allocation failure --
 * confirm against the definition.
 */
struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
						gfp_t gfp);
/* RCU callback that nft_set_gc_batch_complete() hands to call_rcu(). */
void nft_set_gc_batch_release(struct rcu_head *rcu);

/*
 * Finish a GC batch: queue nft_set_gc_batch_release() on the batch's rcu
 * head via call_rcu().  A NULL @gcb is accepted and ignored.
 */
static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
{
	if (!gcb)
		return;

	call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
}

/*
 * Make sure there is a batch that nft_set_gc_batch_add() can append to.
 *
 * Returns @gcb unchanged while it still has room.  Otherwise the batch is
 * completed (queued for release via RCU) and the result of
 * nft_set_gc_batch_alloc() is returned instead; the same allocation happens
 * when @gcb is NULL.  The room test is "cnt + 1 < capacity".
 */
static inline struct nft_set_gc_batch *
nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
		       gfp_t gfp)
{
	if (!gcb)
		return nft_set_gc_batch_alloc(set, gfp);

	if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
		return gcb;

	nft_set_gc_batch_complete(gcb);
	return nft_set_gc_batch_alloc(set, gfp);
}

/*
 * Append @elem to @gcb at the next free slot and bump the element count.
 * No bounds check is done here; presumably callers obtain @gcb from
 * nft_set_gc_batch_check() first -- TODO confirm.
 */
static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
					void *elem)
{
	unsigned int slot = gcb->head.cnt;

	gcb->elems[slot] = elem;
	gcb->head.cnt = slot + 1;
}

/**
/**
 *	struct nft_expr_type - nf_tables expression type
 *	struct nft_expr_type - nf_tables expression type
 *
 *
@@ -750,6 +866,8 @@ static inline u8 nft_genmask_cur(const struct net *net)
	return 1 << ACCESS_ONCE(net->nft.gencursor);
	return 1 << ACCESS_ONCE(net->nft.gencursor);
}
}


/*
 * Mask with genmask bits 0 and 1 both set.
 * NOTE(review): presumably matches an element in either generation -- confirm
 * against the users of this mask.
 */
#define NFT_GENMASK_ANY		((1 << 0) | (1 << 1))

/*
/*
 * Set element transaction helpers
 * Set element transaction helpers
 */
 */
@@ -766,6 +884,41 @@ static inline void nft_set_elem_change_active(const struct nft_set *set,
	ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
	ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
}
}


/*
 * We use a free bit in the genmask field to indicate the element
 * is busy, meaning it is currently being processed either by
 * the netlink API or GC.
 *
 * Even though the genmask is only a single byte wide, this works
 * because the extension structure is fully constant once initialized,
 * so there are no non-atomic write accesses unless it is already
 * marked busy.
 */
#define NFT_SET_ELEM_BUSY_MASK	(1 << 2)

/*
 * Index of the busy bit as seen by the long-sized bitops in
 * nft_set_elem_mark_busy()/nft_set_elem_clear_busy(), which operate on the
 * start of the extension (genmask is at offset 0).  On little endian the
 * first byte holds bits 0-7 of the long, so the index is simply 2; on big
 * endian the first byte is the most significant one, hence the
 * BITS_PER_LONG - BITS_PER_BYTE bias.
 */
#if defined(__LITTLE_ENDIAN_BITFIELD)
#define NFT_SET_ELEM_BUSY_BIT	2
#elif defined(__BIG_ENDIAN_BITFIELD)
#define NFT_SET_ELEM_BUSY_BIT	(BITS_PER_LONG - BITS_PER_BYTE + 2)
#else
#error
#endif

/*
 * Atomically set the busy bit of @ext and return the result of
 * test_and_set_bit(), i.e. non-zero when the bit was already set.
 *
 * The bit operation addresses the extension as an unsigned long, which is
 * only valid because genmask sits at offset 0; the build-time check below
 * enforces that.
 */
static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
{
	BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);

	return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, (unsigned long *)ext);
}

/*
 * Clear the busy bit of @ext with clear_bit(), addressing the start of the
 * extension as an unsigned long.
 */
static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
{
	clear_bit(NFT_SET_ELEM_BUSY_BIT, (unsigned long *)ext);
}

/**
/**
 *	struct nft_trans - nf_tables object update in transaction
 *	struct nft_trans - nf_tables object update in transaction
 *
 *
+3 −0
Original line number Original line Diff line number Diff line
@@ -31,6 +31,9 @@ void nft_cmp_module_exit(void);
int nft_lookup_module_init(void);
int nft_lookup_module_init(void);
void nft_lookup_module_exit(void);
void nft_lookup_module_exit(void);


int nft_dynset_module_init(void);
void nft_dynset_module_exit(void);

int nft_bitwise_module_init(void);
int nft_bitwise_module_init(void);
void nft_bitwise_module_exit(void);
void nft_bitwise_module_exit(void);


+39 −0
Original line number Original line Diff line number Diff line
@@ -208,12 +208,14 @@ enum nft_rule_compat_attributes {
 * @NFT_SET_CONSTANT: set contents may not change while bound
 * @NFT_SET_CONSTANT: set contents may not change while bound
 * @NFT_SET_INTERVAL: set contains intervals
 * @NFT_SET_INTERVAL: set contains intervals
 * @NFT_SET_MAP: set is used as a dictionary
 * @NFT_SET_MAP: set is used as a dictionary
 * @NFT_SET_TIMEOUT: set uses timeouts
 */
 */
enum nft_set_flags {
enum nft_set_flags {
	NFT_SET_ANONYMOUS		= 0x1,
	NFT_SET_ANONYMOUS		= 0x1,
	NFT_SET_CONSTANT		= 0x2,
	NFT_SET_CONSTANT		= 0x2,
	NFT_SET_INTERVAL		= 0x4,
	NFT_SET_INTERVAL		= 0x4,
	NFT_SET_MAP			= 0x8,
	NFT_SET_MAP			= 0x8,
	NFT_SET_TIMEOUT			= 0x10,
};
};


/**
/**
@@ -252,6 +254,8 @@ enum nft_set_desc_attributes {
 * @NFTA_SET_POLICY: selection policy (NLA_U32)
 * @NFTA_SET_POLICY: selection policy (NLA_U32)
 * @NFTA_SET_DESC: set description (NLA_NESTED)
 * @NFTA_SET_DESC: set description (NLA_NESTED)
 * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
 * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
 * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64)
 * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
 */
 */
enum nft_set_attributes {
enum nft_set_attributes {
	NFTA_SET_UNSPEC,
	NFTA_SET_UNSPEC,
@@ -265,6 +269,8 @@ enum nft_set_attributes {
	NFTA_SET_POLICY,
	NFTA_SET_POLICY,
	NFTA_SET_DESC,
	NFTA_SET_DESC,
	NFTA_SET_ID,
	NFTA_SET_ID,
	NFTA_SET_TIMEOUT,
	NFTA_SET_GC_INTERVAL,
	__NFTA_SET_MAX
	__NFTA_SET_MAX
};
};
#define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
#define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
@@ -284,12 +290,18 @@ enum nft_set_elem_flags {
 * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data)
 * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data)
 * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes)
 * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes)
 * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
 * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
 * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64)
 * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64)
 * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY)
 */
 */
enum nft_set_elem_attributes {
enum nft_set_elem_attributes {
	NFTA_SET_ELEM_UNSPEC,
	NFTA_SET_ELEM_UNSPEC,
	NFTA_SET_ELEM_KEY,
	NFTA_SET_ELEM_KEY,
	NFTA_SET_ELEM_DATA,
	NFTA_SET_ELEM_DATA,
	NFTA_SET_ELEM_FLAGS,
	NFTA_SET_ELEM_FLAGS,
	NFTA_SET_ELEM_TIMEOUT,
	NFTA_SET_ELEM_EXPIRATION,
	NFTA_SET_ELEM_USERDATA,
	__NFTA_SET_ELEM_MAX
	__NFTA_SET_ELEM_MAX
};
};
#define NFTA_SET_ELEM_MAX	(__NFTA_SET_ELEM_MAX - 1)
#define NFTA_SET_ELEM_MAX	(__NFTA_SET_ELEM_MAX - 1)
@@ -505,6 +517,33 @@ enum nft_lookup_attributes {
};
};
#define NFTA_LOOKUP_MAX		(__NFTA_LOOKUP_MAX - 1)
#define NFTA_LOOKUP_MAX		(__NFTA_LOOKUP_MAX - 1)


/**
 * enum nft_dynset_ops - dynset expression operations
 *
 * @NFT_DYNSET_OP_ADD: add operation
 * @NFT_DYNSET_OP_UPDATE: update operation
 *
 * NOTE(review): the exact difference between the two operations is not
 * visible in this header -- confirm against the dynset implementation.
 */
enum nft_dynset_ops {
	NFT_DYNSET_OP_ADD	= 0,
	NFT_DYNSET_OP_UPDATE	= 1,
};

/**
 * enum nft_dynset_attributes - dynset expression attributes
 *
 * @NFTA_DYNSET_SET_NAME: name of the set to add data to (NLA_STRING)
 * @NFTA_DYNSET_SET_ID: unique identifier of the set in the transaction (NLA_U32)
 * @NFTA_DYNSET_OP: operation (NLA_U32)
 * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32)
 * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
 * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
 */
enum nft_dynset_attributes {
	NFTA_DYNSET_UNSPEC,
	NFTA_DYNSET_SET_NAME,
	NFTA_DYNSET_SET_ID,
	NFTA_DYNSET_OP,
	NFTA_DYNSET_SREG_KEY,
	NFTA_DYNSET_SREG_DATA,
	NFTA_DYNSET_TIMEOUT,
	__NFTA_DYNSET_MAX,
};
/* Highest valid attribute number: one below the __NFTA_DYNSET_MAX sentinel. */
#define NFTA_DYNSET_MAX		(__NFTA_DYNSET_MAX - 1)

/**
/**
 * enum nft_payload_bases - nf_tables payload expression offset bases
 * enum nft_payload_bases - nf_tables payload expression offset bases
 *
 *
Loading