Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b976c2c4 authored by Bruno Martins's avatar Bruno Martins
Browse files

Merge tag 'ASB-2024-01-05_11-5.4' of...

Merge tag 'ASB-2024-01-05_11-5.4' of https://android.googlesource.com/kernel/common into android13-5.4-lahaina

https://source.android.com/docs/security/bulletin/2024-01-01

* tag 'ASB-2024-01-05_11-5.4' of https://android.googlesource.com/kernel/common:
  UPSTREAM: ipv4: igmp: fix refcnt uaf issue when receiving igmp query packet
  ANDROID: Snapshot Mainline's version of checkpatch.pl
  UPSTREAM: nvmet-tcp: Fix a possible UAF in queue intialization setup
  UPSTREAM: nvmet-tcp: move send/recv error handling in the send/recv methods instead of call-sites
  UPSTREAM: netfilter: nf_tables: remove busy mark and gc batch API
  UPSTREAM: netfilter: nft_set_hash: mark set element as dead when deleting from packet path
  UPSTREAM: netfilter: nf_tables: adapt set backend to use GC transaction API
  UPSTREAM: netfilter: nf_tables: GC transaction API to avoid race with control plane
  UPSTREAM: netfilter: nft_set_rbtree: fix overlap expiration walk
  UPSTREAM: netfilter: nft_set_rbtree: fix null deref on element insertion
  UPSTREAM: netfilter: nft_set_rbtree: Switch to node list walk for overlap detection
  UPSTREAM: netfilter: nf_tables: drop map element references from preparation phase
  UPSTREAM: netfilter: nftables: rename set element data activation/deactivation functions

 Conflicts:
	scripts/checkpatch.pl

Change-Id: I2d2a3adcb627da605dd3a6a40c724bd7fb8115c5
parents 1d0ac46c 70db018a
Loading
Loading
Loading
Loading
+26 −24
Original line number Diff line number Diff line
@@ -321,6 +321,15 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}

/*
 * Put the queue into the receive-error state after a socket send/recv
 * failure.  -EPIPE/-ECONNRESET mean the peer already tore down the
 * connection, so a plain shutdown suffices; any other error is treated
 * as fatal via nvmet_tcp_fatal_error().
 */
static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
{
	queue->rcv_state = NVMET_TCP_RECV_ERR;
	if (status == -EPIPE || status == -ECONNRESET)
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	else
		nvmet_tcp_fatal_error(queue);
}

static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
	struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
@@ -714,11 +723,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,

	for (i = 0; i < budget; i++) {
		ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
		if (ret <= 0)
		if (unlikely(ret < 0)) {
			nvmet_tcp_socket_error(queue, ret);
			goto done;
		} else if (ret == 0) {
			break;
		}
		(*sends)++;
	}

done:
	return ret;
}

@@ -816,15 +829,11 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
	iov.iov_len = sizeof(*icresp);
	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (ret < 0)
		goto free_crypto;
		return ret; /* queue removal will cleanup */

	queue->state = NVMET_TCP_Q_LIVE;
	nvmet_prepare_receive_pdu(queue);
	return 0;
free_crypto:
	if (queue->hdr_digest || queue->data_digest)
		nvmet_tcp_free_crypto(queue);
	return ret;
}

static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
@@ -1167,11 +1176,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,

	for (i = 0; i < budget; i++) {
		ret = nvmet_tcp_try_recv_one(queue);
		if (ret <= 0)
		if (unlikely(ret < 0)) {
			nvmet_tcp_socket_error(queue, ret);
			goto done;
		} else if (ret == 0) {
			break;
		}
		(*recvs)++;
	}

done:
	return ret;
}

@@ -1196,27 +1209,16 @@ static void nvmet_tcp_io_work(struct work_struct *w)
		pending = false;

		ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
		if (ret > 0) {
		if (ret > 0)
			pending = true;
		} else if (ret < 0) {
			if (ret == -EPIPE || ret == -ECONNRESET)
				kernel_sock_shutdown(queue->sock, SHUT_RDWR);
			else
				nvmet_tcp_fatal_error(queue);
		else if (ret < 0)
			return;
		}

		ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
		if (ret > 0) {
			/* transmitted message/data */
		if (ret > 0)
			pending = true;
		} else if (ret < 0) {
			if (ret == -EPIPE || ret == -ECONNRESET)
				kernel_sock_shutdown(queue->sock, SHUT_RDWR);
			else
				nvmet_tcp_fatal_error(queue);
		else if (ret < 0)
			return;
		}

	} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);

+46 −75
Original line number Diff line number Diff line
@@ -371,7 +371,8 @@ struct nft_set_ops {
	int				(*init)(const struct nft_set *set,
						const struct nft_set_desc *desc,
						const struct nlattr * const nla[]);
	void				(*destroy)(const struct nft_set *set);
	void				(*destroy)(const struct nft_ctx *ctx,
						   const struct nft_set *set);
	void				(*gc_init)(const struct nft_set *set);

	unsigned int			elemsize;
@@ -401,6 +402,7 @@ void nft_unregister_set(struct nft_set_type *type);
 *
 *	@list: table set list node
 *	@bindings: list of set bindings
 *	@refs: internal refcounting for async set destruction
 *	@table: table this set belongs to
 *	@net: netnamespace this set belongs to
 * 	@name: name of the set
@@ -427,6 +429,7 @@ void nft_unregister_set(struct nft_set_type *type);
struct nft_set {
	struct list_head		list;
	struct list_head		bindings;
	refcount_t			refs;
	struct nft_table		*table;
	possible_net_t			net;
	char				*name;
@@ -445,7 +448,8 @@ struct nft_set {
	unsigned char			*udata;
	/* runtime data below here */
	const struct nft_set_ops	*ops ____cacheline_aligned;
	u16				flags:14,
	u16				flags:13,
					dead:1,
					genmask:2;
	u8				klen;
	u8				dlen;
@@ -665,62 +669,8 @@ void *nft_set_elem_init(const struct nft_set *set,
			u64 timeout, u64 expiration, gfp_t gfp);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
			  bool destroy_expr);

/**
 *	struct nft_set_gc_batch_head - nf_tables set garbage collection batch
 *
 *	@rcu: rcu head
 *	@set: set the elements belong to
 *	@cnt: count of elements
 */
struct nft_set_gc_batch_head {
	struct rcu_head			rcu;
	const struct nft_set		*set;
	unsigned int			cnt;
};

#define NFT_SET_GC_BATCH_SIZE	((PAGE_SIZE -				  \
				  sizeof(struct nft_set_gc_batch_head)) / \
				 sizeof(void *))

/**
 *	struct nft_set_gc_batch - nf_tables set garbage collection batch
 *
 * 	@head: GC batch head
 * 	@elems: garbage collection elements
 */
struct nft_set_gc_batch {
	struct nft_set_gc_batch_head	head;
	void				*elems[NFT_SET_GC_BATCH_SIZE];
};

struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
						gfp_t gfp);
void nft_set_gc_batch_release(struct rcu_head *rcu);

static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
{
	if (gcb != NULL)
		call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
}

static inline struct nft_set_gc_batch *
nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
		       gfp_t gfp)
{
	if (gcb != NULL) {
		if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
			return gcb;
		nft_set_gc_batch_complete(gcb);
	}
	return nft_set_gc_batch_alloc(set, gfp);
}

static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
					void *elem)
{
	gcb->elems[gcb->head.cnt++] = elem;
}
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
				const struct nft_set *set, void *elem);

struct nft_expr_ops;
/**
@@ -1348,39 +1298,30 @@ static inline void nft_set_elem_change_active(const struct net *net,

#endif /* IS_ENABLED(CONFIG_NF_TABLES) */

/*
 * We use a free bit in the genmask field to indicate the element
 * is busy, meaning it is currently being processed either by
 * the netlink API or GC.
 *
 * Even though the genmask is only a single byte wide, this works
 * because the extension structure if fully constant once initialized,
 * so there are no non-atomic write accesses unless it is already
 * marked busy.
 */
#define NFT_SET_ELEM_BUSY_MASK	(1 << 2)
#define NFT_SET_ELEM_DEAD_MASK (1 << 2)

#if defined(__LITTLE_ENDIAN_BITFIELD)
#define NFT_SET_ELEM_BUSY_BIT	2
#define NFT_SET_ELEM_DEAD_BIT	2
#elif defined(__BIG_ENDIAN_BITFIELD)
#define NFT_SET_ELEM_BUSY_BIT	(BITS_PER_LONG - BITS_PER_BYTE + 2)
#define NFT_SET_ELEM_DEAD_BIT	(BITS_PER_LONG - BITS_PER_BYTE + 2)
#else
#error
#endif

static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
static inline void nft_set_elem_dead(struct nft_set_ext *ext)
{
	unsigned long *word = (unsigned long *)ext;

	BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
	return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
	set_bit(NFT_SET_ELEM_DEAD_BIT, word);
}

static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
{
	unsigned long *word = (unsigned long *)ext;

	clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
	BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
	return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
}

/**
@@ -1487,6 +1428,35 @@ struct nft_trans_flowtable {
#define nft_trans_flowtable(trans)	\
	(((struct nft_trans_flowtable *)trans->data)->flowtable)

#define NFT_TRANS_GC_BATCHCOUNT                256

/* One set-element garbage collection batch: elements collected from
 * @set under GC sequence number @seq, released via @rcu after a grace
 * period.
 */
struct nft_trans_gc {
	struct list_head	list;	/* entry in the pending GC list */
	struct net		*net;	/* netns reference taken at alloc */
	struct nft_set		*set;	/* set the elements belong to */
	u32			seq;	/* gc_seq snapshot at alloc time */
	u8			count;	/* used slots in @priv */
	void			*priv[NFT_TRANS_GC_BATCHCOUNT];
	struct rcu_head		rcu;
};

struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
					unsigned int gc_seq, gfp_t gfp);
void nft_trans_gc_destroy(struct nft_trans_gc *trans);

struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
					      unsigned int gc_seq, gfp_t gfp);
void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);

struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);

void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);

void nft_setelem_data_deactivate(const struct net *net,
				 const struct nft_set *set,
				 struct nft_set_elem *elem);

int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);

@@ -1507,6 +1477,7 @@ struct nftables_pernet {
	struct mutex		commit_mutex;
	unsigned int		base_seq;
	u8			validate_state;
	unsigned int		gc_seq;
};

#endif /* _NET_NF_TABLES_H */
+4 −2
Original line number Diff line number Diff line
@@ -218,8 +218,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
	int tv = prandom_u32() % max_delay;

	im->tm_running = 1;
	if (!mod_timer(&im->timer, jiffies+tv+2))
		refcount_inc(&im->refcnt);
	if (refcount_inc_not_zero(&im->refcnt)) {
		if (mod_timer(&im->timer, jiffies + tv + 2))
			ip_ma_put(im);
	}
}

static void igmp_gq_start_timer(struct in_device *in_dev)
+300 −46
Original line number Diff line number Diff line
@@ -26,12 +26,15 @@
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))

unsigned int nf_tables_net_id __read_mostly;
EXPORT_SYMBOL_GPL(nf_tables_net_id);

static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
static u64 table_handle;

enum {
@@ -88,6 +91,9 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state)
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);

static void nft_trans_gc_work(struct work_struct *work);
static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);

static void nft_ctx_init(struct nft_ctx *ctx,
			 struct net *net,
			 const struct sk_buff *skb,
@@ -403,6 +409,27 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
	return 0;
}

/* Set-walk callback: drop the data/object references held by one map
 * element during the transaction preparation phase.
 */
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
				  struct nft_set *set,
				  const struct nft_set_iter *iter,
				  struct nft_set_elem *elem)
{
	nft_setelem_data_deactivate(ctx->net, set, elem);

	return 0;
}

/* Walk every element of a map (set carrying data/object references)
 * and deactivate it for the next generation.
 */
static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
{
	struct nft_set_iter iter = {
		.genmask	= nft_genmask_next(ctx->net),
		.fn		= nft_mapelem_deactivate,
	};

	set->ops->walk(ctx, set, &iter);
	WARN_ON_ONCE(iter.err);
}

static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
	int err;
@@ -411,6 +438,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
	if (err < 0)
		return err;

	if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
		nft_map_deactivate(ctx, set);

	nft_deactivate_next(ctx->net, set);
	nft_use_dec(&ctx->table->use);

@@ -3810,6 +3840,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
	}

	INIT_LIST_HEAD(&set->bindings);
	refcount_set(&set->refs, 1);
	set->table = table;
	write_pnet(&set->net, net);
	set->ops   = ops;
@@ -3840,7 +3871,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
	return 0;

err4:
	ops->destroy(set);
	ops->destroy(&ctx, set);
err3:
	kfree(set->name);
err2:
@@ -3852,15 +3883,22 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
	return err;
}

/* Drop one reference on the set; free its name and the set itself once
 * the last holder (control plane or a pending GC batch) is gone.
 */
static void nft_set_put(struct nft_set *set)
{
	if (refcount_dec_and_test(&set->refs)) {
		kfree(set->name);
		kvfree(set);
	}
}

static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
	if (WARN_ON(set->use > 0))
		return;

	set->ops->destroy(set);
	set->ops->destroy(ctx, set);
	module_put(to_set_type(set->ops)->owner);
	kfree(set->name);
	kvfree(set);
	nft_set_put(set);
}

static int nf_tables_delset(struct net *net, struct sock *nlsk,
@@ -3981,10 +4019,39 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
	}
}

static void nft_setelem_data_activate(const struct net *net,
				      const struct nft_set *set,
				      struct nft_set_elem *elem);

/* Set-walk callback: restore the data/object references of one map
 * element (abort-path counterpart of nft_mapelem_deactivate()).
 */
static int nft_mapelem_activate(const struct nft_ctx *ctx,
				struct nft_set *set,
				const struct nft_set_iter *iter,
				struct nft_set_elem *elem)
{
	nft_setelem_data_activate(ctx->net, set, elem);

	return 0;
}

/* Walk every element of a map and re-activate its data/object
 * references, e.g. when a set deletion is aborted.
 */
static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
{
	struct nft_set_iter iter = {
		.genmask	= nft_genmask_next(ctx->net),
		.fn		= nft_mapelem_activate,
	};

	set->ops->walk(ctx, set, &iter);
	WARN_ON_ONCE(iter.err);
}

void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
{
	if (nft_set_is_anonymous(set))
	if (nft_set_is_anonymous(set)) {
		if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
			nft_map_activate(ctx, set);

		nft_clear(ctx->net, set);
	}

	nft_use_inc_restore(&set->use);
}
@@ -4005,13 +4072,20 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
		nft_use_dec(&set->use);
		break;
	case NFT_TRANS_PREPARE:
		if (nft_set_is_anonymous(set))
			nft_deactivate_next(ctx->net, set);
		if (nft_set_is_anonymous(set)) {
			if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
				nft_map_deactivate(ctx, set);

			nft_deactivate_next(ctx->net, set);
		}
		nft_use_dec(&set->use);
		return;
	case NFT_TRANS_ABORT:
	case NFT_TRANS_RELEASE:
		if (nft_set_is_anonymous(set) &&
		    set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
			nft_map_deactivate(ctx, set);

		nft_use_dec(&set->use);
		/* fall through */
	default:
@@ -4578,6 +4652,7 @@ void *nft_set_elem_init(const struct nft_set *set,
	return elem;
}

/* Drop references and destroy. Called from gc, dynset and abort path. */
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
			  bool destroy_expr)
{
@@ -4606,10 +4681,10 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);

/* Only called from commit path, nft_set_elem_deactivate() already deals with
 * the refcounting from the preparation phase.
/* Destroy element. References have been already dropped in the preparation
 * path via nft_setelem_data_deactivate().
 */
static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
				const struct nft_set *set, void *elem)
{
	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
@@ -4618,6 +4693,7 @@ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
		nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext));
	kfree(elem);
}
EXPORT_SYMBOL_GPL(nf_tables_set_elem_destroy);

static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
			    const struct nlattr *attr, u32 nlmsg_flags)
@@ -4803,7 +4879,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
	if (trans == NULL)
		goto err4;

	ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
	ext->genmask = nft_genmask_cur(ctx->net);

	err = set->ops->insert(ctx->net, set, &elem, &ext2);
	if (err) {
		if (err == -EEXIST) {
@@ -4923,7 +5000,7 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
	}
}

static void nft_set_elem_activate(const struct net *net,
static void nft_setelem_data_activate(const struct net *net,
				      const struct nft_set *set,
				      struct nft_set_elem *elem)
{
@@ -4935,7 +5012,7 @@ static void nft_set_elem_activate(const struct net *net,
		nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}

static void nft_set_elem_deactivate(const struct net *net,
void nft_setelem_data_deactivate(const struct net *net,
				 const struct nft_set *set,
				 struct nft_set_elem *elem)
{
@@ -4946,6 +5023,7 @@ static void nft_set_elem_deactivate(const struct net *net,
	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
		nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
EXPORT_SYMBOL_GPL(nft_setelem_data_deactivate);

static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
			   const struct nlattr *attr)
@@ -5004,7 +5082,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
	kfree(elem.priv);
	elem.priv = priv;

	nft_set_elem_deactivate(ctx->net, set, &elem);
	nft_setelem_data_deactivate(ctx->net, set, &elem);

	nft_trans_elem(trans) = elem;
	nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -5038,7 +5116,7 @@ static int nft_flush_set(const struct nft_ctx *ctx,
	}
	set->ndeact++;

	nft_set_elem_deactivate(ctx->net, set, elem);
	nft_setelem_data_deactivate(ctx->net, set, elem);
	nft_trans_elem_set(trans) = set;
	nft_trans_elem(trans) = *elem;
	nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -5095,31 +5173,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
	return err;
}

void nft_set_gc_batch_release(struct rcu_head *rcu)
{
	struct nft_set_gc_batch *gcb;
	unsigned int i;

	gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
	for (i = 0; i < gcb->head.cnt; i++)
		nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
	kfree(gcb);
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);

struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
						gfp_t gfp)
{
	struct nft_set_gc_batch *gcb;

	gcb = kzalloc(sizeof(*gcb), gfp);
	if (gcb == NULL)
		return gcb;
	gcb->head.set = set;
	return gcb;
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);

/*
 * Stateful objects
 */
@@ -6896,6 +6949,186 @@ static void nft_chain_del(struct nft_chain *chain)
	list_del_rcu(&chain->list);
}

/* Remove all elements collected in a GC batch from their set and drop
 * their data/object references.  Called with the commit mutex held
 * (see nft_trans_gc_work_done()).
 */
static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
					struct nft_trans_gc *trans)
{
	void **priv = trans->priv;
	unsigned int i;

	for (i = 0; i < trans->count; i++) {
		struct nft_set_elem elem = {
			.priv = priv[i],
		};

		nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
		trans->set->ops->remove(trans->net, trans->set, &elem);
	}
}

/* Release a GC batch: drop the set and netns references taken in
 * nft_trans_gc_alloc(), then free the batch itself.
 */
void nft_trans_gc_destroy(struct nft_trans_gc *trans)
{
	nft_set_put(trans->set);
	put_net(trans->net);
	kfree(trans);
}
EXPORT_SYMBOL_GPL(nft_trans_gc_destroy);

/* RCU callback: after a grace period, destroy the elements queued in a
 * GC batch (dropping the set's element count) and release the batch.
 * Destruction is deferred to here so no RCU reader can still hold a
 * pointer to the elements.
 */
static void nft_trans_gc_trans_free(struct rcu_head *rcu)
{
	struct nft_set_elem elem = {};
	struct nft_trans_gc *trans;
	struct nft_ctx ctx = {};
	unsigned int i;

	trans = container_of(rcu, struct nft_trans_gc, rcu);
	ctx.net = read_pnet(&trans->set->net);

	for (i = 0; i < trans->count; i++) {
		elem.priv = trans->priv[i];
		atomic_dec(&trans->set->nelems);

		nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
	}

	nft_trans_gc_destroy(trans);
}

/* Validate an async GC batch against the current transaction state and,
 * if still valid, remove its elements under the commit mutex.  Returns
 * false when the batch lost the race with the control plane (gc_seq
 * moved on, or the set was deleted) and must simply be discarded.
 */
static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
{
	struct nftables_pernet *nft_net;
	struct nft_ctx ctx = {};

	nft_net = net_generic(trans->net, nf_tables_net_id);

	mutex_lock(&nft_net->commit_mutex);

	/* Check for race with transaction, otherwise this batch refers to
	 * stale objects that might not be there anymore. Skip transaction if
	 * set has been destroyed from control plane transaction in case gc
	 * worker loses race.
	 */
	if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
		mutex_unlock(&nft_net->commit_mutex);
		return false;
	}

	ctx.net = trans->net;
	ctx.table = trans->set->table;

	nft_trans_gc_setelem_remove(&ctx, trans);
	mutex_unlock(&nft_net->commit_mutex);

	return true;
}

static void nft_trans_gc_work(struct work_struct *work)
{
	struct nft_trans_gc *trans, *next;
	LIST_HEAD(trans_gc_list);

	spin_lock(&nf_tables_destroy_list_lock);
	list_splice_init(&nf_tables_gc_list, &trans_gc_list);
	spin_unlock(&nf_tables_destroy_list_lock);

	list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
		list_del(&trans->list);
		if (!nft_trans_gc_work_done(trans)) {
			nft_trans_gc_destroy(trans);
			continue;
		}
		call_rcu(&trans->rcu, nft_trans_gc_trans_free);
	}
}

/* Allocate a new GC batch for @set under sequence number @gc_seq,
 * pinning the set (refcount) and its netns so the batch stays valid
 * until nft_trans_gc_destroy().  Returns NULL on allocation failure.
 */
struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
					unsigned int gc_seq, gfp_t gfp)
{
	struct net *net = read_pnet(&set->net);
	struct nft_trans_gc *trans;

	trans = kzalloc(sizeof(*trans), gfp);
	if (!trans)
		return NULL;

	refcount_inc(&set->refs);
	trans->set = set;
	trans->net = get_net(net);
	trans->seq = gc_seq;

	return trans;
}
EXPORT_SYMBOL_GPL(nft_trans_gc_alloc);

/* Append one expired element to the batch.  No bounds check here:
 * callers must have ensured room via nft_trans_gc_queue_async() or
 * nft_trans_gc_queue_sync() beforehand.
 */
void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
{
	trans->priv[trans->count++] = priv;
}
EXPORT_SYMBOL_GPL(nft_trans_gc_elem_add);

/* Hand a batch over to the async GC worker (trans_gc_work). */
static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
{
	spin_lock(&nf_tables_gc_list_lock);
	list_add_tail(&trans->list, &nf_tables_gc_list);
	spin_unlock(&nf_tables_gc_list_lock);

	schedule_work(&trans_gc_work);
}

/* Number of free element slots left in the batch (0 means full). */
static int nft_trans_gc_space(struct nft_trans_gc *trans)
{
	return NFT_TRANS_GC_BATCHCOUNT - trans->count;
}

/* Async-path helper: if the current batch still has room, keep using
 * it; otherwise queue it to the GC worker and allocate a fresh batch
 * under the same @gc_seq.  May return NULL if the new allocation fails.
 */
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
					      unsigned int gc_seq, gfp_t gfp)
{
	if (nft_trans_gc_space(gc))
		return gc;

	nft_trans_gc_queue_work(gc);

	return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
}
EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async);

/* Flush the final async batch: free it if empty, otherwise queue it to
 * the GC worker.
 */
void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
{
	if (trans->count == 0) {
		nft_trans_gc_destroy(trans);
		return;
	}

	nft_trans_gc_queue_work(trans);
}
EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done);

/* Sync (transaction) path helper; requires the commit mutex.  If the
 * batch is full, schedule its elements for destruction after an RCU
 * grace period and allocate a new batch.  The new batch uses gc_seq 0:
 * this path frees batches directly via call_rcu() and never goes
 * through the async seq check in nft_trans_gc_work_done().
 */
struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
{
	if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
		return NULL;

	if (nft_trans_gc_space(gc))
		return gc;

	call_rcu(&gc->rcu, nft_trans_gc_trans_free);

	return nft_trans_gc_alloc(gc->set, 0, gfp);
}
EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync);

/* Flush the final sync batch under the commit mutex: free it if empty,
 * otherwise release its elements after an RCU grace period.
 */
void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
{
	WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));

	if (trans->count == 0) {
		nft_trans_gc_destroy(trans);
		return;
	}

	call_rcu(&trans->rcu, nft_trans_gc_trans_free);
}
EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done);

static void nf_tables_module_autoload_cleanup(struct net *net)
{
	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
@@ -6950,6 +7183,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
	struct nft_trans_elem *te;
	struct nft_chain *chain;
	struct nft_table *table;
	unsigned int gc_seq;
	int err;

	if (list_empty(&nft_net->commit_list)) {
@@ -7006,6 +7240,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
	while (++nft_net->base_seq == 0)
		;

	/* Bump gc counter, it becomes odd, this is the busy mark. */
	gc_seq = READ_ONCE(nft_net->gc_seq);
	WRITE_ONCE(nft_net->gc_seq, ++gc_seq);

	/* step 3. Start new generation, rules_gen_X now in use. */
	net->nft.gencursor = nft_gencursor_next(net);

@@ -7083,6 +7321,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELSET:
			nft_trans_set(trans)->dead = 1;
			list_del_rcu(&nft_trans_set(trans)->list);
			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
					     NFT_MSG_DELSET, GFP_KERNEL);
@@ -7144,6 +7383,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
	}

	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);

	WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
	nf_tables_commit_release(net);

	return 0;
@@ -7265,6 +7506,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
		case NFT_MSG_DELSET:
			nft_use_inc_restore(&trans->ctx.table->use);
			nft_clear(trans->ctx.net, nft_trans_set(trans));
			if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
				nft_map_activate(&trans->ctx, nft_trans_set(trans));
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_NEWSETELEM:
@@ -7279,7 +7522,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
		case NFT_MSG_DELSETELEM:
			te = (struct nft_trans_elem *)trans->data;

			nft_set_elem_activate(net, te->set, &te->elem);
			nft_setelem_data_activate(net, te->set, &te->elem);
			te->set->ops->activate(net, te->set, &te->elem);
			te->set->ndeact--;

@@ -7959,6 +8202,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
	list_for_each_entry_safe(set, ns, &table->sets, list) {
		list_del(&set->list);
		nft_use_dec(&table->use);
		if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
			nft_map_deactivate(&ctx, set);

		nft_set_destroy(&ctx, set);
	}
	list_for_each_entry_safe(obj, ne, &table->objects, list) {
@@ -7997,6 +8243,7 @@ static int __net_init nf_tables_init_net(struct net *net)
	mutex_init(&nft_net->commit_mutex);
	nft_net->base_seq = 1;
	nft_net->validate_state = NFT_VALIDATE_SKIP;
	nft_net->gc_seq = 0;

	return 0;
}
@@ -8023,10 +8270,16 @@ static void __net_exit nf_tables_exit_net(struct net *net)
	WARN_ON_ONCE(!list_empty(&nft_net->module_list));
}

/* Batch netns-exit hook: make sure no async GC work is still in flight
 * before per-netns nf_tables state is torn down.
 */
static void nf_tables_exit_batch(struct list_head *net_exit_list)
{
	flush_work(&trans_gc_work);
}

static struct pernet_operations nf_tables_net_ops = {
	.init		= nf_tables_init_net,
	.pre_exit	= nf_tables_pre_exit_net,
	.exit		= nf_tables_exit_net,
	.exit_batch	= nf_tables_exit_batch,
	.id		= &nf_tables_net_id,
	.size		= sizeof(struct nftables_pernet),
};
@@ -8091,6 +8344,7 @@ static void __exit nf_tables_module_exit(void)
	nft_chain_filter_fini();
	nft_chain_route_fini();
	unregister_pernet_subsys(&nf_tables_net_ops);
	cancel_work_sync(&trans_gc_work);
	cancel_work_sync(&trans_destroy_work);
	rcu_barrier();
	rhltable_destroy(&nft_objname_ht);
+3 −2
Original line number Diff line number Diff line
@@ -270,13 +270,14 @@ static int nft_bitmap_init(const struct nft_set *set,
	return 0;
}

static void nft_bitmap_destroy(const struct nft_set *set)
static void nft_bitmap_destroy(const struct nft_ctx *ctx,
			       const struct nft_set *set)
{
	struct nft_bitmap *priv = nft_set_priv(set);
	struct nft_bitmap_elem *be, *n;

	list_for_each_entry_safe(be, n, &priv->list, head)
		nft_set_elem_destroy(set, be, true);
		nf_tables_set_elem_destroy(ctx, set, be);
}

static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
Loading