Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 53b87627 authored by Pablo Neira Ayuso
Browse files

Merge branch 'master' of git://blackhole.kfki.hu/nf-next



Jozsef Kadlecsik says:

====================
ipset patches for nf-next

Please consider applying the next batch of patches for ipset. First
come the small changes, then the bugfixes, and finally the RCU-related
patches.

* Use MSEC_PER_SEC consistently instead of the number.
* Use SET_WITH_*() helpers to test set extensions from Sergey Popovich.
* Check extensions attributes before getting extensions from Sergey Popovich.
* Permit CIDR equal to the host address CIDR in IPv6 from Sergey Popovich.
* Make sure we always return the line number in batch mode when an error
  occurs, from Sergey Popovich.
* Check CIDR value only when attribute is given from Sergey Popovich.
* Fix cidr handling for hash:*net* types, reported by Jonathan Johnson.
* Fix parallel resizing and listing of the same set so that the original
  set is kept for the whole duration of the dump.
* Make sure listing doesn't grab a set which is just being destroyed.
* Remove rbtree from ip_set_hash_netiface.c in order to introduce RCU.
* Replace rwlock_t with spinlock_t in "struct ip_set", change the locking
  in the core, and simplify the timeout routines.
* Introduce RCU locking in bitmap:* types with a slight modification in the
  logic on how an element is added.
* Introduce RCU locking in hash:* types. This is the most complex part of
  the changes.
* Introduce RCU locking in list type where standard rculist is used.
* Fix coding style issues reported by checkpatch.pl.
====================

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parents f09becc7 ca0f6a5c
Loading
Loading
Loading
Loading
+17 −12
Original line number Original line Diff line number Diff line
@@ -108,8 +108,13 @@ struct ip_set_counter {
	atomic64_t packets;
	atomic64_t packets;
};
};


struct ip_set_comment_rcu {
	struct rcu_head rcu;
	char str[0];
};

struct ip_set_comment {
struct ip_set_comment {
	char *str;
	struct ip_set_comment_rcu __rcu *c;
};
};


struct ip_set_skbinfo {
struct ip_set_skbinfo {
@@ -176,6 +181,9 @@ struct ip_set_type_variant {
	/* List elements */
	/* List elements */
	int (*list)(const struct ip_set *set, struct sk_buff *skb,
	int (*list)(const struct ip_set *set, struct sk_buff *skb,
		    struct netlink_callback *cb);
		    struct netlink_callback *cb);
	/* Keep listing private when resizing runs parallel */
	void (*uref)(struct ip_set *set, struct netlink_callback *cb,
		     bool start);


	/* Return true if "b" set is the same as "a"
	/* Return true if "b" set is the same as "a"
	 * according to the create set parameters */
	 * according to the create set parameters */
@@ -223,7 +231,7 @@ struct ip_set {
	/* The name of the set */
	/* The name of the set */
	char name[IPSET_MAXNAMELEN];
	char name[IPSET_MAXNAMELEN];
	/* Lock protecting the set data */
	/* Lock protecting the set data */
	rwlock_t lock;
	spinlock_t lock;
	/* References to the set */
	/* References to the set */
	u32 ref;
	u32 ref;
	/* The core set type */
	/* The core set type */
@@ -346,7 +354,6 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
	       (skbinfo->skbqueue &&
	       (skbinfo->skbqueue &&
		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
			     cpu_to_be16(skbinfo->skbqueue)));
			     cpu_to_be16(skbinfo->skbqueue)));

}
}


static inline void
static inline void
@@ -380,12 +387,12 @@ ip_set_init_counter(struct ip_set_counter *counter,


/* Netlink CB args */
/* Netlink CB args */
enum {
enum {
	IPSET_CB_NET = 0,
	IPSET_CB_NET = 0,	/* net namespace */
	IPSET_CB_DUMP,
	IPSET_CB_DUMP,		/* dump single set/all sets */
	IPSET_CB_INDEX,
	IPSET_CB_INDEX,		/* set index */
	IPSET_CB_ARG0,
	IPSET_CB_PRIVATE,	/* set private data */
	IPSET_CB_ARG0,		/* type specific */
	IPSET_CB_ARG1,
	IPSET_CB_ARG1,
	IPSET_CB_ARG2,
};
};


/* register and unregister set references */
/* register and unregister set references */
@@ -545,8 +552,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
	  .timeout = (set)->timeout }
	  .timeout = (set)->timeout }


#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))

#define IPSET_CONCAT(a, b)		a##b
#define IPSET_CONCAT(a, b)		a##b
#define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)
#define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)


+27 −11
Original line number Original line Diff line number Diff line
@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
	return nla_data(tb);
	return nla_data(tb);
}
}


/* Called from uadd only, protected by the set spinlock.
 * The kadt functions don't use the comment extensions in any way.
 */
static inline void
static inline void
ip_set_init_comment(struct ip_set_comment *comment,
ip_set_init_comment(struct ip_set_comment *comment,
		    const struct ip_set_ext *ext)
		    const struct ip_set_ext *ext)
{
{
	struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
	size_t len = ext->comment ? strlen(ext->comment) : 0;
	size_t len = ext->comment ? strlen(ext->comment) : 0;


	if (unlikely(comment->str)) {
	if (unlikely(c)) {
		kfree(comment->str);
		kfree_rcu(c, rcu);
		comment->str = NULL;
		rcu_assign_pointer(comment->c, NULL);
	}
	}
	if (!len)
	if (!len)
		return;
		return;
	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
		len = IPSET_MAX_COMMENT_SIZE;
		len = IPSET_MAX_COMMENT_SIZE;
	comment->str = kzalloc(len + 1, GFP_ATOMIC);
	c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
	if (unlikely(!comment->str))
	if (unlikely(!c))
		return;
		return;
	strlcpy(comment->str, ext->comment, len + 1);
	strlcpy(c->str, ext->comment, len + 1);
	rcu_assign_pointer(comment->c, c);
}
}


/* Used only when dumping a set, protected by rcu_read_lock_bh() */
static inline int
static inline int
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
{
{
	if (!comment->str)
	struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);

	if (!c)
		return 0;
		return 0;
	return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
	return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}
}


/* Called from uadd/udel, flush or the garbage collectors protected
 * by the set spinlock.
 * Called when the set is destroyed and when there can't be any user
 * of the set data anymore.
 */
static inline void
static inline void
ip_set_comment_free(struct ip_set_comment *comment)
ip_set_comment_free(struct ip_set_comment *comment)
{
{
	if (unlikely(!comment->str))
	struct ip_set_comment_rcu *c;

	c = rcu_dereference_protected(comment->c, 1);
	if (unlikely(!c))
		return;
		return;
	kfree(comment->str);
	kfree_rcu(c, rcu);
	comment->str = NULL;
	rcu_assign_pointer(comment->c, NULL);
}
}


#endif
#endif
+11 −16
Original line number Original line Diff line number Diff line
@@ -40,38 +40,33 @@ ip_set_timeout_uget(struct nlattr *tb)
}
}


static inline bool
static inline bool
ip_set_timeout_test(unsigned long timeout)
ip_set_timeout_expired(unsigned long *t)
{
{
	return timeout == IPSET_ELEM_PERMANENT ||
	return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
	       time_is_after_jiffies(timeout);
}

static inline bool
ip_set_timeout_expired(unsigned long *timeout)
{
	return *timeout != IPSET_ELEM_PERMANENT &&
	       time_is_before_jiffies(*timeout);
}
}


static inline void
static inline void
ip_set_timeout_set(unsigned long *timeout, u32 t)
ip_set_timeout_set(unsigned long *timeout, u32 value)
{
{
	if (!t) {
	unsigned long t;

	if (!value) {
		*timeout = IPSET_ELEM_PERMANENT;
		*timeout = IPSET_ELEM_PERMANENT;
		return;
		return;
	}
	}


	*timeout = msecs_to_jiffies(t * 1000) + jiffies;
	t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
	if (*timeout == IPSET_ELEM_PERMANENT)
	if (t == IPSET_ELEM_PERMANENT)
		/* Bingo! :-) */
		/* Bingo! :-) */
		(*timeout)--;
		t--;
	*timeout = t;
}
}


static inline u32
static inline u32
ip_set_timeout_get(unsigned long *timeout)
ip_set_timeout_get(unsigned long *timeout)
{
{
	return *timeout == IPSET_ELEM_PERMANENT ? 0 :
	return *timeout == IPSET_ELEM_PERMANENT ? 0 :
		jiffies_to_msecs(*timeout - jiffies)/1000;
		jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
}
}


#endif	/* __KERNEL__ */
#endif	/* __KERNEL__ */
+3 −3
Original line number Original line Diff line number Diff line
@@ -15,12 +15,12 @@
/* The protocol version */
/* The protocol version */
#define IPSET_PROTOCOL		6
#define IPSET_PROTOCOL		6


/* The maximum permissible comment length we will accept over netlink */
#define IPSET_MAX_COMMENT_SIZE	255

/* The max length of strings including NUL: set and type identifiers */
/* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN	32
#define IPSET_MAXNAMELEN	32


/* The maximum permissible comment length we will accept over netlink */
#define IPSET_MAX_COMMENT_SIZE	255

/* Message types and commands */
/* Message types and commands */
enum ipset_cmd {
enum ipset_cmd {
	IPSET_CMD_NONE,
	IPSET_CMD_NONE,
+29 −15
Original line number Original line Diff line number Diff line
@@ -144,10 +144,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,


	if (ret == IPSET_ADD_FAILED) {
	if (ret == IPSET_ADD_FAILED) {
		if (SET_WITH_TIMEOUT(set) &&
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(x, set)))
		    ip_set_timeout_expired(ext_timeout(x, set))) {
			ret = 0;
			ret = 0;
		else if (!(flags & IPSET_FLAG_EXIST))
		} else if (!(flags & IPSET_FLAG_EXIST)) {
			set_bit(e->id, map->members);
			return -IPSET_ERR_EXIST;
			return -IPSET_ERR_EXIST;
		}
		/* Element is re-added, cleanup extensions */
		/* Element is re-added, cleanup extensions */
		ip_set_ext_destroy(set, x);
		ip_set_ext_destroy(set, x);
	}
	}
@@ -165,6 +167,10 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
		ip_set_init_comment(ext_comment(x, set), ext);
		ip_set_init_comment(ext_comment(x, set), ext);
	if (SET_WITH_SKBINFO(set))
	if (SET_WITH_SKBINFO(set))
		ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
		ip_set_init_skbinfo(ext_skbinfo(x, set), ext);

	/* Activate element */
	set_bit(e->id, map->members);

	return 0;
	return 0;
}
}


@@ -203,10 +209,13 @@ mtype_list(const struct ip_set *set,
	struct nlattr *adt, *nested;
	struct nlattr *adt, *nested;
	void *x;
	void *x;
	u32 id, first = cb->args[IPSET_CB_ARG0];
	u32 id, first = cb->args[IPSET_CB_ARG0];
	int ret = 0;


	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!adt)
	if (!adt)
		return -EMSGSIZE;
		return -EMSGSIZE;
	/* Extensions may be replaced */
	rcu_read_lock();
	for (; cb->args[IPSET_CB_ARG0] < map->elements;
	for (; cb->args[IPSET_CB_ARG0] < map->elements;
	     cb->args[IPSET_CB_ARG0]++) {
	     cb->args[IPSET_CB_ARG0]++) {
		id = cb->args[IPSET_CB_ARG0];
		id = cb->args[IPSET_CB_ARG0];
@@ -222,8 +231,10 @@ mtype_list(const struct ip_set *set,
		if (!nested) {
		if (!nested) {
			if (id == first) {
			if (id == first) {
				nla_nest_cancel(skb, adt);
				nla_nest_cancel(skb, adt);
				return -EMSGSIZE;
				ret = -EMSGSIZE;
			} else
				goto out;
			}

			goto nla_put_failure;
			goto nla_put_failure;
		}
		}
		if (mtype_do_list(skb, map, id, set->dsize))
		if (mtype_do_list(skb, map, id, set->dsize))
@@ -238,16 +249,18 @@ mtype_list(const struct ip_set *set,
	/* Set listing finished */
	/* Set listing finished */
	cb->args[IPSET_CB_ARG0] = 0;
	cb->args[IPSET_CB_ARG0] = 0;


	return 0;
	goto out;


nla_put_failure:
nla_put_failure:
	nla_nest_cancel(skb, nested);
	nla_nest_cancel(skb, nested);
	if (unlikely(id == first)) {
	if (unlikely(id == first)) {
		cb->args[IPSET_CB_ARG0] = 0;
		cb->args[IPSET_CB_ARG0] = 0;
		return -EMSGSIZE;
		ret = -EMSGSIZE;
	}
	}
	ipset_nest_end(skb, adt);
	ipset_nest_end(skb, adt);
	return 0;
out:
	rcu_read_unlock();
	return ret;
}
}


static void
static void
@@ -259,8 +272,9 @@ mtype_gc(unsigned long ul_set)
	u32 id;
	u32 id;


	/* We run parallel with other readers (test element)
	/* We run parallel with other readers (test element)
	 * but adding/deleting new entries is locked out */
	 * but adding/deleting new entries is locked out
	read_lock_bh(&set->lock);
	 */
	spin_lock_bh(&set->lock);
	for (id = 0; id < map->elements; id++)
	for (id = 0; id < map->elements; id++)
		if (mtype_gc_test(id, map, set->dsize)) {
		if (mtype_gc_test(id, map, set->dsize)) {
			x = get_ext(set, map, id);
			x = get_ext(set, map, id);
@@ -269,7 +283,7 @@ mtype_gc(unsigned long ul_set)
				ip_set_ext_destroy(set, x);
				ip_set_ext_destroy(set, x);
			}
			}
		}
		}
	read_unlock_bh(&set->lock);
	spin_unlock_bh(&set->lock);


	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
	add_timer(&map->gc);
	add_timer(&map->gc);
Loading