Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ac3247ba authored by Harald Welte's avatar Harald Welte Committed by David S. Miller
Browse files

[NETFILTER]: connection tracking event notifiers



This adds a notifier-chain-based event mechanism for ip_conntrack state
changes.  Unlike the previous implementations in patch-o-matic, we
no longer need a field in the skb to achieve this.

Thanks to the valuable input from Patrick McHardy and Rusty on the idea
of a per_cpu implementation.

Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent abc3bc58
Loading
Loading
Loading
Loading
+144 −0
Original line number Diff line number Diff line
@@ -65,6 +65,63 @@ enum ip_conntrack_status {

	/* Both together */
	IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),

	/* Connection is dying (removed from lists), can not be unset. */
	IPS_DYING_BIT = 9,
	IPS_DYING = (1 << IPS_DYING_BIT),
};

/* Connection tracking event bits.
 *
 * Each event comes as a pair: FOO_BIT is the bit index and FOO is the
 * corresponding single-bit mask, so several events can be OR-ed into one
 * word. */
enum ip_conntrack_events {
	/* New conntrack */
	IPCT_NEW_BIT = 0,
	IPCT_NEW = 1 << IPCT_NEW_BIT,

	/* Expected connection */
	IPCT_RELATED_BIT = 1,
	IPCT_RELATED = 1 << IPCT_RELATED_BIT,

	/* Destroyed conntrack */
	IPCT_DESTROY_BIT = 2,
	IPCT_DESTROY = 1 << IPCT_DESTROY_BIT,

	/* Timer has been refreshed */
	IPCT_REFRESH_BIT = 3,
	IPCT_REFRESH = 1 << IPCT_REFRESH_BIT,

	/* Status has changed */
	IPCT_STATUS_BIT = 4,
	IPCT_STATUS = 1 << IPCT_STATUS_BIT,

	/* Update of protocol info */
	IPCT_PROTOINFO_BIT = 5,
	IPCT_PROTOINFO = 1 << IPCT_PROTOINFO_BIT,

	/* Volatile protocol info */
	IPCT_PROTOINFO_VOLATILE_BIT = 6,
	IPCT_PROTOINFO_VOLATILE = 1 << IPCT_PROTOINFO_VOLATILE_BIT,

	/* New helper for conntrack */
	IPCT_HELPER_BIT = 7,
	IPCT_HELPER = 1 << IPCT_HELPER_BIT,

	/* Update of helper info */
	IPCT_HELPINFO_BIT = 8,
	IPCT_HELPINFO = 1 << IPCT_HELPINFO_BIT,

	/* Volatile helper info */
	IPCT_HELPINFO_VOLATILE_BIT = 9,
	IPCT_HELPINFO_VOLATILE = 1 << IPCT_HELPINFO_VOLATILE_BIT,

	/* NAT info */
	IPCT_NATINFO_BIT = 10,
	IPCT_NATINFO = 1 << IPCT_NATINFO_BIT,
};

/* Expectation event bits: bit index plus the matching single-bit mask. */
enum ip_conntrack_expect_events {
	/* passed to the expectation notifier chain as IPEXP_NEW */
	IPEXP_NEW_BIT = 0,
	IPEXP_NEW = 1 << IPEXP_NEW_BIT,
};

#ifdef __KERNEL__
@@ -280,6 +337,11 @@ static inline int is_confirmed(struct ip_conntrack *ct)
	return test_bit(IPS_CONFIRMED_BIT, &ct->status);
}

/* Returns non-zero when IPS_DYING_BIT is set in ct->status. */
static inline int is_dying(struct ip_conntrack *ct)
{
	int dying;

	dying = test_bit(IPS_DYING_BIT, &ct->status);
	return dying;
}

extern unsigned int ip_conntrack_htable_size;
 
struct ip_conntrack_stat
@@ -303,6 +365,88 @@ struct ip_conntrack_stat

#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)

#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
#include <linux/notifier.h>

/* Event cache; one instance is declared per CPU right below.
 * Events are accumulated here and later handed to the conntrack notifier
 * chain in a single call. */
struct ip_conntrack_ecache {
	/* conntrack the pending events belong to; NULL when nothing is cached */
	struct ip_conntrack *ct;
	/* OR-ed IPCT_* event masks collected for ct, zeroed after delivery */
	unsigned int events;
};
DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);

#define CONNTRACK_ECACHE(x)	(__get_cpu_var(ip_conntrack_ecache).x)
 
extern struct notifier_block *ip_conntrack_chain;
extern struct notifier_block *ip_conntrack_expect_chain;

/* Add nb to the conntrack event chain; returns whatever
 * notifier_chain_register() returns. */
static inline int ip_conntrack_register_notifier(struct notifier_block *nb)
{
	struct notifier_block **chain = &ip_conntrack_chain;

	return notifier_chain_register(chain, nb);
}

/* Remove nb from the conntrack event chain; returns whatever
 * notifier_chain_unregister() returns. */
static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb)
{
	struct notifier_block **chain = &ip_conntrack_chain;

	return notifier_chain_unregister(chain, nb);
}

/* Add nb to the expectation event chain; returns whatever
 * notifier_chain_register() returns. */
static inline int
ip_conntrack_expect_register_notifier(struct notifier_block *nb)
{
	struct notifier_block **chain = &ip_conntrack_expect_chain;

	return notifier_chain_register(chain, nb);
}

/* Remove nb from the expectation event chain; returns whatever
 * notifier_chain_unregister() returns. */
static inline int
ip_conntrack_expect_unregister_notifier(struct notifier_block *nb)
{
	struct notifier_block **chain = &ip_conntrack_expect_chain;

	return notifier_chain_unregister(chain, nb);
}

/* Record 'event' in this CPU's event cache on behalf of the conntrack
 * attached to skb.
 *
 * If the cache currently belongs to a different conntrack than skb->nfct,
 * a rate-limited error plus stack dump is emitted; the event bit is OR-ed
 * into the cache in either case. */
static inline void
ip_conntrack_event_cache(enum ip_conntrack_events event,
			 const struct sk_buff *skb)
{
	struct ip_conntrack_ecache *cache = &__get_cpu_var(ip_conntrack_ecache);
	const struct ip_conntrack *pkt_ct = (struct ip_conntrack *) skb->nfct;

	if (unlikely(pkt_ct != cache->ct) && net_ratelimit()) {
		printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n");
		dump_stack();
	}

	cache->events |= event;
}

extern void 
ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct);
extern void ip_conntrack_event_cache_init(const struct sk_buff *skb);

/* Deliver 'event' for ct to the conntrack notifier chain right away,
 * bypassing the per-CPU cache.  Nothing is sent unless ct is confirmed
 * and not dying. */
static inline void ip_conntrack_event(enum ip_conntrack_events event,
				      struct ip_conntrack *ct)
{
	if (!is_confirmed(ct) || is_dying(ct))
		return;

	notifier_call_chain(&ip_conntrack_chain, event, ct);
}

/* Deliver 'event' for expectation exp to the expectation notifier chain.
 * The chain's return value is ignored. */
static inline void
ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
			  struct ip_conntrack_expect *exp)
{
	struct notifier_block **chain = &ip_conntrack_expect_chain;

	notifier_call_chain(chain, event, exp);
}
#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
/* Event support is compiled out: each hook below is an empty inline stub
 * with the same signature as its CONFIG_IP_NF_CONNTRACK_EVENTS
 * counterpart, so callers need no #ifdef of their own. */

/* No-op replacement for caching an event. */
static inline void ip_conntrack_event_cache(enum ip_conntrack_events event,
					    const struct sk_buff *skb) {}
/* No-op replacement for immediate event delivery. */
static inline void ip_conntrack_event(enum ip_conntrack_events event,
				      struct ip_conntrack *ct) {}
/* No-op replacement for flushing cached events; takes a const pointer,
 * matching the real prototype. */
static inline void ip_conntrack_deliver_cached_events_for(
					const struct ip_conntrack *ct) {}
/* No-op replacement for per-packet cache initialisation. */
static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {}
/* No-op replacement for expectation event delivery. */
static inline void
ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
			  struct ip_conntrack_expect *exp) {}
#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */

#ifdef CONFIG_IP_NF_NAT_NEEDED
static inline int ip_nat_initialized(struct ip_conntrack *conntrack,
				     enum ip_nat_manip_type manip)
+13 −4
Original line number Diff line number Diff line
@@ -38,12 +38,21 @@ extern int __ip_conntrack_confirm(struct sk_buff **pskb);
/* Confirm a connection: returns NF_DROP if packet must be dropped.
 *
 * Calls __ip_conntrack_confirm() only when the skb carries a conntrack
 * that is not yet confirmed; otherwise the verdict is NF_ACCEPT.  The
 * events cached for that conntrack are then delivered in every case
 * (the callee ignores a NULL conntrack). */
static inline int ip_conntrack_confirm(struct sk_buff **pskb)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct;
	int ret = NF_ACCEPT;

	if (ct && !is_confirmed(ct))
		ret = __ip_conntrack_confirm(pskb);
	ip_conntrack_deliver_cached_events_for(ct);

	return ret;
}

#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
struct ip_conntrack_ecache;
extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec);
#endif

extern struct list_head *ip_conntrack_hash;
extern struct list_head ip_conntrack_expect_list;
extern rwlock_t ip_conntrack_lock;
+10 −0
Original line number Diff line number Diff line
@@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK
	  of packets, but this mark value is kept in the conntrack session
	  instead of the individual packets.
	
config IP_NF_CONNTRACK_EVENTS
	bool "Connection tracking events"
	depends on IP_NF_CONNTRACK
	help
	  If this option is enabled, the connection tracking code will
	  provide a notifier chain that can be used by other kernel code
	  to get notified about changes in the connection tracking state.
	  
	  If unsure, say `N'.

config IP_NF_CT_PROTO_SCTP
	tristate  'SCTP protocol connection tracking support (EXPERIMENTAL)'
	depends on IP_NF_CONNTRACK && EXPERIMENTAL
+117 −5
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>

/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers*/
@@ -49,7 +50,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

/* Version string of the conntrack core; bumped to 2.2 with the event API. */
#define IP_CONNTRACK_VERSION	"2.2"

#if 0
#define DEBUGP printk
@@ -76,6 +77,81 @@ unsigned int ip_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;

#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
/* Heads of the notifier chains: conntrack events and expectation events. */
struct notifier_block *ip_conntrack_chain;
struct notifier_block *ip_conntrack_expect_chain;

/* Per-CPU cache of events that have not yet been delivered. */
DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);

/* Hand the events accumulated in 'ecache' to the conntrack notifier chain
 * and reset the event mask.
 *
 * The chain is only called when the cached conntrack is confirmed, not
 * dying, and at least one event bit is set; the mask is zeroed regardless.
 * ecache->ct is dereferenced unconditionally, so callers must make sure it
 * is non-NULL. */
static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache)
{
	struct ip_conntrack *cached = ecache->ct;
	const int deliverable = is_confirmed(cached) && !is_dying(cached)
				&& ecache->events;

	if (deliverable)
		notifier_call_chain(&ip_conntrack_chain, ecache->events,
				    cached);

	ecache->events = 0;
}

/* Externally visible wrapper around the static inline helper
 * __deliver_cached_events(); same contract. */
void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
{
	struct ip_conntrack_ecache *cache = ecache;

	__deliver_cached_events(cache);
}

/* Flush this CPU's cached events for one specific conntrack.  Meant to be
 * called before a packet is handled asynchronously or its skb is freed.
 *
 * A NULL ct is ignored.  If the cache belongs to another conntrack, a
 * rate-limited warning is printed and nothing is delivered.  Either way
 * the cache is marked empty afterwards.
 *
 * NOTE(review): ip_conntrack_event_cache_init() and the cleanup loop in
 * ip_ct_iterate_cleanup() call ip_conntrack_put() before dropping the
 * cached pointer, whereas this function only sets it to NULL -- confirm
 * who owns the reference on ecache->ct. */
void
ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *cache = &__get_cpu_var(ip_conntrack_ecache);

	if (ct == NULL)
		return;

	if (cache->ct != ct) {
		if (net_ratelimit())
			printk(KERN_WARNING "ecache: want to deliver for %p, "
				"but cache has %p\n", ct, cache->ct);
	} else {
		DEBUGP("ecache: delivering event for %p\n", ct);
		__deliver_cached_events(cache);
	}

	/* mark the cache as already delivered */
	cache->ct = NULL;
}

/* Bind this CPU's event cache to the conntrack attached to skb.
 *
 * If the cache already belongs to that conntrack nothing happens.
 * Otherwise pending events of the previously cached conntrack (if any)
 * are delivered and that conntrack is released with ip_conntrack_put();
 * then the cache is pointed at the new conntrack with an empty event mask.
 *
 * NOTE(review): the put above is paired with ip_conntrack_get(), whose
 * definition is not visible here -- confirm that it really takes a
 * reference, otherwise the put is unbalanced. */
void ip_conntrack_event_cache_init(const struct sk_buff *skb)
{
	struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct;
	struct ip_conntrack_ecache *ecache =
					&__get_cpu_var(ip_conntrack_ecache);
	enum ip_conntrack_info ctinfo;

	if (ecache->ct == ct) {
		DEBUGP("ecache: re-entered for conntrack %p.\n", ct);
		return;
	}

	/* take care of delivering potentially old events; the cache is NULL
	 * at startup and after ip_conntrack_deliver_cached_events_for() */
	if (likely(ecache->ct)) {
		DEBUGP("ecache: entered for different conntrack: "
		       "ecache->ct=%p, skb->nfct=%p. delivering "
		       "events\n", ecache->ct, ct);
		__deliver_cached_events(ecache);
		ip_conntrack_put(ecache->ct);
	} else {
		DEBUGP("ecache: entered for conntrack %p, "
			"cache was clean before\n", ct);
	}

	/* __deliver_cached_events() clears ecache->events, but it is skipped
	 * when the cache was empty.  Event bits recorded meanwhile by
	 * ip_conntrack_event_cache() must not be credited to the new
	 * conntrack, so reset the mask explicitly. */
	ecache->events = 0;

	/* initialize for this conntrack/packet */
	ecache->ct = ip_conntrack_get(skb, &ctinfo);
}

#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */

DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

void 
@@ -223,6 +299,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
@@ -261,6 +339,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	ip_conntrack_event(IPCT_DESTROY, ct);
	write_lock_bh(&ip_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
@@ -374,6 +453,16 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
		set_bit(IPS_CONFIRMED_BIT, &ct->status);
		CONNTRACK_STAT_INC(insert);
		write_unlock_bh(&ip_conntrack_lock);
		if (ct->helper)
			ip_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_IP_NF_NAT_NEEDED
		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
			ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
		ip_conntrack_event_cache(master_ct(ct) ?
					 IPCT_RELATED : IPCT_NEW, *pskb);

		return NF_ACCEPT;
	}

@@ -607,7 +696,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
@@ -666,6 +755,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,

	IP_NF_ASSERT((*pskb)->nfct);

	ip_conntrack_event_cache_init(*pskb);

	ret = proto->packet(ct, *pskb, ctinfo);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
@@ -676,8 +767,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
		return -ret;
	}

	if (set_reply)
		set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_STATUS, *pskb);

	return ret;
}
@@ -824,6 +915,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
		evict_oldest_expect(expect->master);

	ip_conntrack_expect_insert(expect);
	ip_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&ip_conntrack_lock);
@@ -861,8 +953,10 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
static inline int unhelp(struct ip_conntrack_tuple_hash *i,
			 const struct ip_conntrack_helper *me)
{
	if (tuplehash_to_ctrack(i)->helper == me)
	if (tuplehash_to_ctrack(i)->helper == me) {
 		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
		tuplehash_to_ctrack(i)->helper = NULL;
	}
	return 0;
}

@@ -924,6 +1018,7 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
			ip_conntrack_event_cache(IPCT_REFRESH, skb);
		}
		ct_add_counters(ct, ctinfo, skb);
		write_unlock_bh(&ip_conntrack_lock);
@@ -1012,6 +1107,23 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)

		ip_conntrack_put(ct);
	}

#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
	{
		/* we need to deliver all cached events in order to drop
		 * the reference counts */
		int cpu;
		for_each_cpu(cpu) {
			struct ip_conntrack_ecache *ecache = 
					&per_cpu(ip_conntrack_ecache, cpu);
			if (ecache->ct) {
				__ip_ct_deliver_cached_events(ecache);
				ip_conntrack_put(ecache->ct);
				ecache->ct = NULL;
			}
		}
	}
#endif
}

/* Fast function for those who don't want to parse /proc (and I don't
+8 −4
Original line number Diff line number Diff line
@@ -262,7 +262,8 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
}

/* We don't update if it's older than what we have. */
static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
			  struct sk_buff *skb)
{
	unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;

@@ -276,10 +277,13 @@ static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
			oldest = i;
	}

	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
	else if (oldest != NUM_SEQ_TO_REMEMBER)
		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
	} else if (oldest != NUM_SEQ_TO_REMEMBER) {
		info->seq_aft_nl[dir][oldest] = nl_seq;
		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
	}
}

static int help(struct sk_buff **pskb,
@@ -439,7 +443,7 @@ static int help(struct sk_buff **pskb,
	/* Now if this ends in \n, update ftp info.  Seq may have been
	 * adjusted by NAT code. */
	if (ends_in_nl)
		update_nl_seq(seq, ct_ftp_info,dir);
		update_nl_seq(seq, ct_ftp_info,dir, *pskb);
 out:
	spin_unlock_bh(&ip_ftp_lock);
	return ret;
Loading