Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d1665820 authored by David S. Miller
Browse files

Merge branch 'master' of git://1984.lsi.us.es/nf-next



Pablo Neira Ayuso says:

====================
The following patchset contains Netfilter and IPVS updates for
your net-next tree, most relevantly they are:

* Add net namespace support to NFLOG, ULOG, ebt_ulog and NFQUEUE.
  The LOG and ebt_log targets have also been adapted, but they still
  depend on syslog net namespace support, which seems to be missing;
  from Gao Feng.

* Don't lose indications of congestion in IPv6 fragmentation handling,
  from Hannes Frederic Sowa.

* IPVS conversion to use RCU, including some code consolidation patches
  and optimizations, also some from Julian Anastasov.

* cpu fanout support for NFQUEUE, from Holger Eitzenberger.

* Better error reporting to userspace when dropping packets from
  all our _*_[xfrm|route]_me_harder functions, from Patrick McHardy.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 19952cc4 b8dd6a22
Loading
Loading
Loading
Loading
+0 −5
Original line number Diff line number Diff line
@@ -289,11 +289,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
#endif
}

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
extern struct proc_dir_entry *proc_net_netfilter;
#endif

#else /* !CONFIG_NETFILTER */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
+34 −1
Original line number Diff line number Diff line
@@ -575,7 +575,40 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
	skb->_skb_refdst = (unsigned long)dst;
}

extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst);
extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
				bool force);

/**
 * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
 * @skb: buffer
 * @dst: dst entry
 *
 * Sets skb dst, assuming a reference was not taken on dst.
 * If dst entry is cached, we do not take reference and dst_release
 * will be avoided by refdst_drop. If dst entry is not cached, we take
 * reference, so that last dst_release can destroy the dst immediately.
 *
 * This is a thin wrapper: it forwards @skb and @dst to
 * __skb_dst_set_noref() with the force argument set to false.
 */
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
	__skb_dst_set_noref(skb, dst, false);
}

/**
 * skb_dst_set_noref_force - sets skb dst, without taking reference
 * @skb: buffer
 * @dst: dst entry
 *
 * Sets skb dst, assuming a reference was not taken on dst.
 * No reference is taken and no dst_release will be called. While for
 * cached dsts deferred reclaim is a basic feature, for entries that are
 * not cached it is the caller's job to guarantee that the last dst_release
 * for the provided dst happens when nobody uses it, e.g. after an RCU
 * grace period.
 *
 * This is a thin wrapper: it forwards @skb and @dst to
 * __skb_dst_set_noref() with the force argument set to true.
 */
static inline void skb_dst_set_noref_force(struct sk_buff *skb,
					   struct dst_entry *dst)
{
	__skb_dst_set_noref(skb, dst, true);
}

/**
 * skb_dst_is_noref - Test if skb dst isn't refcounted
+88 −42
Original line number Diff line number Diff line
@@ -233,6 +233,21 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
	dst->ip = src->ip;
}

/*
 * ip_vs_addr_set - store the address @src into @dst for family @af
 *
 * With CONFIG_IP_VS_IPV6 enabled and @af == AF_INET6, the whole in6
 * member is copied. In every other case only the ip member is copied
 * and the slots all[1], all[2] and all[3] of @dst are cleared, so that
 * nothing left over from an earlier value remains in them.
 */
static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
				  const union nf_inet_addr *src)
{
	int idx;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		dst->in6 = src->in6;
		return;
	}
#endif
	dst->ip = src->ip;
	/* Wipe the slots that follow the first one. */
	for (idx = 1; idx <= 3; idx++)
		dst->all[idx] = 0;
}

static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
				   const union nf_inet_addr *b)
{
@@ -344,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
#define LeaveFunction(level)   do {} while (0)
#endif

#define	IP_VS_WAIT_WHILE(expr)	while (expr) { cpu_relax(); }


/*
 *      The port number of FTP service (in network order).
@@ -566,20 +579,19 @@ struct ip_vs_conn_param {
 */
struct ip_vs_conn {
	struct hlist_node	c_list;         /* hashed list heads */
#ifdef CONFIG_NET_NS
	struct net              *net;           /* Name space */
#endif
	/* Protocol, addresses and port numbers */
	u16                     af;             /* address family */
	__be16                  cport;
	__be16                  vport;
	__be16                  dport;
	__u32                   fwmark;         /* Fire wall mark from skb */
	__be16                  vport;
	u16			af;		/* address family */
	union nf_inet_addr      caddr;          /* client address */
	union nf_inet_addr      vaddr;          /* virtual address */
	union nf_inet_addr      daddr;          /* destination address */
	volatile __u32          flags;          /* status flags */
	__u16                   protocol;       /* Which protocol (TCP/UDP) */
#ifdef CONFIG_NET_NS
	struct net              *net;           /* Name space */
#endif

	/* counter and timer */
	atomic_t		refcnt;		/* reference count */
@@ -593,6 +605,7 @@ struct ip_vs_conn {
						 * state transition triggered
						 * synchronization
						 */
	__u32			fwmark;		/* Fire wall mark from skb */
	unsigned long		sync_endtime;	/* jiffies + sent_retries */

	/* Control members */
@@ -620,6 +633,8 @@ struct ip_vs_conn {
	const struct ip_vs_pe	*pe;
	char			*pe_data;
	__u8			pe_data_len;

	struct rcu_head		rcu_head;
};

/*
@@ -695,10 +710,9 @@ struct ip_vs_dest_user_kern {
 *	and the forwarding entries
 */
struct ip_vs_service {
	struct list_head	s_list;   /* for normal service table */
	struct list_head	f_list;   /* for fwmark-based service table */
	struct hlist_node	s_list;   /* for normal service table */
	struct hlist_node	f_list;   /* for fwmark-based service table */
	atomic_t		refcnt;   /* reference counter */
	atomic_t		usecnt;   /* use counter */

	u16			af;       /* address family */
	__u16			protocol; /* which protocol (TCP/UDP) */
@@ -713,25 +727,35 @@ struct ip_vs_service {
	struct list_head	destinations;  /* real server d-linked list */
	__u32			num_dests;     /* number of servers */
	struct ip_vs_stats      stats;         /* statistics for the service */
	struct ip_vs_app	*inc;	  /* bind conns to this app inc */

	/* for scheduling */
	struct ip_vs_scheduler	*scheduler;    /* bound scheduler object */
	rwlock_t		sched_lock;    /* lock sched_data */
	struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
	spinlock_t		sched_lock;    /* lock sched_data */
	void			*sched_data;   /* scheduler application data */

	/* alternate persistence engine */
	struct ip_vs_pe		*pe;
	struct ip_vs_pe __rcu	*pe;

	struct rcu_head		rcu_head;
};

/*
 * Information for cached dst.
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names only; confirm against the code that fills in this structure.
 */
struct ip_vs_dest_dst {
	struct dst_entry	*dst_cache;	/* destination cache entry */
	u32			dst_cookie;	/* presumably validity cookie for dst_cache - confirm */
	union nf_inet_addr	dst_saddr;	/* presumably source address tied to dst_cache - confirm */
	struct rcu_head		rcu_head;	/* presumably for RCU-deferred freeing - confirm */
};

/* In grace period after removing */
#define IP_VS_DEST_STATE_REMOVING	0x01
/*
 *	The real server destination forwarding entry
 *	with ip address, port number, and so on.
 */
struct ip_vs_dest {
	struct list_head	n_list;   /* for the dests in the service */
	struct list_head	d_list;   /* for table with all the dests */
	struct hlist_node	d_list;   /* for table with all the dests */

	u16			af;		/* address family */
	__be16			port;		/* port number of the server */
@@ -742,6 +766,7 @@ struct ip_vs_dest {

	atomic_t		refcnt;		/* reference counter */
	struct ip_vs_stats      stats;          /* statistics */
	unsigned long		state;		/* state flags */

	/* connection counters and thresholds */
	atomic_t		activeconns;	/* active connections */
@@ -752,10 +777,7 @@ struct ip_vs_dest {

	/* for destination cache */
	spinlock_t		dst_lock;	/* lock of dst_cache */
	struct dst_entry	*dst_cache;	/* destination cache entry */
	u32			dst_rtos;	/* RT_TOS(tos) for dst */
	u32			dst_cookie;
	union nf_inet_addr	dst_saddr;
	struct ip_vs_dest_dst __rcu *dest_dst;	/* cached dst info */

	/* for virtual service */
	struct ip_vs_service	*svc;		/* service it belongs to */
@@ -763,6 +785,10 @@ struct ip_vs_dest {
	__be16			vport;		/* virtual port number */
	union nf_inet_addr	vaddr;		/* virtual IP address */
	__u32			vfwmark;	/* firewall mark of service */

	struct list_head	t_list;		/* in dest_trash */
	struct rcu_head		rcu_head;
	unsigned int		in_rs_table:1;	/* we are in rs_table */
};


@@ -778,9 +804,13 @@ struct ip_vs_scheduler {
	/* scheduler initializing service */
	int (*init_service)(struct ip_vs_service *svc);
	/* scheduling service finish */
	int (*done_service)(struct ip_vs_service *svc);
	/* scheduler updating service */
	int (*update_service)(struct ip_vs_service *svc);
	void (*done_service)(struct ip_vs_service *svc);
	/* dest is linked */
	int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is unlinked */
	int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is updated */
	int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);

	/* selecting a server from the given service */
	struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
@@ -819,6 +849,7 @@ struct ip_vs_app {
	struct ip_vs_app	*app;		/* its real application */
	__be16			port;		/* port number in net order */
	atomic_t		usecnt;		/* usage counter */
	struct rcu_head		rcu_head;

	/*
	 * output hook: Process packet in inout direction, diff set for TCP.
@@ -881,6 +912,9 @@ struct ipvs_master_sync_state {
	struct netns_ipvs	*ipvs;
};

/* How much time to keep dests in trash */
#define IP_VS_DEST_TRASH_PERIOD		(120 * HZ)

/* IPVS in network namespace */
struct netns_ipvs {
	int			gen;		/* Generation */
@@ -892,7 +926,7 @@ struct netns_ipvs {
	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

	struct list_head	rs_table[IP_VS_RTAB_SIZE];
	struct hlist_head	rs_table[IP_VS_RTAB_SIZE];
	/* ip_vs_app */
	struct list_head	app_list;
	/* ip_vs_proto */
@@ -904,7 +938,6 @@ struct netns_ipvs {
	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
	struct list_head	tcp_apps[TCP_APP_TAB_SIZE];
	spinlock_t		tcp_app_lock;
#endif
	/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
@@ -912,7 +945,6 @@ struct netns_ipvs {
	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
	struct list_head	udp_apps[UDP_APP_TAB_SIZE];
	spinlock_t		udp_app_lock;
#endif
	/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
@@ -921,7 +953,6 @@ struct netns_ipvs {
	#define SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
	/* Hash table for SCTP application incarnations	 */
	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
	spinlock_t		sctp_app_lock;
#endif
	/* ip_vs_conn */
	atomic_t		conn_count;      /*  connection counter */
@@ -931,9 +962,10 @@ struct netns_ipvs {

	int			num_services;    /* no of virtual services */

	rwlock_t		rs_lock;         /* real services table */
	/* Trash for destinations */
	struct list_head	dest_trash;
	spinlock_t		dest_trash_lock;
	struct timer_list	dest_trash_timer; /* expiration timer */
	/* Service counters */
	atomic_t		ftpsvc_counter;
	atomic_t		nullsvc_counter;
@@ -1181,9 +1213,19 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
					     const struct ip_vs_iphdr *iph,
					     int inverse);

/* Get reference to gain full access to conn.
 * By default, RCU read-side critical sections have access only to
 * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference.
 *
 * Returns the result of atomic_inc_not_zero() on cp->refcnt: true when
 * the counter was non-zero and has been incremented, false when it was
 * already zero and no reference was taken.
 */
static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
{
	return atomic_inc_not_zero(&cp->refcnt);
}

/* put back the conn without restarting its timer
 *
 * Atomically decrements cp->refcnt; nothing else is done here.
 */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
{
	/* NOTE(review): barrier presumably orders earlier accesses to the
	 * conn before the reference is dropped - confirm.
	 */
	smp_mb__before_atomic_dec();
	atomic_dec(&cp->refcnt);
}
extern void ip_vs_conn_put(struct ip_vs_conn *cp);
@@ -1298,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);

void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
@@ -1346,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
				struct ip_vs_scheduler *scheduler);
extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc);
extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
				   struct ip_vs_scheduler *sched);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
@@ -1366,16 +1407,11 @@ extern struct ip_vs_stats ip_vs_stats;
extern int sysctl_ip_vs_sync_ver;

extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
		  const union nf_inet_addr *vaddr, __be16 vport);

/* Drop one use of the service: atomically decrements svc->usecnt. */
static inline void ip_vs_service_put(struct ip_vs_service *svc)
{
	atomic_dec(&svc->usecnt);
}

extern struct ip_vs_dest *
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
extern bool
ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
		       const union nf_inet_addr *daddr, __be16 dport);

extern int ip_vs_use_count_inc(void);
@@ -1388,8 +1424,18 @@ extern struct ip_vs_dest *
ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
		__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
		__u16 protocol, __u32 fwmark, __u32 flags);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
extern void ip_vs_try_bind_dest(struct ip_vs_conn *cp);

/* Take a reference on dest: atomically increments dest->refcnt. */
static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
{
	atomic_inc(&dest->refcnt);
}

/* Release a reference on dest: atomically decrements dest->refcnt. */
static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
{
	/* NOTE(review): barrier presumably orders earlier accesses to the
	 * dest before the reference is dropped - confirm.
	 */
	smp_mb__before_atomic_dec();
	atomic_dec(&dest->refcnt);
}

/*
 *      IPVS sync daemon data and function prototypes
@@ -1428,7 +1474,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
			   struct ip_vs_protocol *pp, int offset,
			   unsigned int hooknum, struct ip_vs_iphdr *iph);
extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head);

#ifdef CONFIG_IP_VS_IPV6
extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+2 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <net/netns/ipv6.h>
#include <net/netns/sctp.h>
#include <net/netns/dccp.h>
#include <net/netns/netfilter.h>
#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
@@ -94,6 +95,7 @@ struct net {
	struct netns_dccp	dccp;
#endif
#ifdef CONFIG_NETFILTER
	struct netns_nf		nf;
	struct netns_xt		xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	struct netns_ct		ct;
+10 −4
Original line number Diff line number Diff line
@@ -49,12 +49,18 @@ struct nf_logger {
int nf_log_register(u_int8_t pf, struct nf_logger *logger);
void nf_log_unregister(struct nf_logger *logger);

int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger);
void nf_log_unbind_pf(u_int8_t pf);
void nf_log_set(struct net *net, u_int8_t pf,
		const struct nf_logger *logger);
void nf_log_unset(struct net *net, const struct nf_logger *logger);

int nf_log_bind_pf(struct net *net, u_int8_t pf,
		   const struct nf_logger *logger);
void nf_log_unbind_pf(struct net *net, u_int8_t pf);

/* Calls the registered backend logging function */
__printf(7, 8)
void nf_log_packet(u_int8_t pf,
__printf(8, 9)
void nf_log_packet(struct net *net,
		   u_int8_t pf,
		   unsigned int hooknum,
		   const struct sk_buff *skb,
		   const struct net_device *in,
Loading