Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f4bc17cd authored by Julian Anastasov, committed by Patrick McHardy
Browse files

ipvs: netfilter connection tracking changes



	Add more code to IPVS to work with Netfilter connection
tracking and fix some problems.

- Allow IPVS to be compiled without connection tracking as in
2.6.35 and before. This can avoid keeping conntracks for all
IPVS connections because this costs memory. ip_vs_ftp still
depends on connection tracking and NAT as implemented for 2.6.36.

- Add sysctl var "conntrack" to enable connection tracking for
all IPVS connections. On loaded IPVS directors this requires
tuning the nf_conntrack_max limit.

- Add IP_VS_CONN_F_NFCT connection flag to request the connection
to use connection tracking. This allows user space to provide this
flag, for example, in dest->conn_flags. This can be useful to
request connection tracking per real server instead of forcing it
for all connections with the "conntrack" sysctl. Currently this flag
is set only by ip_vs_ftp and by the "conntrack" sysctl.

- Add ip_vs_nfct.c file to hold all connection tracking code,
so that the main code does not depend on netfilter conntrack
support.

- Restore the ip_vs_post_routing handler as in 2.6.35 and use
skb->ipvs_property=1 to allow IPVS to work without connection
tracking.

Connection tracking:

- most of the code is already in 2.6.36-rc

- alter conntrack reply tuple for LVS-NAT connections when first packet
from client is forwarded and conntrack state is NEW or RELATED.
Additionally, alter reply for RELATED connections from real server,
again for packet in original direction.

- add IP_VS_XMIT_TUNNEL to confirm conntrack (without altering
reply) for LVS-TUN early because we want to call nf_reset. It is
needed because we add IPIP header and the original conntrack
should be preserved, not destroyed. The transmitted IPIP packets
can reuse same conntrack, so we do not set skb->ipvs_property.

- try to destroy conntrack when the IPVS connection is destroyed.
It is not fatal if conntrack disappears before that, it depends
on the used timers.

Fix long-standing problems:

- add skb->ip_summed = CHECKSUM_NONE for the LVS-TUN transmitters

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Patrick McHardy <kaber@trash.net>
parent 3575792e
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -90,10 +90,12 @@
#define IP_VS_CONN_F_ONE_PACKET	0x2000		/* forward only one packet */

/*
 * Flags that are not sent to backup server start from bit 16.
 * IP_VS_CONN_F_NFCT is the first such flag; it requests that the
 * connection use netfilter connection tracking.
 */
#define IP_VS_CONN_F_NFCT	(1 << 16)	/* use netfilter conntrack */

/*
 * Connection flags from destination that can be changed by user space.
 * The trailing "| 0" does not change the value; presumably it is there
 * so that every flag line can end with "| \" (TODO confirm intent).
 */
#define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \
				IP_VS_CONN_F_ONE_PACKET | \
				IP_VS_CONN_F_NFCT | \
				0)

#define IP_VS_SCHEDNAME_MAXLEN	16
+43 −1
Original line number Diff line number Diff line
@@ -25,7 +25,9 @@
#include <linux/ip.h>
#include <linux/ipv6.h>			/* for struct ipv6hdr */
#include <net/ipv6.h>			/* for ipv6_addr_copy */

#ifdef CONFIG_IP_VS_NFCT
#include <net/netfilter/nf_conntrack.h>
#endif

/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
@@ -798,6 +800,7 @@ extern int sysctl_ip_vs_expire_nodest_conn;
extern int sysctl_ip_vs_expire_quiescent_template;
extern int sysctl_ip_vs_sync_threshold[2];
extern int sysctl_ip_vs_nat_icmp_send;
extern int sysctl_ip_vs_conntrack;
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];

@@ -955,8 +958,47 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
	return csum_partial(diff, sizeof(diff), oldsum);
}

#ifdef CONFIG_IP_VS_NFCT
/*
 *      Netfilter connection tracking
 *      (from ip_vs_nfct.c)
 */
/*
 * Report whether connection tracking is requested through the
 * sysctl_ip_vs_conntrack variable.
 *
 * Returns the current value of sysctl_ip_vs_conntrack; callers treat
 * a non-zero value as "enabled".
 */
static inline int ip_vs_conntrack_enabled(void)
{
	return sysctl_ip_vs_conntrack;
}

/*
 * Update the conntrack associated with skb for IPVS connection cp.
 * NOTE(review): the meaning of outin is not visible here; presumably it
 * selects the packet direction - confirm against ip_vs_nfct.c.
 */
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
				   int outin);
/*
 * Confirm the conntrack for skb. The return value is presumably a
 * netfilter verdict (the stub used without CONFIG_IP_VS_NFCT returns
 * NF_ACCEPT) - confirm against ip_vs_nfct.c.
 */
extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp);
/*
 * Register an expectation for a related connection.
 * NOTE(review): no stub for this function is visible in the
 * !CONFIG_IP_VS_NFCT branch, so callers presumably are only built when
 * CONFIG_IP_VS_NFCT is set - verify.
 */
extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
				      struct ip_vs_conn *cp, u_int8_t proto,
				      const __be16 port, int from_rs);
/*
 * Drop the conntrack that belongs to IPVS connection cp
 * (semantics defined in ip_vs_nfct.c, not visible here).
 */
extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);

#else

/*
 * Stub used when CONFIG_IP_VS_NFCT is not defined: connection tracking
 * support is compiled out, so it is never reported as enabled.
 *
 * Always returns 0.
 */
static inline int ip_vs_conntrack_enabled(void)
{
	return 0;
}

/*
 * Stub used when CONFIG_IP_VS_NFCT is not defined: does nothing, so
 * callers need no #ifdef around the call. All arguments are ignored.
 */
static inline void ip_vs_update_conntrack(struct sk_buff *skb,
					  struct ip_vs_conn *cp, int outin)
{
}

/*
 * Stub used when CONFIG_IP_VS_NFCT is not defined: there is no
 * conntrack to confirm, so the arguments are ignored.
 *
 * Always returns NF_ACCEPT.
 */
static inline int ip_vs_confirm_conntrack(struct sk_buff *skb,
					  struct ip_vs_conn *cp)
{
	return NF_ACCEPT;
}

/*
 * Stub used when CONFIG_IP_VS_NFCT is not defined: does nothing.
 * The cp argument is ignored.
 */
static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
{
}
/* CONFIG_IP_VS_NFCT */
#endif

#endif /* __KERNEL__ */

+11 −2
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
#
menuconfig IP_VS
	tristate "IP virtual server support"
	depends on NET && INET && NETFILTER && NF_CONNTRACK
	depends on NET && INET && NETFILTER
	---help---
	  IP Virtual Server support will let you build a high-performance
	  virtual server based on cluster of two or more real servers. This
@@ -235,7 +235,8 @@ comment 'IPVS application helper'

config	IP_VS_FTP
  	tristate "FTP protocol helper"
        depends on IP_VS_PROTO_TCP && NF_NAT
        depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT
	select IP_VS_NFCT
	---help---
	  FTP is a protocol that transfers IP address and/or port number in
	  the payload. In the virtual server via Network Address Translation,
@@ -247,4 +248,12 @@ config IP_VS_FTP
	  If you want to compile it in kernel, say Y. To compile it as a
	  module, choose M here. If unsure, say N.

config	IP_VS_NFCT
	bool "Netfilter connection tracking"
	depends on NF_CONNTRACK
	---help---
	  The Netfilter connection tracking support allows the IPVS
	  connection state to be exported to the Netfilter framework
	  for filtering purposes.

endif # IP_VS
+4 −1
Original line number Diff line number Diff line
@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o

ip_vs-extra_objs-y :=
ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o

ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o	   		   \
		ip_vs_est.o ip_vs_proto.o 				   \
		$(ip_vs_proto-objs-y)
		$(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)


# IPVS core
+13 −0
Original line number Diff line number Diff line
@@ -721,6 +721,9 @@ static void ip_vs_conn_expire(unsigned long data)
		if (cp->control)
			ip_vs_control_del(cp);

		if (cp->flags & IP_VS_CONN_F_NFCT)
			ip_vs_conn_drop_conntrack(cp);

		if (unlikely(cp->app != NULL))
			ip_vs_unbind_app(cp);
		ip_vs_unbind_dest(cp);
@@ -816,6 +819,16 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
	if (unlikely(pp && atomic_read(&pp->appcnt)))
		ip_vs_bind_app(cp, pp);

	/*
	 * Allow conntrack to be preserved. By default, conntrack
	 * is created and destroyed for every packet.
	 * Sometimes keeping conntrack can be useful for
	 * IP_VS_CONN_F_ONE_PACKET too.
	 */

	if (ip_vs_conntrack_enabled())
		cp->flags |= IP_VS_CONN_F_NFCT;

	/* Hash it in the ip_vs_conn_tab finally */
	ip_vs_conn_hash(cp);

Loading