Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 77f0379f authored by David S. Miller
Browse files


Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

A small batch with accumulated updates in nf-next, mostly IPVS updates,
they are:

1) Add 64-bit stats counters to IPVS, from Julian Anastasov.

2) Move NETFILTER_XT_MATCH_ADDRTYPE out of NETFILTER_ADVANCED as Docker
seems to require this, from Anton Blanchard.

3) Use boolean instead of numeric value in set_match_v*(), from
coccinelle via Fengguang Wu.

4) Allow rescheduling of new connections in IPVS when port reuse is
detected, from Marcelo Ricardo Leitner.

5) Add missing bits to support arptables extensions from nft_compat,
from Arturo Borrero.

Patrick is preparing a large batch to enhance the set infrastructure,
named expressions among other things, that should follow up soon after
this batch.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 49b31e57 5f158939
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -22,6 +22,27 @@ backup_only - BOOLEAN
	If set, disable the director function while the server is
	in backup mode to avoid packet loops for DR/TUN methods.

conn_reuse_mode - INTEGER
	1 - default

	Controls how ipvs will deal with connections for which port
	reuse is detected. It is a bitmap, with the values being:

	0: disable any special handling on port reuse. The new
	connection will be delivered to the same real server that was
	servicing the previous connection. This will effectively
	disable expire_nodest_conn.

	bit 1: enable rescheduling of new connections when it is safe.
	That is, whenever expire_nodest_conn applies and, for TCP
	sockets, when the connection is in TIME_WAIT state (which is
	only possible if you use NAT mode).

	bit 2: same as bit 1 and, in addition, for TCP connections,
	when the connection is in FIN_WAIT state, as this is the last
	state seen by the load balancer in Direct Routing mode. This
	bit helps when adding new real servers to a very busy cluster.

conntrack - BOOLEAN
	0 - disabled (default)
	not 0 - enabled
+44 −17
Original line number Diff line number Diff line
@@ -365,15 +365,15 @@ struct ip_vs_seq {

/* counters per cpu */
struct ip_vs_counters {
	__u32		conns;		/* connections scheduled */
	__u32		inpkts;		/* incoming packets */
	__u32		outpkts;	/* outgoing packets */
	__u64		conns;		/* connections scheduled */
	__u64		inpkts;		/* incoming packets */
	__u64		outpkts;	/* outgoing packets */
	__u64		inbytes;	/* incoming bytes */
	__u64		outbytes;	/* outgoing bytes */
};
/* Stats per cpu */
struct ip_vs_cpu_stats {
	struct ip_vs_counters   ustats;
	struct ip_vs_counters   cnt;
	struct u64_stats_sync   syncp;
};

@@ -383,23 +383,40 @@ struct ip_vs_estimator {

	u64			last_inbytes;
	u64			last_outbytes;
	u32			last_conns;
	u32			last_inpkts;
	u32			last_outpkts;

	u32			cps;
	u32			inpps;
	u32			outpps;
	u32			inbps;
	u32			outbps;
	u64			last_conns;
	u64			last_inpkts;
	u64			last_outpkts;

	u64			cps;
	u64			inpps;
	u64			outpps;
	u64			inbps;
	u64			outbps;
};

/*
 * IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user.
 *
 * The first group of fields holds the counters (connections, packets and
 * bytes); the second group holds the corresponding current rates.
 * Every field is a u64.
 */
struct ip_vs_kstats {
	u64			conns;		/* connections scheduled */
	u64			inpkts;		/* incoming packets */
	u64			outpkts;	/* outgoing packets */
	u64			inbytes;	/* incoming bytes */
	u64			outbytes;	/* outgoing bytes */

	u64			cps;		/* current connection rate */
	u64			inpps;		/* current in packet rate */
	u64			outpps;		/* current out packet rate */
	u64			inbps;		/* current in byte rate */
	u64			outbps;		/* current out byte rate */
};

struct ip_vs_stats {
	struct ip_vs_stats_user	ustats;		/* statistics */
	struct ip_vs_kstats	kstats;		/* kernel statistics */
	struct ip_vs_estimator	est;		/* estimator */
	struct ip_vs_cpu_stats __percpu	*cpustats;	/* per cpu counters */
	spinlock_t		lock;		/* spin lock */
	struct ip_vs_stats_user	ustats0;	/* reset values */
	struct ip_vs_kstats	kstats0;	/* reset values */
};

struct dst_entry;
@@ -924,6 +941,7 @@ struct netns_ipvs {
	int			sysctl_nat_icmp_send;
	int			sysctl_pmtu_disc;
	int			sysctl_backup_only;
	int			sysctl_conn_reuse_mode;

	/* ip_vs_lblc */
	int			sysctl_lblc_expiration;
@@ -1042,6 +1060,11 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
	       ipvs->sysctl_backup_only;
}

/* Return the conn_reuse_mode value stored in the given netns_ipvs. */
static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
{
	return ipvs->sysctl_conn_reuse_mode;
}

#else

static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1109,6 +1132,11 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
	return 0;
}

/* Variant used in the #else branch: ignores ipvs and always returns 1.
 * NOTE(review): presumably the build without sysctl support - confirm
 * against the enclosing #if, which is not visible in this hunk.
 */
static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
{
	return 1;
}

#endif

/* IPVS core functions
@@ -1388,8 +1416,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
			  struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);

/* Various IPVS packet transmitters (from ip_vs_xmit.c) */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+6 −1
Original line number Diff line number Diff line
@@ -358,6 +358,8 @@ enum {

	IPVS_SVC_ATTR_PE_NAME,		/* name of ct retriever */

	IPVS_SVC_ATTR_STATS64,		/* nested attribute for service stats */

	__IPVS_SVC_ATTR_MAX,
};

@@ -387,6 +389,8 @@ enum {

	IPVS_DEST_ATTR_ADDR_FAMILY,	/* Address family of address */

	IPVS_DEST_ATTR_STATS64,		/* nested attribute for dest stats */

	__IPVS_DEST_ATTR_MAX,
};

@@ -410,7 +414,8 @@ enum {
/*
 * Attributes used to describe service or destination entry statistics
 *
 * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
 * Used inside nested attributes IPVS_SVC_ATTR_STATS, IPVS_DEST_ATTR_STATS,
 * IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64.
 */
enum {
	IPVS_STATS_ATTR_UNSPEC = 0,
+1 −1
Original line number Diff line number Diff line
@@ -951,7 +951,7 @@ comment "Xtables matches"

config NETFILTER_XT_MATCH_ADDRTYPE
	tristate '"addrtype" address type match support'
	depends on NETFILTER_ADVANCED
	default m if NETFILTER_ADVANCED=n
	---help---
	  This option allows you to match what routing thinks of an address,
	  eg. UNICAST, LOCAL, BROADCAST, ...
+50 −19
Original line number Diff line number Diff line
@@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
		struct ip_vs_service *svc;

		s = this_cpu_ptr(dest->stats.cpustats);
		s->ustats.inpkts++;
		u64_stats_update_begin(&s->syncp);
		s->ustats.inbytes += skb->len;
		s->cnt.inpkts++;
		s->cnt.inbytes += skb->len;
		u64_stats_update_end(&s->syncp);

		rcu_read_lock();
		svc = rcu_dereference(dest->svc);
		s = this_cpu_ptr(svc->stats.cpustats);
		s->ustats.inpkts++;
		u64_stats_update_begin(&s->syncp);
		s->ustats.inbytes += skb->len;
		s->cnt.inpkts++;
		s->cnt.inbytes += skb->len;
		u64_stats_update_end(&s->syncp);
		rcu_read_unlock();

		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
		s->ustats.inpkts++;
		u64_stats_update_begin(&s->syncp);
		s->ustats.inbytes += skb->len;
		s->cnt.inpkts++;
		s->cnt.inbytes += skb->len;
		u64_stats_update_end(&s->syncp);
	}
}
@@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
		struct ip_vs_service *svc;

		s = this_cpu_ptr(dest->stats.cpustats);
		s->ustats.outpkts++;
		u64_stats_update_begin(&s->syncp);
		s->ustats.outbytes += skb->len;
		s->cnt.outpkts++;
		s->cnt.outbytes += skb->len;
		u64_stats_update_end(&s->syncp);

		rcu_read_lock();
		svc = rcu_dereference(dest->svc);
		s = this_cpu_ptr(svc->stats.cpustats);
		s->ustats.outpkts++;
		u64_stats_update_begin(&s->syncp);
		s->ustats.outbytes += skb->len;
		s->cnt.outpkts++;
		s->cnt.outbytes += skb->len;
		u64_stats_update_end(&s->syncp);
		rcu_read_unlock();

		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
		s->ustats.outpkts++;
		u64_stats_update_begin(&s->syncp);
		s->ustats.outbytes += skb->len;
		s->cnt.outpkts++;
		s->cnt.outbytes += skb->len;
		u64_stats_update_end(&s->syncp);
	}
}
@@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
	struct ip_vs_cpu_stats *s;

	s = this_cpu_ptr(cp->dest->stats.cpustats);
	s->ustats.conns++;
	u64_stats_update_begin(&s->syncp);
	s->cnt.conns++;
	u64_stats_update_end(&s->syncp);

	s = this_cpu_ptr(svc->stats.cpustats);
	s->ustats.conns++;
	u64_stats_update_begin(&s->syncp);
	s->cnt.conns++;
	u64_stats_update_end(&s->syncp);

	s = this_cpu_ptr(ipvs->tot_stats.cpustats);
	s->ustats.conns++;
	u64_stats_update_begin(&s->syncp);
	s->cnt.conns++;
	u64_stats_update_end(&s->syncp);
}


@@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb,
	}
}

/* Decide whether a new connection may legitimately take over the entry @cp.
 *
 * @cp:              existing connection entry being examined
 * @conn_reuse_mode: conn_reuse_mode bitmap; only the bit with value 2 is
 *                   tested here
 *
 * Returns false for controlled connections (FTP DATA or persistence).
 * For TCP, returns true when the entry is in TIME_WAIT, or when the bit
 * with value 2 is set in @conn_reuse_mode, the entry is in FIN_WAIT and
 * it carries IP_VS_CONN_F_NOOUTPUT.  For SCTP, returns true when the
 * entry is CLOSED.  Any other protocol yields false.
 */
static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
					int conn_reuse_mode)
{
	/* Controlled (FTP DATA or persistence) entries are never eligible */
	if (cp->control)
		return false;

	if (cp->protocol == IPPROTO_SCTP)
		return cp->state == IP_VS_SCTP_S_CLOSED;

	if (cp->protocol != IPPROTO_TCP)
		return false;

	/* TCP: TIME_WAIT qualifies regardless of the FIN_WAIT bit */
	if (cp->state == IP_VS_TCP_S_TIME_WAIT)
		return true;

	/* FIN_WAIT handling is opt-in through the bit with value 2 */
	if (!(conn_reuse_mode & 2))
		return false;

	if (cp->state != IP_VS_TCP_S_FIN_WAIT)
		return false;

	return (cp->flags & IP_VS_CONN_F_NOOUTPUT) != 0;
}

/* Handle response packets: rewrite addresses and send away...
 */
static unsigned int
@@ -1585,6 +1611,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
	struct ip_vs_conn *cp;
	int ret, pkts;
	struct netns_ipvs *ipvs;
	int conn_reuse_mode;

	/* Already marked as IPVS request or reply? */
	if (skb->ipvs_property)
@@ -1653,9 +1680,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
	 */
	cp = pp->conn_in_get(af, skb, &iph, 0);

	if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
	    unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
	    is_new_conn(skb, &iph)) {
	conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
	if (conn_reuse_mode && !iph.fragoffs &&
	    is_new_conn(skb, &iph) && cp &&
	    ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
	      unlikely(!atomic_read(&cp->dest->weight))) ||
	     unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
		if (!atomic_read(&cp->n_control))
			ip_vs_conn_expire_now(cp);
		__ip_vs_conn_put(cp);
		cp = NULL;
Loading