Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 276472ea authored by Julian Anastasov's avatar Julian Anastasov Committed by Pablo Neira Ayuso
Browse files

ipvs: remove rs_lock by using RCU



rs_lock was used to protect rs_table (hash table)
from updaters (under global mutex) and readers (packet handlers).
We can remove rs_lock by using RCU lock for readers. Reclaiming
dest only with kfree_rcu is enough because the readers access
only fields from the ip_vs_dest structure.

Use hlist for rs_table.

As we are now using hlist_del_rcu, introduce in_rs_table
flag as replacement for the list_empty checks which do not
work with RCU. It is needed because only NAT dests are in
the rs_table.

Signed-off-by: default avatarJulian Anastasov <ja@ssi.bg>
Signed-off by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: default avatarSimon Horman <horms@verge.net.au>
parent 363c97d7
Loading
Loading
Loading
Loading
+8 −6
Original line number Original line Diff line number Diff line
@@ -738,7 +738,7 @@ struct ip_vs_dest_dst {
 */
 */
struct ip_vs_dest {
struct ip_vs_dest {
	struct list_head	n_list;   /* for the dests in the service */
	struct list_head	n_list;   /* for the dests in the service */
	struct list_head	d_list;   /* for table with all the dests */
	struct hlist_node	d_list;   /* for table with all the dests */


	u16			af;		/* address family */
	u16			af;		/* address family */
	__be16			port;		/* port number of the server */
	__be16			port;		/* port number of the server */
@@ -767,6 +767,9 @@ struct ip_vs_dest {
	__be16			vport;		/* virtual port number */
	__be16			vport;		/* virtual port number */
	union nf_inet_addr	vaddr;		/* virtual IP address */
	union nf_inet_addr	vaddr;		/* virtual IP address */
	__u32			vfwmark;	/* firewall mark of service */
	__u32			vfwmark;	/* firewall mark of service */

	struct rcu_head		rcu_head;
	unsigned int		in_rs_table:1;	/* we are in rs_table */
};
};




@@ -897,7 +900,7 @@ struct netns_ipvs {
	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)


	struct list_head	rs_table[IP_VS_RTAB_SIZE];
	struct hlist_head	rs_table[IP_VS_RTAB_SIZE];
	/* ip_vs_app */
	/* ip_vs_app */
	struct list_head	app_list;
	struct list_head	app_list;
	/* ip_vs_proto */
	/* ip_vs_proto */
@@ -933,7 +936,6 @@ struct netns_ipvs {


	int			num_services;    /* no of virtual services */
	int			num_services;    /* no of virtual services */


	rwlock_t		rs_lock;         /* real services table */
	/* Trash for destinations */
	/* Trash for destinations */
	struct list_head	dest_trash;
	struct list_head	dest_trash;
	/* Service counters */
	/* Service counters */
@@ -1376,8 +1378,8 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
	atomic_dec(&svc->usecnt);
	atomic_dec(&svc->usecnt);
}
}


extern struct ip_vs_dest *
extern bool
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
		       const union nf_inet_addr *daddr, __be16 dport);
		       const union nf_inet_addr *daddr, __be16 dport);


extern int ip_vs_use_count_inc(void);
extern int ip_vs_use_count_inc(void);
+2 −3
Original line number Original line Diff line number Diff line
@@ -1161,8 +1161,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
					 sizeof(_ports), _ports, &iph);
					 sizeof(_ports), _ports, &iph);
		if (pptr == NULL)
		if (pptr == NULL)
			return NF_ACCEPT;	/* Not for me */
			return NF_ACCEPT;	/* Not for me */
		if (ip_vs_lookup_real_service(net, af, iph.protocol,
		if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
					      &iph.saddr,
					   pptr[0])) {
					   pptr[0])) {
			/*
			/*
			 * Notify the real server: there is no
			 * Notify the real server: there is no
+31 −57
Original line number Original line Diff line number Diff line
@@ -508,17 +508,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
		& IP_VS_RTAB_MASK;
		& IP_VS_RTAB_MASK;
}
}


/*
/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
 *	Hashes ip_vs_dest in rs_table by <proto,addr,port>.
static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 *	should be called with locked tables.
 */
static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
{
	unsigned int hash;
	unsigned int hash;


	if (!list_empty(&dest->d_list)) {
	if (dest->in_rs_table)
		return 0;
		return;
	}


	/*
	/*
	 *	Hash by proto,addr,port,
	 *	Hash by proto,addr,port,
@@ -526,60 +522,47 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
	 */
	 */
	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);


	list_add(&dest->d_list, &ipvs->rs_table[hash]);
	hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);

	dest->in_rs_table = 1;
	return 1;
}
}


/*
/* Unhash ip_vs_dest from rs_table. */
 *	UNhashes ip_vs_dest from rs_table.
static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
 *	should be called with locked tables.
 */
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
{
	/*
	/*
	 * Remove it from the rs_table table.
	 * Remove it from the rs_table table.
	 */
	 */
	if (!list_empty(&dest->d_list)) {
	if (dest->in_rs_table) {
		list_del_init(&dest->d_list);
		hlist_del_rcu(&dest->d_list);
		dest->in_rs_table = 0;
	}
	}

	return 1;
}
}


/*
/* Check if real service by <proto,addr,port> is present */
 *	Lookup real service by <proto,addr,port> in the real service table.
bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
 */
			    const union nf_inet_addr *daddr, __be16 dport)
struct ip_vs_dest *
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
			  const union nf_inet_addr *daddr,
			  __be16 dport)
{
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct netns_ipvs *ipvs = net_ipvs(net);
	unsigned int hash;
	unsigned int hash;
	struct ip_vs_dest *dest;
	struct ip_vs_dest *dest;


	/*
	/* Check for "full" addressed entries */
	 *	Check for "full" addressed entries
	 *	Return the first found entry
	 */
	hash = ip_vs_rs_hashkey(af, daddr, dport);
	hash = ip_vs_rs_hashkey(af, daddr, dport);


	read_lock(&ipvs->rs_lock);
	rcu_read_lock();
	list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
		if ((dest->af == af)
		if (dest->port == dport &&
		    && ip_vs_addr_equal(af, &dest->addr, daddr)
		    dest->af == af &&
		    && (dest->port == dport)
		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
		    && ((dest->protocol == protocol) ||
		    (dest->protocol == protocol || dest->vfwmark)) {
			dest->vfwmark)) {
			/* HIT */
			/* HIT */
			read_unlock(&ipvs->rs_lock);
			rcu_read_unlock();
			return dest;
			return true;
		}
		}
	}
	}
	read_unlock(&ipvs->rs_lock);
	rcu_read_unlock();


	return NULL;
	return false;
}
}


/*
/*
@@ -612,9 +595,6 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 * the backup synchronization daemon. It finds the
 * the backup synchronization daemon. It finds the
 * destination to be bound to the received connection
 * destination to be bound to the received connection
 * on the backup.
 * on the backup.
 *
 * ip_vs_lookup_real_service() looked promissing, but
 * seems not working as expected.
 */
 */
struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
				   const union nf_inet_addr *daddr,
				   const union nf_inet_addr *daddr,
@@ -715,7 +695,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
			__ip_vs_dst_cache_reset(dest);
			__ip_vs_dst_cache_reset(dest);
			__ip_vs_unbind_svc(dest);
			__ip_vs_unbind_svc(dest);
			free_percpu(dest->stats.cpustats);
			free_percpu(dest->stats.cpustats);
			kfree(dest);
			kfree_rcu(dest, rcu_head);
		}
		}
	}
	}


@@ -742,7 +722,7 @@ static void ip_vs_trash_cleanup(struct net *net)
		__ip_vs_dst_cache_reset(dest);
		__ip_vs_dst_cache_reset(dest);
		__ip_vs_unbind_svc(dest);
		__ip_vs_unbind_svc(dest);
		free_percpu(dest->stats.cpustats);
		free_percpu(dest->stats.cpustats);
		kfree(dest);
		kfree_rcu(dest, rcu_head);
	}
	}
}
}


@@ -807,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
		 *    Put the real service in rs_table if not present.
		 *    Put the real service in rs_table if not present.
		 *    For now only for NAT!
		 *    For now only for NAT!
		 */
		 */
		write_lock_bh(&ipvs->rs_lock);
		ip_vs_rs_hash(ipvs, dest);
		ip_vs_rs_hash(ipvs, dest);
		write_unlock_bh(&ipvs->rs_lock);
	}
	}
	atomic_set(&dest->conn_flags, conn_flags);
	atomic_set(&dest->conn_flags, conn_flags);


@@ -905,7 +883,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
	atomic_set(&dest->persistconns, 0);
	atomic_set(&dest->persistconns, 0);
	atomic_set(&dest->refcnt, 1);
	atomic_set(&dest->refcnt, 1);


	INIT_LIST_HEAD(&dest->d_list);
	INIT_HLIST_NODE(&dest->d_list);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->stats.lock);
	spin_lock_init(&dest->stats.lock);
	__ip_vs_update_dest(svc, dest, udest, 1);
	__ip_vs_update_dest(svc, dest, udest, 1);
@@ -1045,9 +1023,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
	/*
	/*
	 *  Remove it from the d-linked list with the real services.
	 *  Remove it from the d-linked list with the real services.
	 */
	 */
	write_lock_bh(&ipvs->rs_lock);
	ip_vs_rs_unhash(dest);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&ipvs->rs_lock);


	/*
	/*
	 *  Decrease the refcnt of the dest, and free the dest
	 *  Decrease the refcnt of the dest, and free the dest
@@ -1067,7 +1043,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
		   time, so the operation here is OK */
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		atomic_dec(&dest->svc->refcnt);
		free_percpu(dest->stats.cpustats);
		free_percpu(dest->stats.cpustats);
		kfree(dest);
		kfree_rcu(dest, rcu_head);
	} else {
	} else {
		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
			      "dest->refcnt=%d\n",
			      "dest->refcnt=%d\n",
@@ -3811,11 +3787,9 @@ int __net_init ip_vs_control_net_init(struct net *net)
	int idx;
	int idx;
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct netns_ipvs *ipvs = net_ipvs(net);


	rwlock_init(&ipvs->rs_lock);

	/* Initialize rs_table */
	/* Initialize rs_table */
	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
		INIT_LIST_HEAD(&ipvs->rs_table[idx]);
		INIT_HLIST_HEAD(&ipvs->rs_table[idx]);


	INIT_LIST_HEAD(&ipvs->dest_trash);
	INIT_LIST_HEAD(&ipvs->dest_trash);
	atomic_set(&ipvs->ftpsvc_counter, 0);
	atomic_set(&ipvs->ftpsvc_counter, 0);
@@ -3892,7 +3866,7 @@ int __init ip_vs_control_init(void)


	EnterFunction(2);
	EnterFunction(2);


	/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
	/* Initialize svc_table, ip_vs_svc_fwm_table */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);