Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2f3d771a authored by Julian Anastasov, committed by Simon Horman
Browse files

ipvs: do not use dest after ip_vs_dest_put in LBLC



Commit c2a4ffb7 ("ipvs: convert lblc scheduler to rcu")
allows RCU readers to use dest after ip_vs_dest_put() has been called.
In a corner case this can race with ip_vs_dest_trash_expire(),
which can release the dest while it is being returned to the
RCU readers as the scheduling result.

To fix the problem, do not allow en->dest to be replaced, and
defer the ip_vs_dest_put() call by using an RCU callback. Now
en->dest does not need to be an RCU pointer.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
parent bcbde4c0
Loading
Loading
Loading
Loading
+31 −37
Original line number Diff line number Diff line
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
	struct hlist_node	list;
	int			af;		/* address family */
	union nf_inet_addr      addr;           /* destination IP address */
	struct ip_vs_dest __rcu	*dest;          /* real server (cache) */
	struct ip_vs_dest	*dest;          /* real server (cache) */
	unsigned long           lastuse;        /* last used time */
	struct rcu_head		rcu_head;
};
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = {
};
#endif

static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
static void ip_vs_lblc_rcu_free(struct rcu_head *head)
{
	struct ip_vs_dest *dest;
	struct ip_vs_lblc_entry *en = container_of(head,
						   struct ip_vs_lblc_entry,
						   rcu_head);

	hlist_del_rcu(&en->list);
	/*
	 * We don't kfree dest because it is referred either by its service
	 * or the trash dest list.
	 */
	dest = rcu_dereference_protected(en->dest, 1);
	ip_vs_dest_put(dest);
	kfree_rcu(en, rcu_head);
	ip_vs_dest_put(en->dest);
	kfree(en);
}

/*
 *	Unlink an LBLC entry from its hash list and defer its release.
 *
 *	The entry is removed with hlist_del_rcu() and then queued with
 *	call_rcu(), naming ip_vs_lblc_rcu_free() as the callback. Neither
 *	the ip_vs_dest_put() on en->dest nor the kfree() of the entry
 *	happens here; both are left to that callback.
 *
 *	This helper does not adjust any entry counter; callers that track
 *	tbl->entries decrement it themselves after calling this.
 */
static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
{
	/* Unlink first so the entry is off the list before it is queued. */
	hlist_del_rcu(&en->list);
	/* Release is deferred to the RCU callback, not done inline. */
	call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
}

/*
 *	Returns hash value for IPVS LBLC entry
@@ -203,7 +204,11 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
	struct ip_vs_lblc_entry *en;

	en = ip_vs_lblc_get(dest->af, tbl, daddr);
	if (!en) {
	if (en) {
		if (en->dest == dest)
			return en;
		ip_vs_lblc_del(en);
	}
	en = kmalloc(sizeof(*en), GFP_ATOMIC);
	if (!en)
		return NULL;
@@ -213,20 +218,9 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
	en->lastuse = jiffies;

	ip_vs_dest_hold(dest);
		RCU_INIT_POINTER(en->dest, dest);
	en->dest = dest;

	ip_vs_lblc_hash(tbl, en);
	} else {
		struct ip_vs_dest *old_dest;

		old_dest = rcu_dereference_protected(en->dest, 1);
		if (old_dest != dest) {
			ip_vs_dest_put(old_dest);
			ip_vs_dest_hold(dest);
			/* No ordering constraints for refcnt */
			RCU_INIT_POINTER(en->dest, dest);
		}
	}

	return en;
}
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
	tbl->dead = 1;
	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
			ip_vs_lblc_free(en);
			ip_vs_lblc_del(en);
			atomic_dec(&tbl->entries);
		}
	}
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
					sysctl_lblc_expiration(svc)))
				continue;

			ip_vs_lblc_free(en);
			ip_vs_lblc_del(en);
			atomic_dec(&tbl->entries);
		}
		spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
				continue;

			ip_vs_lblc_free(en);
			ip_vs_lblc_del(en);
			atomic_dec(&tbl->entries);
			goal--;
		}
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		 * free up entries from the trash at any time.
		 */

		dest = rcu_dereference(en->dest);
		dest = en->dest;
		if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
		    atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
			goto out;
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
	unregister_pernet_subsys(&ip_vs_lblc_ops);
	synchronize_rcu();
	rcu_barrier();
}