Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 22c047cc authored by Eric Dumazet, committed by David S. Miller
Browse files

[NET]: Hashed spinlocks in net/ipv4/route.c



- Locking abstraction
- Spinlocks moved out of rt hash table : less memory (50%) used by rt
  hash table. It's a win even on UP.
- Sizing of spinlocks table depends on NR_CPUS

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f0e36f8c
Loading
Loading
Loading
Loading
+47 −19
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
@@ -201,8 +202,37 @@ __u8 ip_tos2prio[16] = {

struct rt_hash_bucket {
	struct rtable	*chain;
	spinlock_t	lock;
} __attribute__((__aligned__(8)));
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
/*
 * Instead of using one spinlock for each rt_hash_bucket, we use a table of
 * spinlocks shared by all buckets: a slot's lock is found by masking the slot
 * index (see rt_hash_lock_addr below).  Per the commit message this halves
 * the memory used by the rt hash table.
 * The size of this table is a power of two and scales with the number of CPUs.
 */
#if NR_CPUS >= 32
#define RT_HASH_LOCK_SZ	4096
#elif NR_CPUS >= 16
#define RT_HASH_LOCK_SZ	2048
#elif NR_CPUS >= 8
#define RT_HASH_LOCK_SZ	1024
#elif NR_CPUS >= 4
#define RT_HASH_LOCK_SZ	512
#else
#define RT_HASH_LOCK_SZ	256
#endif

/* Lock table; allocated once at boot by rt_hash_lock_init(). */
static spinlock_t	*rt_hash_locks;
/* Map a hash-table slot to its shared lock (RT_HASH_LOCK_SZ is a power of two). */
# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
/* Allocate and initialize the lock table; panics on allocation failure at boot. */
# define rt_hash_lock_init()	{ \
		int i; \
		rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
		if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
		for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
			spin_lock_init(&rt_hash_locks[i]); \
		}
#else
/* UP without spinlock debugging: locking compiles away to nothing. */
# define rt_hash_lock_addr(slot) NULL
# define rt_hash_lock_init()
#endif

/* Route cache hash table; rt_hash_mask is (table size - 1), size a power of two. */
static struct rt_hash_bucket 	*rt_hash_table;
static unsigned			rt_hash_mask;
@@ -587,7 +617,7 @@ static void rt_check_expire(unsigned long dummy)
		i = (i + 1) & rt_hash_mask;
		rthp = &rt_hash_table[i].chain;

		spin_lock(&rt_hash_table[i].lock);
		spin_lock(rt_hash_lock_addr(i));
		while ((rth = *rthp) != NULL) {
			if (rth->u.dst.expires) {
				/* Entry is expired even if it is in use */
@@ -620,7 +650,7 @@ static void rt_check_expire(unsigned long dummy)
 			rt_free(rth);
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
		}
		spin_unlock(&rt_hash_table[i].lock);
		spin_unlock(rt_hash_lock_addr(i));

		/* Fallback loop breaker. */
		if (time_after(jiffies, now))
@@ -643,11 +673,11 @@ static void rt_run_flush(unsigned long dummy)
	get_random_bytes(&rt_hash_rnd, 4);

	for (i = rt_hash_mask; i >= 0; i--) {
		spin_lock_bh(&rt_hash_table[i].lock);
		spin_lock_bh(rt_hash_lock_addr(i));
		rth = rt_hash_table[i].chain;
		if (rth)
			rt_hash_table[i].chain = NULL;
		spin_unlock_bh(&rt_hash_table[i].lock);
		spin_unlock_bh(rt_hash_lock_addr(i));

		for (; rth; rth = next) {
			next = rth->u.rt_next;
@@ -780,7 +810,7 @@ static int rt_garbage_collect(void)

			k = (k + 1) & rt_hash_mask;
			rthp = &rt_hash_table[k].chain;
			spin_lock_bh(&rt_hash_table[k].lock);
			spin_lock_bh(rt_hash_lock_addr(k));
			while ((rth = *rthp) != NULL) {
				if (!rt_may_expire(rth, tmo, expire)) {
					tmo >>= 1;
@@ -812,7 +842,7 @@ static int rt_garbage_collect(void)
				goal--;
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
			}
			spin_unlock_bh(&rt_hash_table[k].lock);
			spin_unlock_bh(rt_hash_lock_addr(k));
			if (goal <= 0)
				break;
		}
@@ -882,7 +912,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)

	rthp = &rt_hash_table[hash].chain;

	spin_lock_bh(&rt_hash_table[hash].lock);
	spin_lock_bh(rt_hash_lock_addr(hash));
	while ((rth = *rthp) != NULL) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
		if (!(rth->u.dst.flags & DST_BALANCED) &&
@@ -908,7 +938,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
			rth->u.dst.__use++;
			dst_hold(&rth->u.dst);
			rth->u.dst.lastuse = now;
			spin_unlock_bh(&rt_hash_table[hash].lock);
			spin_unlock_bh(rt_hash_lock_addr(hash));

			rt_drop(rt);
			*rp = rth;
@@ -949,7 +979,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
		int err = arp_bind_neighbour(&rt->u.dst);
		if (err) {
			spin_unlock_bh(&rt_hash_table[hash].lock);
			spin_unlock_bh(rt_hash_lock_addr(hash));

			if (err != -ENOBUFS) {
				rt_drop(rt);
@@ -990,7 +1020,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
	}
#endif
	rt_hash_table[hash].chain = rt;
	spin_unlock_bh(&rt_hash_table[hash].lock);
	spin_unlock_bh(rt_hash_lock_addr(hash));
	*rp = rt;
	return 0;
}
@@ -1058,7 +1088,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
{
	struct rtable **rthp;

	spin_lock_bh(&rt_hash_table[hash].lock);
	spin_lock_bh(rt_hash_lock_addr(hash));
	ip_rt_put(rt);
	for (rthp = &rt_hash_table[hash].chain; *rthp;
	     rthp = &(*rthp)->u.rt_next)
@@ -1067,7 +1097,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
			rt_free(rt);
			break;
		}
	spin_unlock_bh(&rt_hash_table[hash].lock);
	spin_unlock_bh(rt_hash_lock_addr(hash));
}

void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
@@ -3073,7 +3103,7 @@ __setup("rhash_entries=", set_rhash_entries);

int __init ip_rt_init(void)
{
	int i, order, goal, rc = 0;
	int order, goal, rc = 0;

	rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
			     (jiffies ^ (jiffies >> 7)));
@@ -3122,10 +3152,8 @@ int __init ip_rt_init(void)
		/* NOTHING */;

	rt_hash_mask--;
	for (i = 0; i <= rt_hash_mask; i++) {
		spin_lock_init(&rt_hash_table[i].lock);
		rt_hash_table[i].chain = NULL;
	}
	memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
	rt_hash_lock_init();

	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
	ip_rt_max_size = (rt_hash_mask + 1) * 16;