Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0209f649 authored by Paul E. McKenney's avatar Paul E. McKenney
Browse files

rcu: limit rcu_node leaf-level fanout



Some recent benchmarks have indicated possible lock contention on the
leaf-level rcu_node locks.  This commit therefore limits the number of
CPUs per leaf-level rcu_node structure to 16, in other words, there
can be at most 16 rcu_data structures fanning into a given rcu_node
structure.  Prior to this, the limit was 32 on 32-bit systems and 64 on
64-bit systems.

Note that the fanout of non-leaf rcu_node structures is unchanged.  The
organization of accesses to the rcu_node tree is such that references
to non-leaf rcu_node structures are much less frequent than to the
leaf structures.

Signed-off-by: default avatarPaul E. McKenney <paulmck@linux.vnet.ibm.com>
parent 121dfc4b
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -1869,8 +1869,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
{
	int i;

	for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
	rsp->levelspread[0] = RCU_FANOUT_LEAF;
}
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
static void __init rcu_init_levelspread(struct rcu_state *rsp)
+25 −20
Original line number Diff line number Diff line
@@ -31,46 +31,51 @@
/*
 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
 * In theory, it should be possible to add more levels straightforwardly.
 * In practice, this has not been tested, so there is probably some
 * bug somewhere.
 * In practice, this did work well going from three levels to four.
 * Of course, your mileage may vary.
 */
#define MAX_RCU_LVLS 4
#define RCU_FANOUT	      (CONFIG_RCU_FANOUT)
#define RCU_FANOUT_SQ	      (RCU_FANOUT * RCU_FANOUT)
#define RCU_FANOUT_CUBE	      (RCU_FANOUT_SQ * RCU_FANOUT)
#define RCU_FANOUT_FOURTH     (RCU_FANOUT_CUBE * RCU_FANOUT)

#if NR_CPUS <= RCU_FANOUT
#if CONFIG_RCU_FANOUT > 16
#define RCU_FANOUT_LEAF       16
#else /* #if CONFIG_RCU_FANOUT > 16 */
#define RCU_FANOUT_LEAF       (CONFIG_RCU_FANOUT)
#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
#define RCU_FANOUT_1	      (RCU_FANOUT_LEAF)
#define RCU_FANOUT_2	      (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_3	      (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_4	      (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)

#if NR_CPUS <= RCU_FANOUT_1
#  define NUM_RCU_LVLS	      1
#  define NUM_RCU_LVL_0	      1
#  define NUM_RCU_LVL_1	      (NR_CPUS)
#  define NUM_RCU_LVL_2	      0
#  define NUM_RCU_LVL_3	      0
#  define NUM_RCU_LVL_4	      0
#elif NR_CPUS <= RCU_FANOUT_SQ
#elif NR_CPUS <= RCU_FANOUT_2
#  define NUM_RCU_LVLS	      2
#  define NUM_RCU_LVL_0	      1
#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
#  define NUM_RCU_LVL_2	      (NR_CPUS)
#  define NUM_RCU_LVL_3	      0
#  define NUM_RCU_LVL_4	      0
#elif NR_CPUS <= RCU_FANOUT_CUBE
#elif NR_CPUS <= RCU_FANOUT_3
#  define NUM_RCU_LVLS	      3
#  define NUM_RCU_LVL_0	      1
#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
#  define NUM_RCU_LVL_3	      NR_CPUS
#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
#  define NUM_RCU_LVL_3	      (NR_CPUS)
#  define NUM_RCU_LVL_4	      0
#elif NR_CPUS <= RCU_FANOUT_FOURTH
#elif NR_CPUS <= RCU_FANOUT_4
#  define NUM_RCU_LVLS	      4
#  define NUM_RCU_LVL_0	      1
#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
#  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
#  define NUM_RCU_LVL_4	      NR_CPUS
#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
#  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
#  define NUM_RCU_LVL_4	      (NR_CPUS)
#else
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif /* #if (NR_CPUS) <= RCU_FANOUT */
#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */

#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)