Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 394f4528 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge branch 'rcu/next' of...

parents 90a8a73c 3c2dcf2a
Loading
Loading
Loading
Loading
+128 −16
Original line number Diff line number Diff line
CONFIG_RCU_TRACE debugfs Files and Formats


The rcutree implementation of RCU provides debugfs trace output that
summarizes counters and state.  This information is useful for debugging
RCU itself, and can sometimes also help to debug abuses of RCU.
The following sections describe the debugfs files and formats.
The rcutree and rcutiny implementations of RCU provide debugfs trace
output that summarizes counters and state.  This information is useful for
debugging RCU itself, and can sometimes also help to debug abuses of RCU.
The following sections describe the debugfs files and formats, first
for rcutree and next for rcutiny.


Hierarchical RCU debugfs Files and Formats
CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats

This implementation of RCU provides three debugfs files under the
These implementations of RCU provide five debugfs files under the
top-level directory RCU: rcu/rcudata (which displays fields in struct
rcu_data), rcu/rcugp (which displays grace-period counters), and
rcu/rcuhier (which displays the struct rcu_node hierarchy).
rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
rcu/rcudata), rcu/rcugp (which displays grace-period counters),
rcu/rcuhier (which displays the struct rcu_node hierarchy), and
rcu/rcu_pending (which displays counts of the reasons that the
rcu_pending() function decided that there was core RCU work to do).

The output of "cat rcu/rcudata" looks as follows:

@@ -130,7 +134,8 @@ o "ci" is the number of RCU callbacks that have been invoked for
	been registered in absence of CPU-hotplug activity.

o	"co" is the number of RCU callbacks that have been orphaned due to
	this CPU going offline.
	this CPU going offline.  These orphaned callbacks have been moved
	to an arbitrarily chosen online CPU.

o	"ca" is the number of RCU callbacks that have been adopted due to
	other CPUs going offline.  Note that ci+co-ca+ql is the number of
@@ -168,12 +173,12 @@ o "gpnum" is the number of grace periods that have started. It is

The output of "cat rcu/rcuhier" looks as follows, with very long lines:

c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0
c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
1/1 .>. 0:127 ^0    
3/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
3/3f .>. 0:5 ^0    2/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3    
rcu_bh:
c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0
c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
0/1 .>. 0:127 ^0    
0/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
0/3f .>. 0:5 ^0    0/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3
@@ -212,11 +217,6 @@ o "fqlh" is the number of calls to force_quiescent_state() that
	exited immediately (without even being counted in nfqs above)
	due to contention on ->fqslock.

o	"oqlen" is the number of callbacks on the "orphan" callback
	list.  RCU callbacks are placed on this list by CPUs going
	offline, and are "adopted" either by the CPU helping the outgoing
	CPU or by the next rcu_barrier*() call, whichever comes first.

o	Each element of the form "1/1 0:127 ^0" represents one struct
	rcu_node.  Each line represents one level of the hierarchy, from
	root to leaves.  It is best to think of the rcu_data structures
@@ -326,3 +326,115 @@ o "nn" is the number of times that this CPU needed nothing. Alert
	readers will note that the rcu "nn" number for a given CPU very
	closely matches the rcu_bh "np" number for that same CPU.  This
	is due to short-circuit evaluation in rcu_pending().


CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats

These implementations of RCU provide a single debugfs file under the
top-level directory RCU, namely rcu/rcudata, which displays fields in
rcu_bh_ctrlblk, rcu_sched_ctrlblk and, for CONFIG_TINY_PREEMPT_RCU,
rcu_preempt_ctrlblk.

The output of "cat rcu/rcudata" is as follows:

rcu_preempt: qlen=24 gp=1097669 g197/p197/c197 tasks=...
             ttb=. btg=no ntb=184 neb=0 nnb=183 j=01f7 bt=0274
             normal balk: nt=1097669 gt=0 bt=371 b=0 ny=25073378 nos=0
             exp balk: bt=0 nos=0
rcu_sched: qlen: 0
rcu_bh: qlen: 0

This is split into rcu_preempt, rcu_sched, and rcu_bh sections, with the
rcu_preempt section appearing only in CONFIG_TINY_PREEMPT_RCU builds.
The last three lines of the rcu_preempt section appear only in
CONFIG_RCU_BOOST kernel builds.  The fields are as follows:

o	"qlen" is the number of RCU callbacks currently waiting either
	for an RCU grace period or waiting to be invoked.  This is the
	only field present for rcu_sched and rcu_bh, due to the
	short-circuiting of grace periods in those two cases.

o	"gp" is the number of grace periods that have completed.

o	"g197/p197/c197" displays the grace-period state, with the
	"g" number being the number of grace periods that have started
	(mod 256), the "p" number being the number of grace periods
	that the CPU has responded to (also mod 256), and the "c"
	number being the number of grace periods that have completed
	(once again mod 256).

	Why have both "gp" and "g"?  Because the data flowing into
	"gp" is only present in a CONFIG_RCU_TRACE kernel.

o	"tasks" is a set of bits.  The first bit is "T" if there are
	currently tasks that have recently blocked within an RCU
	read-side critical section, the second bit is "N" if any of the
	aforementioned tasks are blocking the current RCU grace period,
	and the third bit is "E" if any of the aforementioned tasks are
	blocking the current expedited grace period.  Each bit is "."
	if the corresponding condition does not hold.

o	"ttb" is a single bit.  It is "B" if any of the blocked tasks
	need to be priority boosted and "." otherwise.

o	"btg" indicates whether boosting has been carried out during
	the current grace period, with "exp" indicating that boosting
	is in progress for an expedited grace period, "no" indicating
	that boosting has not yet started for a normal grace period,
	"begun" indicating that boosting has begun for a normal grace
	period, and "done" indicating that boosting has completed for
	a normal grace period.

o	"ntb" is the total number of tasks subjected to RCU priority
	boosting since boot.

o	"neb" is the number of expedited grace periods that have had
	to resort to RCU priority boosting since boot.

o	"nnb" is the number of normal grace periods that have had
	to resort to RCU priority boosting since boot.

o	"j" is the low-order 12 bits of the jiffies counter in hexadecimal.

o	"bt" is the low-order 12 bits of the value that the jiffies counter
	will have at the next time that boosting is scheduled to begin.

o	In the line beginning with "normal balk", the fields are as follows:

	o	"nt" is the number of times that the system balked from
		boosting because there were no blocked tasks to boost.
		Note that the system will balk from boosting even if the
		grace period is overdue when the currently running task
		is looping within an RCU read-side critical section.
		There is no point in boosting in this case, because
		boosting a running task won't make it run any faster.

	o	"gt" is the number of times that the system balked
		from boosting because, although there were blocked tasks,
		none of them were preventing the current grace period
		from completing.

	o	"bt" is the number of times that the system balked
		from boosting because boosting was already in progress.

	o	"b" is the number of times that the system balked from
		boosting because boosting had already completed for
		the grace period in question.

	o	"ny" is the number of times that the system balked from
		boosting because it was not yet time to start boosting
		the grace period in question.

	o	"nos" is the number of times that the system balked from
		boosting for inexplicable ("not otherwise specified")
		reasons.  This can actually happen due to races involving
		increments of the jiffies counter.

o	In the line beginning with "exp balk", the fields are as follows:

	o	"bt" is the number of times that the system balked from
		boosting because there were no blocked tasks to boost.

	o	"nos" is the number of times that the system balked from
		 boosting for inexplicable ("not otherwise specified")
		 reasons.
+8 −1
Original line number Diff line number Diff line
@@ -83,6 +83,12 @@ extern struct group_info init_groups;
 */
# define CAP_INIT_BSET  CAP_FULL_SET

/*
 * INIT_TASK_RCU_BOOST() - initializer fragment for the RCU-boost field.
 *
 * With CONFIG_RCU_BOOST defined it expands to a designated initializer
 * that sets .rcu_boost_mutex to NULL (including the trailing comma, so it
 * can be dropped into a larger initializer list).  Without
 * CONFIG_RCU_BOOST it expands to nothing.
 */
#ifdef CONFIG_RCU_BOOST
#define INIT_TASK_RCU_BOOST()						\
	.rcu_boost_mutex = NULL,
#else
#define INIT_TASK_RCU_BOOST()
#endif
#ifdef CONFIG_TREE_PREEMPT_RCU
#define INIT_TASK_RCU_TREE_PREEMPT()					\
	.rcu_blocked_node = NULL,
@@ -94,7 +100,8 @@ extern struct group_info init_groups;
	.rcu_read_lock_nesting = 0,					\
	.rcu_read_unlock_special = 0,					\
	.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),		\
	INIT_TASK_RCU_TREE_PREEMPT()
	INIT_TASK_RCU_TREE_PREEMPT()					\
	INIT_TASK_RCU_BOOST()
#else
#define INIT_TASK_RCU_PREEMPT(tsk)
#endif
+0 −5
Original line number Diff line number Diff line
@@ -241,11 +241,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
#define list_first_entry_rcu(ptr, type, member) \
	list_entry_rcu((ptr)->next, type, member)

/*
 * __list_for_each_rcu - walk a list, fetching each link with
 * rcu_dereference_raw(list_next_rcu(...)).
 * @pos:	cursor variable assigned to each element in turn.
 * @head:	list head; iteration starts at its successor and stops
 *		once the cursor is equal to @head again.
 *
 * The final line must close both the nested calls and the for (...)
 * header itself; with one parenthesis fewer the macro cannot be expanded
 * into a valid statement.
 */
#define __list_for_each_rcu(pos, head) \
	for (pos = rcu_dereference_raw(list_next_rcu(head)); \
		pos != (head); \
		pos = rcu_dereference_raw(list_next_rcu((pos))))

/**
 * list_for_each_entry_rcu	-	iterate over rcu list of given type
 * @pos:	the type * to use as a loop cursor.
+2 −2
Original line number Diff line number Diff line
@@ -47,6 +47,8 @@
extern int rcutorture_runnable; /* for sysctl */
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */

/*
 * Ordering tests based on the difference (a) - (b).
 *
 * The *_GE forms are true when that difference is no larger than half of
 * the type's maximum value; the *_LT forms are true when it is larger.
 * NOTE(review): presumably intended for unsigned counters that may wrap,
 * so that "a" still compares as ahead of "b" after a wrap; confirm
 * against the callers.
 */
#define UINT_CMP_GE(a, b)	((a) - (b) <= UINT_MAX / 2)
#define UINT_CMP_LT(a, b)	((a) - (b) > UINT_MAX / 2)
#define ULONG_CMP_GE(a, b)	((a) - (b) <= ULONG_MAX / 2)
#define ULONG_CMP_LT(a, b)	((a) - (b) > ULONG_MAX / 2)

@@ -66,7 +68,6 @@ extern void call_rcu_sched(struct rcu_head *head,
extern void synchronize_sched(void);
extern void rcu_barrier_bh(void);
extern void rcu_barrier_sched(void);
extern void synchronize_sched_expedited(void);
extern int sched_expedited_torture_stats(char *page);

static inline void __rcu_read_lock_bh(void)
@@ -118,7 +119,6 @@ static inline int rcu_preempt_depth(void)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/* Internal to kernel */
extern void rcu_init(void);
extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_check_callbacks(int cpu, int user);
+8 −5
Original line number Diff line number Diff line
@@ -27,7 +27,9 @@

#include <linux/cache.h>

#define rcu_init_sched()	do { } while (0)
/*
 * rcu_init - inline stub with an empty body; calling it does nothing in
 * this header's configuration.
 */
static inline void rcu_init(void)
{
}

#ifdef CONFIG_TINY_RCU

@@ -58,6 +60,11 @@ static inline void synchronize_rcu_bh_expedited(void)
	synchronize_sched();
}

/*
 * synchronize_sched_expedited - inline wrapper that simply calls
 * synchronize_sched(); no separate expedited path is implemented here.
 */
static inline void synchronize_sched_expedited(void)
{
	synchronize_sched();
}

#ifdef CONFIG_TINY_RCU

static inline void rcu_preempt_note_context_switch(void)
@@ -125,16 +132,12 @@ static inline void rcu_cpu_stall_reset(void)
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC

extern int rcu_scheduler_active __read_mostly;
extern void rcu_scheduler_starting(void);

#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

/*
 * rcu_scheduler_starting - empty inline stub used when
 * CONFIG_DEBUG_LOCK_ALLOC is not defined; in the CONFIG_DEBUG_LOCK_ALLOC
 * case the function is instead declared extern and defined elsewhere.
 */
static inline void rcu_scheduler_starting(void)
{
}

#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

#endif /* __LINUX_RCUTINY_H */
Loading