
Commit 350e4f49 authored by Linus Torvalds
Pull nmi-safe seq_buf printk update from Steven Rostedt:
 "This code is a fork from the trace-3.19 pull as it needed the
  trace_seq clean ups from that branch.

  This code solves the issue of performing stack dumps from NMI context.
  The issue is that printk() is not safe in NMI context: if an NMI
  triggers while a printk() is in progress, the NMI can deadlock on
  printk()'s internal locks.  This has been seen in practice.

  With lots of review from Petr Mladek, this code went through several
  iterations, and we feel that it is now at a point of quality to be
  accepted into mainline.

  Here's what is contained in this patch set:

   - Creates a "seq_buf" generic buffer utility that allows a descriptor
     to be passed around where functions can write their own "printk()"
     formatted strings into it.  The generic version was pulled out of
     the trace_seq() code that was made specifically for tracing.

   - The seq_buf code was changed to model the seq_file code.  I have a
     patch (not included for 3.19) that converts the seq_file.c code
     over to use seq_buf.c like the trace_seq.c code does.  This was
     done to make sure that seq_buf.c is compatible with seq_file.c.  I
     may try to get that patch in for 3.20.

   - The seq_buf.c file was moved to lib/ to remove it from being
     dependent on CONFIG_TRACING.

   - The printk() was updated to allow for a per_cpu "override" of the
     internal calls.  That is, instead of writing to the console, a call
     to printk() may do something else.  This made it easier to allow
     the NMI to change what printk() does in order to call dump_stack()
     without needing to update that code as well.

   - Finally, the dump_stack-from-all-CPUs-via-NMI code was converted to
     use the seq_buf code.  The caller that triggers the NMIs waits
     until all the NMIs have finished, and then prints the seq_buf data
     to the console safely from a non-NMI context.

  One added bonus is that this code also makes the NMI stack dump work
  on PREEMPT_RT kernels.  As printk() takes sleeping locks on
  PREEMPT_RT, printk() only writes to the console if the console does
  not use any rt_mutex-converted spin locks, which many consoles do"
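
For readers unfamiliar with the API, here is a minimal sketch of the seq_buf usage pattern described above: several helpers append printk()-formatted text to one descriptor, and the finished buffer is emitted with a single printk() from a safe (non-NMI) context. Only calls added by this series are used (seq_buf_init(), seq_buf_printf(), seq_buf_used()); the buffer size and the report_* names are hypothetical, not part of the patch set.

	/*
	 * Illustrative only: build a message with seq_buf, emit it later
	 * with one printk() from a safe context.
	 */
	#include <linux/printk.h>
	#include <linux/seq_buf.h>

	static unsigned char report_buf[256];

	static void fill_report(struct seq_buf *s, int cpu)
	{
		/* Any number of helpers can append to the same descriptor. */
		seq_buf_printf(s, "CPU %d: ", cpu);
		seq_buf_printf(s, "state dumped\n");
	}

	static void emit_report(int cpu)
	{
		struct seq_buf s;

		seq_buf_init(&s, report_buf, sizeof(report_buf));
		fill_report(&s, cpu);

		/* seq_buf_used() never exceeds the buffer size, even on overflow. */
		if (seq_buf_used(&s))
			printk(KERN_INFO "%.*s", (int)seq_buf_used(&s),
			       (const char *)report_buf);
	}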

* tag 'trace-seq-buf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  x86/nmi: Fix use of unallocated cpumask_var_t
  printk/percpu: Define printk_func when printk is not defined
  x86/nmi: Perform a safe NMI stack trace on all CPUs
  printk: Add per_cpu printk func to allow printk to be diverted
  seq_buf: Move the seq_buf code to lib/
  seq-buf: Make seq_buf_bprintf() conditional on CONFIG_BINARY_PRINTF
  tracing: Add seq_buf_get_buf() and seq_buf_commit() helper functions
  tracing: Have seq_buf use full buffer
  seq_buf: Add seq_buf_can_fit() helper function
  tracing: Add paranoid size check in trace_printk_seq()
  tracing: Use trace_seq_used() and seq_buf_used() instead of len
  tracing: Clean up tracing_fill_pipe_page()
  seq_buf: Create seq_buf_used() to find out how much was written
  tracing: Add a seq_buf_clear() helper and clear len and readpos in init
  tracing: Convert seq_buf fields to be like seq_file fields
  tracing: Convert seq_buf_path() to be like seq_path()
  tracing: Create seq_buf layer in trace_seq
parents c3280952 db086554
arch/x86/kernel/apic/hw_nmi.c  +86 −5
@@ -18,6 +18,7 @@
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/seq_buf.h>

#ifdef CONFIG_HARDLOCKUP_DETECTOR
u64 hw_nmi_get_sample_period(int watchdog_thresh)
@@ -29,14 +30,35 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
#ifdef arch_trigger_all_cpu_backtrace
/* For reliability, we're prepared to waste bits here. */
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
static cpumask_t printtrace_mask;

#define NMI_BUF_SIZE		4096

struct nmi_seq_buf {
	unsigned char		buffer[NMI_BUF_SIZE];
	struct seq_buf		seq;
};

/* Safe printing in NMI context */
static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);

/* "in progress" flag of arch_trigger_all_cpu_backtrace */
static unsigned long backtrace_flag;

static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
{
	const char *buf = s->buffer + start;

	printk("%.*s", (end - start) + 1, buf);
}

void arch_trigger_all_cpu_backtrace(bool include_self)
{
	struct nmi_seq_buf *s;
	int len;
	int cpu;
	int i;
	int cpu = get_cpu();
	int this_cpu = get_cpu();

	if (test_and_set_bit(0, &backtrace_flag)) {
		/*
@@ -49,7 +71,17 @@ void arch_trigger_all_cpu_backtrace(bool include_self)

	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
	if (!include_self)
		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
		cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));

	cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask));
	/*
	 * Set up per_cpu seq_buf buffers that the NMIs running on the other
	 * CPUs will write to.
	 */
	for_each_cpu(cpu, to_cpumask(backtrace_mask)) {
		s = &per_cpu(nmi_print_seq, cpu);
		seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
	}

	if (!cpumask_empty(to_cpumask(backtrace_mask))) {
		pr_info("sending NMI to %s CPUs:\n",
@@ -65,11 +97,58 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
		touch_softlockup_watchdog();
	}

	/*
	 * Now that all the NMIs have triggered, we can dump out their
	 * back traces safely to the console.
	 */
	for_each_cpu(cpu, &printtrace_mask) {
		int last_i = 0;

		s = &per_cpu(nmi_print_seq, cpu);
		len = seq_buf_used(&s->seq);
		if (!len)
			continue;

		/* Print line by line. */
		for (i = 0; i < len; i++) {
			if (s->buffer[i] == '\n') {
				print_seq_line(s, last_i, i);
				last_i = i + 1;
			}
		}
		/* Check if there was a partial line. */
		if (last_i < len) {
			print_seq_line(s, last_i, len - 1);
			pr_cont("\n");
		}
	}

	clear_bit(0, &backtrace_flag);
	smp_mb__after_atomic();
	put_cpu();
}

/*
 * It is not safe to call printk() directly from NMI handlers.
 * It may be fine if the NMI detected a lock up and we have no choice
 * but to do so, but doing a NMI on all other CPUs to get a back trace
 * can be done with a sysrq-l. We don't want that to lock up, which
 * can happen if the NMI interrupts a printk in progress.
 *
 * Instead, we redirect the vprintk() to this nmi_vprintk() that writes
 * the content into a per cpu seq_buf buffer. Then when the NMIs are
 * all done, we can safely dump the contents of the seq_buf to a printk()
 * from a non NMI context.
 */
static int nmi_vprintk(const char *fmt, va_list args)
{
	struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
	unsigned int len = seq_buf_used(&s->seq);

	seq_buf_vprintf(&s->seq, fmt, args);
	return seq_buf_used(&s->seq) - len;
}

static int
arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
{
@@ -78,12 +157,14 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
	cpu = smp_processor_id();

	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
		static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
		printk_func_t printk_func_save = this_cpu_read(printk_func);

		arch_spin_lock(&lock);
		/* Replace printk to write into the NMI seq */
		this_cpu_write(printk_func, nmi_vprintk);
		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
		show_regs(regs);
		arch_spin_unlock(&lock);
		this_cpu_write(printk_func, printk_func_save);

		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
		return NMI_HANDLED;
	}
include/linux/percpu.h  +4 −0
@@ -5,6 +5,7 @@
#include <linux/preempt.h>
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/printk.h>
#include <linux/pfn.h>
#include <linux/init.h>

@@ -134,4 +135,7 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
	(typeof(type) __percpu *)__alloc_percpu(sizeof(type),		\
						__alignof__(type))

/* To avoid include hell, as printk can not declare this, we declare it here */
DECLARE_PER_CPU(printk_func_t, printk_func);

#endif /* __LINUX_PERCPU_H */
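
As a complement to the hw_nmi.c hunk above, here is a sketch of how a caller might use this per-CPU hook to temporarily divert printk() on the local CPU into a seq_buf and then restore the saved function. The capture_* names and the wrapper are hypothetical; printk_func, printk_func_t and the seq_buf calls are the ones added by this series.

	/* Illustrative only: divert printk() on this CPU, then restore it. */
	#include <linux/percpu.h>
	#include <linux/preempt.h>
	#include <linux/printk.h>
	#include <linux/seq_buf.h>

	#define CAPTURE_BUF_SIZE	1024

	struct capture_seq_buf {
		unsigned char		buffer[CAPTURE_BUF_SIZE];
		struct seq_buf		seq;
	};

	static DEFINE_PER_CPU(struct capture_seq_buf, capture_buf);

	/* Matches printk_func_t: called instead of the console path. */
	static int capture_vprintk(const char *fmt, va_list args)
	{
		struct capture_seq_buf *s = this_cpu_ptr(&capture_buf);
		unsigned int len = seq_buf_used(&s->seq);

		seq_buf_vprintf(&s->seq, fmt, args);
		return seq_buf_used(&s->seq) - len;
	}

	static void with_printk_captured(void (*fn)(void))
	{
		struct capture_seq_buf *s;
		printk_func_t saved;

		preempt_disable();
		s = this_cpu_ptr(&capture_buf);
		seq_buf_init(&s->seq, s->buffer, CAPTURE_BUF_SIZE);

		saved = this_cpu_read(printk_func);
		this_cpu_write(printk_func, capture_vprintk);

		fn();			/* any printk() here lands in the seq_buf */

		this_cpu_write(printk_func, saved);
		preempt_enable();
	}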
include/linux/printk.h  +2 −0
@@ -123,6 +123,8 @@ static inline __printf(1, 2) __cold
void early_printk(const char *s, ...) { }
#endif

typedef int(*printk_func_t)(const char *fmt, va_list args);

#ifdef CONFIG_PRINTK
asmlinkage __printf(5, 0)
int vprintk_emit(int facility, int level,
include/linux/seq_buf.h  +136 −0
#ifndef _LINUX_SEQ_BUF_H
#define _LINUX_SEQ_BUF_H

#include <linux/fs.h>

/*
 * Trace sequences are used to allow a function to call several other functions
 * to create a string of data to use.
 */

/**
 * seq_buf - seq buffer structure
 * @buffer:	pointer to the buffer
 * @size:	size of the buffer
 * @len:	the amount of data inside the buffer
 * @readpos:	The next position to read in the buffer.
 */
struct seq_buf {
	char			*buffer;
	size_t			size;
	size_t			len;
	loff_t			readpos;
};

static inline void seq_buf_clear(struct seq_buf *s)
{
	s->len = 0;
	s->readpos = 0;
}

static inline void
seq_buf_init(struct seq_buf *s, unsigned char *buf, unsigned int size)
{
	s->buffer = buf;
	s->size = size;
	seq_buf_clear(s);
}

/*
 * seq_buf have a buffer that might overflow. When this happens
 * the len and size are set to be equal.
 */
static inline bool
seq_buf_has_overflowed(struct seq_buf *s)
{
	return s->len > s->size;
}

static inline void
seq_buf_set_overflow(struct seq_buf *s)
{
	s->len = s->size + 1;
}

/*
 * How much buffer is left on the seq_buf?
 */
static inline unsigned int
seq_buf_buffer_left(struct seq_buf *s)
{
	if (seq_buf_has_overflowed(s))
		return 0;

	return s->size - s->len;
}

/* How much buffer was written? */
static inline unsigned int seq_buf_used(struct seq_buf *s)
{
	return min(s->len, s->size);
}

/**
 * seq_buf_get_buf - get buffer to write arbitrary data to
 * @s: the seq_buf handle
 * @bufp: the beginning of the buffer is stored here
 *
 * Return the number of bytes available in the buffer, or zero if
 * there's no space.
 */
static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
{
	WARN_ON(s->len > s->size + 1);

	if (s->len < s->size) {
		*bufp = s->buffer + s->len;
		return s->size - s->len;
	}

	*bufp = NULL;
	return 0;
}

/**
 * seq_buf_commit - commit data to the buffer
 * @s: the seq_buf handle
 * @num: the number of bytes to commit
 *
 * Commit @num bytes of data written to a buffer previously acquired
 * by seq_buf_get.  To signal an error condition, or that the data
 * didn't fit in the available space, pass a negative @num value.
 */
static inline void seq_buf_commit(struct seq_buf *s, int num)
{
	if (num < 0) {
		seq_buf_set_overflow(s);
	} else {
		/* num must be negative on overflow */
		BUG_ON(s->len + num > s->size);
		s->len += num;
	}
}

extern __printf(2, 3)
int seq_buf_printf(struct seq_buf *s, const char *fmt, ...);
extern __printf(2, 0)
int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args);
extern int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s);
extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf,
			   int cnt);
extern int seq_buf_puts(struct seq_buf *s, const char *str);
extern int seq_buf_putc(struct seq_buf *s, unsigned char c);
extern int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len);
extern int seq_buf_putmem_hex(struct seq_buf *s, const void *mem,
			      unsigned int len);
extern int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc);

extern int seq_buf_bitmask(struct seq_buf *s, const unsigned long *maskp,
			   int nmaskbits);

#ifdef CONFIG_BINARY_PRINTF
extern int
seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary);
#endif

#endif /* _LINUX_SEQ_BUF_H */
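
A short, hypothetical example of the seq_buf_get_buf()/seq_buf_commit() pair declared above: a helper writes formatted bytes directly into the spare space of the buffer, then commits only what actually fit, or passes a negative count to mark overflow. The seq_buf_dump_line() name is illustrative, not part of this patch set.

	#include <linux/kernel.h>
	#include <linux/seq_buf.h>

	/* Illustrative helper: write one formatted line straight into the buffer. */
	static void seq_buf_dump_line(struct seq_buf *s, const void *addr, size_t n)
	{
		char *p;
		size_t avail = seq_buf_get_buf(s, &p);
		int len;

		if (!avail)
			return;		/* buffer already full or overflowed */

		len = snprintf(p, avail, "%zu bytes at %p\n", n, addr);

		/* Commit what fit; a negative count marks the seq_buf as overflowed. */
		if (len >= 0 && (size_t)len < avail)
			seq_buf_commit(s, len);
		else
			seq_buf_commit(s, -1);
	}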
include/linux/trace_seq.h  +23 −7
#ifndef _LINUX_TRACE_SEQ_H
#define _LINUX_TRACE_SEQ_H

#include <linux/fs.h>
#include <linux/seq_buf.h>

#include <asm/page.h>

@@ -12,19 +12,35 @@

struct trace_seq {
	unsigned char		buffer[PAGE_SIZE];
	unsigned int		len;
	unsigned int		readpos;
	struct seq_buf		seq;
	int			full;
};

static inline void
trace_seq_init(struct trace_seq *s)
{
	s->len = 0;
	s->readpos = 0;
	seq_buf_init(&s->seq, s->buffer, PAGE_SIZE);
	s->full = 0;
}

/**
 * trace_seq_used - amount of actual data written to buffer
 * @s: trace sequence descriptor
 *
 * Returns the amount of data written to the buffer.
 *
 * IMPORTANT!
 *
 * Use this instead of @s->seq.len if you need to pass the amount
 * of data from the buffer to another buffer (userspace, or what not).
 * The @s->seq.len on overflow is bigger than the buffer size and
 * using it can cause access to undefined memory.
 */
static inline int trace_seq_used(struct trace_seq *s)
{
	return seq_buf_used(&s->seq);
}

/**
 * trace_seq_buffer_ptr - return pointer to next location in buffer
 * @s: trace sequence descriptor
@@ -37,7 +53,7 @@ trace_seq_init(struct trace_seq *s)
static inline unsigned char *
trace_seq_buffer_ptr(struct trace_seq *s)
{
	return s->buffer + s->len;
	return s->buffer + seq_buf_used(&s->seq);
}

/**
@@ -49,7 +65,7 @@ trace_seq_buffer_ptr(struct trace_seq *s)
 */
static inline bool trace_seq_has_overflowed(struct trace_seq *s)
{
	return s->full || s->len > PAGE_SIZE - 1;
	return s->full || seq_buf_has_overflowed(&s->seq);
}

/*