
Commit 5c16d2c8 authored by Ingo Molnar


Merge branch 'tip/perf/ringbuffer-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/urgent
parents 5df414c6 b8b2663b
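For context on the series being merged: when the delta since the previous event no longer fits the event's 27-bit time_delta field, the writer now reserves RB_LEN_TIME_EXTEND extra bytes and lays a time-extend record directly in front of the data event, so the pair always travels together. The stand-alone C sketch below models that writer-side layout under stated assumptions; the model_* names, the struct, and the page/used bookkeeping are illustrative only and are not the kernel's types or API.

#include <stddef.h>
#include <stdint.h>

#define MODEL_TS_SHIFT		27
#define MODEL_TS_MASK		((1ULL << MODEL_TS_SHIFT) - 1)
#define MODEL_LEN_TIME_EXTEND	8	/* plays the role of RB_LEN_TIME_EXTEND */

enum { MODEL_TIME_EXTEND = 30 };	/* stands in for RINGBUF_TYPE_TIME_EXTEND */

struct model_event {
	uint32_t type_len:5, time_delta:27;
	uint32_t array[1];
};

/*
 * If the delta overflows 27 bits, grow the reservation by the extend size
 * and write the extend in front of the data event, zeroing the data
 * event's own delta: the same shape as the add_timestamp path that
 * __rb_reserve_next()/rb_update_event() take in the diff below.
 */
static struct model_event *model_reserve(char *page, size_t *used,
					 uint32_t data_len, uint64_t delta)
{
	int add_timestamp = delta > MODEL_TS_MASK;
	size_t length = sizeof(struct model_event) + data_len;
	struct model_event *event;

	if (add_timestamp)
		length += MODEL_LEN_TIME_EXTEND;

	event = (struct model_event *)(page + *used);
	*used += length;

	if (add_timestamp) {
		event->type_len   = MODEL_TIME_EXTEND;
		event->time_delta = delta & MODEL_TS_MASK;	/* low 27 bits */
		event->array[0]   = delta >> MODEL_TS_SHIFT;	/* upper bits  */
		event = (struct model_event *)((char *)event + MODEL_LEN_TIME_EXTEND);
		delta = 0;		/* already carried by the extend */
	}

	event->type_len   = 0;		/* data event */
	event->time_delta = delta;
	event->array[0]   = data_len;
	return event;			/* caller copies its payload after this */
}

In the kernel this happens in two steps (reserve, then rb_update_event()), and the discard path switches to the new rb_event_ts_length() so an extend and its data event are always dropped as one unit.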
include/linux/ring_buffer.h +0 −12
@@ -62,18 +62,6 @@ enum ring_buffer_type {
unsigned ring_buffer_event_length(struct ring_buffer_event *event);
void *ring_buffer_event_data(struct ring_buffer_event *event);

/**
 * ring_buffer_event_time_delta - return the delta timestamp of the event
 * @event: the event to get the delta timestamp of
 *
 * The delta timestamp is the 27 bit timestamp since the last event.
 */
static inline unsigned
ring_buffer_event_time_delta(struct ring_buffer_event *event)
{
	return event->time_delta;
}

/*
 * ring_buffer_discard_commit will remove an event that has not
 *   ben committed yet. If this is used, then ring_buffer_unlock_commit
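On the header side the only change is the removal of ring_buffer_event_time_delta(). With a large delta now carried by the time extend bound to the data event behind it, the data event's own 27-bit field is zero in exactly the interesting cases, so the raw accessor would no longer tell a caller anything useful on its own; the diff removes it without touching any caller, which suggests it had no remaining users. A hedged reader-side sketch of how the full delta is reassembled, mirroring what rb_update_write_stamp() does in the ring_buffer.c changes below (the model_* names are assumptions for illustration only):

#include <stdint.h>

#define MODEL_TS_SHIFT	27

enum { MODEL_TIME_EXTEND = 30 };	/* stands in for RINGBUF_TYPE_TIME_EXTEND */

struct model_event {
	uint32_t type_len:5, time_delta:27;
	uint32_t array[1];
};

/*
 * For a plain data event the delta is the 27-bit field itself; for an
 * extend/data pair it is (array[0] << TS_SHIFT) + time_delta, read from
 * the extend record that sits in front of the data event.
 */
static uint64_t model_event_delta(const struct model_event *event)
{
	uint64_t delta;

	if (event->type_len == MODEL_TIME_EXTEND) {
		delta = event->array[0];
		delta <<= MODEL_TS_SHIFT;
		delta += event->time_delta;
		return delta;
	}
	return event->time_delta;
}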
kernel/trace/ring_buffer.c +172 −163
@@ -224,6 +224,9 @@ enum {
	RB_LEN_TIME_STAMP = 16,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
	return length + RB_EVNT_HDR_SIZE;
}

-/* inline for ring buffer fast paths */
-static unsigned
+/*
+ * Return the length of the given event. Will return
+ * the length of the time extend if the event is a
+ * time extend.
+ */
+static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
	return 0;
}

/*
 * Return total length of time extend and data,
 *   or just the event length for all other events.
 */
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
	unsigned len = 0;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
		/* time extends include the data event after it */
		len = RB_LEN_TIME_EXTEND;
		event = skip_time_extend(event);
	}
	return len + rb_event_length(event);
}

/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
-	unsigned length = rb_event_length(event);
+	unsigned length;
+
+	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+		event = skip_time_extend(event);
+
+	length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
static void *
rb_event_data(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);
	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
	/* If length is in len field, then array[0] has the data */
	if (event->type_len)
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

/* Max number of timestamps that can fit on a page */
#define RB_TIMESTAMPS_PER_PAGE	(BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)

int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
	iter->head = 0;
}

/* Slow path, do not inline */
static noinline struct ring_buffer_event *
rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{
	event->type_len = RINGBUF_TYPE_TIME_EXTEND;

	/* Not the first event on the page? */
	if (rb_event_index(event)) {
		event->time_delta = delta & TS_MASK;
		event->array[0] = delta >> TS_SHIFT;
	} else {
		/* nope, just zero it */
		event->time_delta = 0;
		event->array[0] = 0;
	}

	return skip_time_extend(event);
}

/**
 * ring_buffer_update_event - update event type and data
 * @event: the even to update
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
 * data field.
 */
 static void
-rb_update_event(struct ring_buffer_event *event,
-			 unsigned type, unsigned length)
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+		struct ring_buffer_event *event, unsigned length,
+		int add_timestamp, u64 delta)
 {
-	event->type_len = type;
-
-	switch (type) {
-
-	case RINGBUF_TYPE_PADDING:
-	case RINGBUF_TYPE_TIME_EXTEND:
-	case RINGBUF_TYPE_TIME_STAMP:
-		break;
-
-	case 0:
-		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
-			event->array[0] = length;
-		else
-			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
-		break;
-	default:
-		BUG();
-	}
+	/* Only a commit updates the timestamp */
+	if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+		delta = 0;
+
+	/*
+	 * If we need to add a timestamp, then we
+	 * add it to the start of the resevered space.
+	 */
+	if (unlikely(add_timestamp)) {
+		event = rb_add_time_stamp(event, delta);
+		length -= RB_LEN_TIME_EXTEND;
+		delta = 0;
+	}
+
+	event->time_delta = delta;
+	length -= RB_EVNT_HDR_SIZE;
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+		event->type_len = 0;
+		event->array[0] = length;
+	} else
+		event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 }

/*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
	local_sub(length, &tail_page->write);
}

-static struct ring_buffer_event *
+/*
+ * This is the slow path, force gcc not to inline it.
+ */
+static noinline struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	     unsigned long length, unsigned long tail,
-	     struct buffer_page *tail_page, u64 *ts)
+	     struct buffer_page *tail_page, u64 ts)
{
	struct buffer_page *commit_page = cpu_buffer->commit_page;
	struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
		 * Nested commits always have zero deltas, so
		 * just reread the time stamp
		 */
-		*ts = rb_time_stamp(buffer);
-		next_page->page->time_stamp = *ts;
+		ts = rb_time_stamp(buffer);
+		next_page->page->time_stamp = ts;
	}

 out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,

static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-		  unsigned type, unsigned long length, u64 *ts)
+		  unsigned long length, u64 ts,
+		  u64 delta, int add_timestamp)
{
	struct buffer_page *tail_page;
	struct ring_buffer_event *event;
	unsigned long tail, write;

	/*
	 * If the time delta since the last event is too big to
	 * hold in the time field of the event, then we append a
	 * TIME EXTEND event ahead of the data event.
	 */
	if (unlikely(add_timestamp))
		length += RB_LEN_TIME_EXTEND;

	tail_page = cpu_buffer->tail_page;
	write = local_add_return(length, &tail_page->write);

@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
	tail = write - length;

	/* See if we shot pass the end of this buffer page */
-	if (write > BUF_PAGE_SIZE)
+	if (unlikely(write > BUF_PAGE_SIZE))
		return rb_move_tail(cpu_buffer, length, tail,
				    tail_page, ts);

@@ -1951,10 +2019,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,

	event = __rb_page_index(tail_page, tail);
	kmemcheck_annotate_bitfield(event, bitfield);
-	rb_update_event(event, type, length);
+	rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
 
-	/* The passed in type is zero for DATA */
-	if (likely(!type))
	local_inc(&tail_page->entries);

	/*
@@ -1962,7 +2028,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
	 * its timestamp.
	 */
	if (!tail)
-		tail_page->page->time_stamp = *ts;
+		tail_page->page->time_stamp = ts;

	return event;
}
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
	unsigned long addr;

	new_index = rb_event_index(event);
-	old_index = new_index + rb_event_length(event);
+	old_index = new_index + rb_event_ts_length(event);
	addr = (unsigned long)event;
	addr &= PAGE_MASK;

@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
	return 0;
}

static int
rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		  u64 *ts, u64 *delta)
{
	struct ring_buffer_event *event;
	int ret;

	WARN_ONCE(*delta > (1ULL << 59),
		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
		  (unsigned long long)*delta,
		  (unsigned long long)*ts,
		  (unsigned long long)cpu_buffer->write_stamp);

	/*
	 * The delta is too big, we to add a
	 * new timestamp.
	 */
	event = __rb_reserve_next(cpu_buffer,
				  RINGBUF_TYPE_TIME_EXTEND,
				  RB_LEN_TIME_EXTEND,
				  ts);
	if (!event)
		return -EBUSY;

	if (PTR_ERR(event) == -EAGAIN)
		return -EAGAIN;

	/* Only a commited time event can update the write stamp */
	if (rb_event_is_commit(cpu_buffer, event)) {
		/*
		 * If this is the first on the page, then it was
		 * updated with the page itself. Try to discard it
		 * and if we can't just make it zero.
		 */
		if (rb_event_index(event)) {
			event->time_delta = *delta & TS_MASK;
			event->array[0] = *delta >> TS_SHIFT;
		} else {
			/* try to discard, since we do not need this */
			if (!rb_try_to_discard(cpu_buffer, event)) {
				/* nope, just zero it */
				event->time_delta = 0;
				event->array[0] = 0;
			}
		}
		cpu_buffer->write_stamp = *ts;
		/* let the caller know this was the commit */
		ret = 1;
	} else {
		/* Try to discard the event */
		if (!rb_try_to_discard(cpu_buffer, event)) {
			/* Darn, this is just wasted space */
			event->time_delta = 0;
			event->array[0] = 0;
		}
		ret = 0;
	}

	*delta = 0;

	return ret;
}

static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
	local_inc(&cpu_buffer->committing);
	local_inc(&cpu_buffer->commits);
}

-static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
	unsigned long commits;

@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
		      unsigned long length)
{
	struct ring_buffer_event *event;
-	u64 ts, delta = 0;
-	int commit = 0;
+	u64 ts, delta;
 	int nr_loops = 0;
+	int add_timestamp;
+	u64 diff;

	rb_start_commit(cpu_buffer);

@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,

	length = rb_calculate_event_length(length);
 again:
	add_timestamp = 0;
	delta = 0;

	/*
	 * We allow for interrupts to reenter here and do a trace.
	 * If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
		goto out_fail;

	ts = rb_time_stamp(cpu_buffer->buffer);

-	/*
-	 * Only the first commit can update the timestamp.
-	 * Yes there is a race here. If an interrupt comes in
-	 * just after the conditional and it traces too, then it
-	 * will also check the deltas. More than one timestamp may
-	 * also be made. But only the entry that did the actual
-	 * commit will be something other than zero.
-	 */
-	if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
-		   rb_page_write(cpu_buffer->tail_page) ==
-		   rb_commit_index(cpu_buffer))) {
-		u64 diff;
-
-		diff = ts - cpu_buffer->write_stamp;
-
-		/* make sure this diff is calculated here */
-		barrier();
-
-		/* Did the write stamp get updated already? */
-		if (unlikely(ts < cpu_buffer->write_stamp))
-			goto get_event;
-
+	diff = ts - cpu_buffer->write_stamp;
+
+	/* make sure this diff is calculated here */
+	barrier();
+
+	/* Did the write stamp get updated already? */
+	if (likely(ts >= cpu_buffer->write_stamp)) {
 		delta = diff;
 		if (unlikely(test_time_stamp(delta))) {
-
-			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-			if (commit == -EBUSY)
-				goto out_fail;
-
-			if (commit == -EAGAIN)
-				goto again;
-
-			RB_WARN_ON(cpu_buffer, commit < 0);
+			WARN_ONCE(delta > (1ULL << 59),
+				  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+				  (unsigned long long)delta,
+				  (unsigned long long)ts,
+				  (unsigned long long)cpu_buffer->write_stamp);
+			add_timestamp = 1;
 		}
 	}
 
- get_event:
-	event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+	event = __rb_reserve_next(cpu_buffer, length, ts,
+				  delta, add_timestamp);
 	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
 	if (!event)
 		goto out_fail;
 
-	if (!rb_event_is_commit(cpu_buffer, event))
-		delta = 0;
-
-	event->time_delta = delta;

	return event;

 out_fail:
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,

#define TRACE_RECURSIVE_DEPTH 16

-static int trace_recursive_lock(void)
+/* Keep this code out of the fast path cache */
+static noinline void trace_recursive_fail(void)
 {
-	current->trace_recursion++;
-
-	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
-		return 0;
-
	/* Disable all tracing before we do anything else */
	tracing_off_permanent();

@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
		    in_nmi());

	WARN_ON_ONCE(1);
}

static inline int trace_recursive_lock(void)
{
	current->trace_recursion++;

	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
		return 0;

	trace_recursive_fail();

	return -1;
}

-static void trace_recursive_unlock(void)
+static inline void trace_recursive_unlock(void)
{
	WARN_ON_ONCE(!current->trace_recursion);

@@ -2308,13 +2298,29 @@ static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		      struct ring_buffer_event *event)
{
+	u64 delta;
+
 	/*
 	 * The event first in the commit queue updates the
 	 * time stamp.
 	 */
-	if (rb_event_is_commit(cpu_buffer, event))
-		cpu_buffer->write_stamp += event->time_delta;
+	if (rb_event_is_commit(cpu_buffer, event)) {
+		/*
+		 * A commit event that is first on a page
+		 * updates the write timestamp with the page stamp
+		 */
+		if (!rb_event_index(event))
+			cpu_buffer->write_stamp =
+				cpu_buffer->commit_page->page->time_stamp;
+		else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+			delta = event->array[0];
+			delta <<= TS_SHIFT;
+			delta += event->time_delta;
+			cpu_buffer->write_stamp += delta;
+		} else
+			cpu_buffer->write_stamp += event->time_delta;
+	}
}

static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
		      struct ring_buffer_event *event)
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);

static inline void rb_event_discard(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);

	/* array[0] holds the actual length for the discarded event */
	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
	event->type_len = RINGBUF_TYPE_PADDING;
@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,

 again:
	/*
-	 * We repeat when a timestamp is encountered. It is possible
-	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written, or from discarded
-	 * commits. The most that we can have is the number on a single page.
+	 * We repeat when a time extend is encountered.
+	 * Since the time extend is always attached to a data event,
+	 * we should never loop more than once.
+	 * (We never hit the following condition more than twice).
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
		return NULL;

	reader = rb_get_reader_page(cpu_buffer);
@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
		return NULL;

	/*
-	 * We repeat when a timestamp is encountered.
-	 * We can get multiple timestamps by nested interrupts or also
-	 * if filtering is on (discarding commits). Since discarding
-	 * commits can be frequent we can get a lot of timestamps.
-	 * But we limit them by not adding timestamps if they begin
-	 * at the start of a page.
+	 * We repeat when a time extend is encountered.
+	 * Since the time extend is always attached to a data event,
+	 * we should never loop more than once.
+	 * (We never hit the following condition more than twice).
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
		return NULL;

	if (rb_per_cpu_empty(cpu_buffer))
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
		if (len > (commit - read))
			len = (commit - read);

-		size = rb_event_length(event);
+		/* Always keep the time extend and data together */
+		size = rb_event_ts_length(event);

		if (len < size)
			goto out_unlock;
@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
				break;

			event = rb_reader_event(cpu_buffer);
-			size = rb_event_length(event);
+			/* Always keep the time extend and data together */
+			size = rb_event_ts_length(event);
		} while (len > size);

		/* update bpage */