
Commit b924f959 authored by Linus Torvalds


Merge branch 'sparc-perf-events-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sparc-perf-events-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  mm, perf_event: Make vmalloc_user() align base kernel virtual address to SHMLBA
  perf_event: Provide vmalloc() based mmap() backing
parents b9d40b7b 2dca6999
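
Note: the two patches work together. perf mmap() buffers gain an optional vmalloc() backing (selected via PERF_USE_VMALLOC) for architectures with aliasing D-caches, and vmalloc_user() now aligns its base kernel virtual address to SHMLBA so that the kernel-side and user-side views of such a buffer can share a cache color. Below is a minimal sketch of the pattern the SHMLBA alignment is aimed at, using the existing vmalloc_user()/remap_vmalloc_range() pair; the driver names are hypothetical and not part of this merge:

	#include <linux/vmalloc.h>
	#include <linux/mm.h>
	#include <linux/fs.h>

	static void *demo_buf;	/* hypothetical driver buffer */

	/* ->mmap() handler of a hypothetical character device */
	static int demo_mmap(struct file *file, struct vm_area_struct *vma)
	{
		unsigned long size = vma->vm_end - vma->vm_start;

		if (!demo_buf)
			demo_buf = vmalloc_user(size);	/* zeroed; base now SHMLBA-aligned */
		if (!demo_buf)
			return -ENOMEM;

		/*
		 * Both the kernel (through demo_buf) and userspace (through
		 * this VMA) touch the same pages; matching cache colors avoids
		 * D-cache aliasing on VIPT caches such as sparc's.
		 */
		return remap_vmalloc_range(vma, demo_buf, 0);
	}
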
arch/sparc/Kconfig +2 −0
@@ -26,6 +26,7 @@ config SPARC
	select RTC_CLASS
	select RTC_DRV_M48T59
	select HAVE_PERF_EVENTS
	select PERF_USE_VMALLOC
	select HAVE_DMA_ATTRS
	select HAVE_DMA_API_DEBUG

@@ -48,6 +49,7 @@ config SPARC64
	select RTC_DRV_SUN4V
	select RTC_DRV_STARFIRE
	select HAVE_PERF_EVENTS
	select PERF_USE_VMALLOC

config ARCH_DEFCONFIG
	string
include/linux/perf_event.h +5 −0
@@ -442,6 +442,7 @@ enum perf_callchain_context {
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/pid_namespace.h>
#include <linux/workqueue.h>
#include <asm/atomic.h>

#define PERF_MAX_STACK_DEPTH		255
@@ -513,6 +514,10 @@ struct file;

struct perf_mmap_data {
	struct rcu_head			rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC
	struct work_struct		work;
#endif
	int				data_order;
	int				nr_pages;	/* nr of data pages  */
	int				writable;	/* are we writable   */
	int				nr_locked;	/* nr pages mlocked  */
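
Note: the new data_order field changes how the buffer size is derived. As a small illustration (example_data_size is a made-up name, not part of the patch): with the classic page-by-page allocator data_order stays 0 and nr_pages counts real pages, while with the vmalloc allocator nr_pages is 1 and data_order is ilog2() of the requested page count, so both layouts give the same total:

	static unsigned long example_data_size(int nr_pages, int data_order)
	{
		return (unsigned long)nr_pages << (PAGE_SHIFT + data_order);
	}

	/* e.g. an 8-page request:
	 *   page backing:    8 << (PAGE_SHIFT + 0) = 32 KiB
	 *   vmalloc backing: 1 << (PAGE_SHIFT + 3) = 32 KiB
	 */
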
init/Kconfig +18 −0
@@ -921,6 +921,11 @@ config HAVE_PERF_EVENTS
	help
	  See tools/perf/design.txt for details.

config PERF_USE_VMALLOC
	bool
	help
	  See tools/perf/design.txt for details

menu "Kernel Performance Events And Counters"

config PERF_EVENTS
@@ -976,6 +981,19 @@ config PERF_COUNTERS

	  Say N if unsure.

config DEBUG_PERF_USE_VMALLOC
	default n
	bool "Debug: use vmalloc to back perf mmap() buffers"
	depends on PERF_EVENTS && DEBUG_KERNEL
	select PERF_USE_VMALLOC
	help
	 Use vmalloc memory to back perf mmap() buffers.

	 Mostly useful for debugging the vmalloc code on platforms
	 that don't require it.

	 Say N if unsure.

endmenu

config VM_EVENT_COUNTERS
kernel/perf_event.c +186 −62
@@ -20,6 +20,7 @@
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/vmstat.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
@@ -2091,49 +2092,31 @@ void perf_event_update_userpage(struct perf_event *event)
	rcu_read_unlock();
}

static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
static unsigned long perf_data_size(struct perf_mmap_data *data)
{
	struct perf_event *event = vma->vm_file->private_data;
	struct perf_mmap_data *data;
	int ret = VM_FAULT_SIGBUS;

	if (vmf->flags & FAULT_FLAG_MKWRITE) {
		if (vmf->pgoff == 0)
			ret = 0;
		return ret;
	return data->nr_pages << (PAGE_SHIFT + data->data_order);
}

	rcu_read_lock();
	data = rcu_dereference(event->data);
	if (!data)
		goto unlock;

	if (vmf->pgoff == 0) {
		vmf->page = virt_to_page(data->user_page);
	} else {
		int nr = vmf->pgoff - 1;

		if ((unsigned)nr > data->nr_pages)
			goto unlock;
#ifndef CONFIG_PERF_USE_VMALLOC

		if (vmf->flags & FAULT_FLAG_WRITE)
			goto unlock;

		vmf->page = virt_to_page(data->data_pages[nr]);
	}
/*
 * Back perf_mmap() with regular GFP_KERNEL-0 pages.
 */

	get_page(vmf->page);
	vmf->page->mapping = vma->vm_file->f_mapping;
	vmf->page->index   = vmf->pgoff;
static struct page *
perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
{
	if (pgoff > data->nr_pages)
		return NULL;

	ret = 0;
unlock:
	rcu_read_unlock();
	if (pgoff == 0)
		return virt_to_page(data->user_page);

	return ret;
	return virt_to_page(data->data_pages[pgoff - 1]);
}

static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
static struct perf_mmap_data *
perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
{
	struct perf_mmap_data *data;
	unsigned long size;
@@ -2158,19 +2141,10 @@ static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
			goto fail_data_pages;
	}

	data->data_order = 0;
	data->nr_pages = nr_pages;
	atomic_set(&data->lock, -1);

	if (event->attr.watermark) {
		data->watermark = min_t(long, PAGE_SIZE * nr_pages,
				      event->attr.wakeup_watermark);
	}
	if (!data->watermark)
		data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);

	rcu_assign_pointer(event->data, data);

	return 0;
	return data;

fail_data_pages:
	for (i--; i >= 0; i--)
@@ -2182,7 +2156,7 @@ static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
	kfree(data);

fail:
	return -ENOMEM;
	return NULL;
}

static void perf_mmap_free_page(unsigned long addr)
@@ -2193,28 +2167,169 @@ static void perf_mmap_free_page(unsigned long addr)
	__free_page(page);
}

static void __perf_mmap_data_free(struct rcu_head *rcu_head)
static void perf_mmap_data_free(struct perf_mmap_data *data)
{
	struct perf_mmap_data *data;
	int i;

	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);

	perf_mmap_free_page((unsigned long)data->user_page);
	for (i = 0; i < data->nr_pages; i++)
		perf_mmap_free_page((unsigned long)data->data_pages[i]);
}

#else

/*
 * Back perf_mmap() with vmalloc memory.
 *
 * Required for architectures that have d-cache aliasing issues.
 */

static struct page *
perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
{
	if (pgoff > (1UL << data->data_order))
		return NULL;

	return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
}

static void perf_mmap_unmark_page(void *addr)
{
	struct page *page = vmalloc_to_page(addr);

	page->mapping = NULL;
}

static void perf_mmap_data_free_work(struct work_struct *work)
{
	struct perf_mmap_data *data;
	void *base;
	int i, nr;

	data = container_of(work, struct perf_mmap_data, work);
	nr = 1 << data->data_order;

	base = data->user_page;
	for (i = 0; i < nr + 1; i++)
		perf_mmap_unmark_page(base + (i * PAGE_SIZE));

	vfree(base);
}

static void perf_mmap_data_free(struct perf_mmap_data *data)
{
	schedule_work(&data->work);
}

static struct perf_mmap_data *
perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
{
	struct perf_mmap_data *data;
	unsigned long size;
	void *all_buf;

	WARN_ON(atomic_read(&event->mmap_count));

	size = sizeof(struct perf_mmap_data);
	size += sizeof(void *);

	data = kzalloc(size, GFP_KERNEL);
	if (!data)
		goto fail;

	INIT_WORK(&data->work, perf_mmap_data_free_work);

	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
	if (!all_buf)
		goto fail_all_buf;

	data->user_page = all_buf;
	data->data_pages[0] = all_buf + PAGE_SIZE;
	data->data_order = ilog2(nr_pages);
	data->nr_pages = 1;

	return data;

fail_all_buf:
	kfree(data);

fail:
	return NULL;
}

#endif

static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct perf_event *event = vma->vm_file->private_data;
	struct perf_mmap_data *data;
	int ret = VM_FAULT_SIGBUS;

	if (vmf->flags & FAULT_FLAG_MKWRITE) {
		if (vmf->pgoff == 0)
			ret = 0;
		return ret;
	}

	rcu_read_lock();
	data = rcu_dereference(event->data);
	if (!data)
		goto unlock;

	if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
		goto unlock;

	vmf->page = perf_mmap_to_page(data, vmf->pgoff);
	if (!vmf->page)
		goto unlock;

	get_page(vmf->page);
	vmf->page->mapping = vma->vm_file->f_mapping;
	vmf->page->index   = vmf->pgoff;

	ret = 0;
unlock:
	rcu_read_unlock();

	return ret;
}

static void
perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
{
	long max_size = perf_data_size(data);

	atomic_set(&data->lock, -1);

	if (event->attr.watermark) {
		data->watermark = min_t(long, max_size,
					event->attr.wakeup_watermark);
	}

	if (!data->watermark)
		data->watermark = max_t(long, PAGE_SIZE, max_size / 2);


	rcu_assign_pointer(event->data, data);
}

static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
{
	struct perf_mmap_data *data;

	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
	perf_mmap_data_free(data);
	kfree(data);
}

static void perf_mmap_data_free(struct perf_event *event)
static void perf_mmap_data_release(struct perf_event *event)
{
	struct perf_mmap_data *data = event->data;

	WARN_ON(atomic_read(&event->mmap_count));

	rcu_assign_pointer(event->data, NULL);
	call_rcu(&data->rcu_head, __perf_mmap_data_free);
	call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
}

static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2230,11 +2345,12 @@ static void perf_mmap_close(struct vm_area_struct *vma)

	WARN_ON_ONCE(event->ctx->parent_ctx);
	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
		unsigned long size = perf_data_size(event->data);
		struct user_struct *user = current_user();

		atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm);
		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
		vma->vm_mm->locked_vm -= event->data->nr_locked;
		perf_mmap_data_free(event);
		perf_mmap_data_release(event);
		mutex_unlock(&event->mmap_mutex);
	}
}
@@ -2252,6 +2368,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
	unsigned long user_locked, user_lock_limit;
	struct user_struct *user = current_user();
	unsigned long locked, lock_limit;
	struct perf_mmap_data *data;
	unsigned long vma_size;
	unsigned long nr_pages;
	long user_extra, extra;
@@ -2314,10 +2431,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
	}

	WARN_ON(event->data);
	ret = perf_mmap_data_alloc(event, nr_pages);
	if (ret)

	data = perf_mmap_data_alloc(event, nr_pages);
	ret = -ENOMEM;
	if (!data)
		goto unlock;

	ret = 0;
	perf_mmap_data_init(event, data);

	atomic_set(&event->mmap_count, 1);
	atomic_long_add(user_extra, &user->locked_vm);
	vma->vm_mm->locked_vm += extra;
@@ -2505,7 +2627,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
	if (!data->writable)
		return true;

	mask = (data->nr_pages << PAGE_SHIFT) - 1;
	mask = perf_data_size(data) - 1;

	offset = (offset - tail) & mask;
	head   = (head   - tail) & mask;
@@ -2610,7 +2732,7 @@ void perf_output_copy(struct perf_output_handle *handle,
		      const void *buf, unsigned int len)
{
	unsigned int pages_mask;
	unsigned int offset;
	unsigned long offset;
	unsigned int size;
	void **pages;

@@ -2619,12 +2741,14 @@ void perf_output_copy(struct perf_output_handle *handle,
	pages		= handle->data->data_pages;

	do {
		unsigned int page_offset;
		unsigned long page_offset;
		unsigned long page_size;
		int nr;

		nr	    = (offset >> PAGE_SHIFT) & pages_mask;
		page_offset = offset & (PAGE_SIZE - 1);
		size	    = min_t(unsigned int, PAGE_SIZE - page_offset, len);
		page_size   = 1UL << (handle->data->data_order + PAGE_SHIFT);
		page_offset = offset & (page_size - 1);
		size	    = min_t(unsigned int, page_size - page_offset, len);

		memcpy(pages[nr] + page_offset, buf, size);

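Note: to see how perf_output_copy() behaves under each backing, here is a small worked sketch (example_locate is hypothetical; it only mirrors the arithmetic above). With page backing each iteration handles at most one real page; with vmalloc backing nr_pages is 1, pages_mask is 0, and the whole buffer is treated as a single logical page of 2^data_order real pages, so page_offset simply wraps at the full buffer size.

	/* Hypothetical helper, for illustration only */
	static void example_locate(unsigned long offset, int nr_pages, int data_order,
				   int *page_idx, unsigned long *page_off)
	{
		unsigned long page_size  = 1UL << (data_order + PAGE_SHIFT);
		unsigned int  pages_mask = nr_pages - 1;	/* nr_pages is a power of two */

		*page_idx = (offset >> PAGE_SHIFT) & pages_mask;	/* always 0 for vmalloc backing */
		*page_off = offset & (page_size - 1);
	}
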
mm/vmalloc.c +26 −22
@@ -28,6 +28,7 @@
#include <asm/atomic.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>


/*** Page table manipulation functions ***/
@@ -1155,12 +1156,11 @@ static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
}

static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long flags, unsigned long start, unsigned long end,
		int node, gfp_t gfp_mask, void *caller)
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
	static struct vmap_area *va;
	struct vm_struct *area;
	unsigned long align = 1;

	BUG_ON(in_interrupt());
	if (flags & VM_IOREMAP) {
@@ -1200,7 +1200,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
				unsigned long start, unsigned long end)
{
	return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
						__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(__get_vm_area);
@@ -1209,7 +1209,7 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
				       unsigned long start, unsigned long end,
				       void *caller)
{
	return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
				  caller);
}

@@ -1224,22 +1224,22 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
 */
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				-1, GFP_KERNEL, __builtin_return_address(0));
}

struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
				void *caller)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
						-1, GFP_KERNEL, caller);
}

struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
				   int node, gfp_t gfp_mask)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
				  gfp_mask, __builtin_return_address(0));
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				  node, gfp_mask, __builtin_return_address(0));
}

static struct vm_struct *find_vm_area(const void *addr)
@@ -1402,7 +1402,8 @@ void *vmap(struct page **pages, unsigned int count,
}
EXPORT_SYMBOL(vmap);

static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node, void *caller)
@@ -1416,7 +1417,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
	area->nr_pages = nr_pages;
	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
		pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO,
				PAGE_KERNEL, node, caller);
		area->flags |= VM_VPAGES;
	} else {
@@ -1475,6 +1476,7 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
/**
 *	__vmalloc_node  -  allocate virtually contiguous memory
 *	@size:		allocation size
 *	@align:		desired alignment
 *	@gfp_mask:	flags for the page level allocator
 *	@prot:		protection mask for the allocated pages
 *	@node:		node to use for allocation or -1
@@ -1484,7 +1486,8 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
 *	allocator with @gfp_mask flags.  Map them into contiguous
 *	kernel virtual space, using a pagetable protection of @prot.
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller)
{
	struct vm_struct *area;
@@ -1495,8 +1498,8 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
		return NULL;

	area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
						node, gfp_mask, caller);
	area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
				  VMALLOC_END, node, gfp_mask, caller);

	if (!area)
		return NULL;
@@ -1515,7 +1518,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,

void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_node(size, gfp_mask, prot, -1,
	return __vmalloc_node(size, 1, gfp_mask, prot, -1,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);
@@ -1531,7 +1534,7 @@ EXPORT_SYMBOL(__vmalloc);
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
					-1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);
@@ -1548,7 +1551,8 @@ void *vmalloc_user(unsigned long size)
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
	ret = __vmalloc_node(size, SHMLBA,
			     GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			     PAGE_KERNEL, -1, __builtin_return_address(0));
	if (ret) {
		area = find_vm_area(ret);
@@ -1571,7 +1575,7 @@ EXPORT_SYMBOL(vmalloc_user);
 */
void *vmalloc_node(unsigned long size, int node)
{
	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
					node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);
@@ -1594,7 +1598,7 @@ EXPORT_SYMBOL(vmalloc_node);

void *vmalloc_exec(unsigned long size)
{
	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
			      -1, __builtin_return_address(0));
}

@@ -1615,7 +1619,7 @@ void *vmalloc_exec(unsigned long size)
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL,
	return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
			      -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_32);
@@ -1632,7 +1636,7 @@ void *vmalloc_32_user(unsigned long size)
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
	ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
			     -1, __builtin_return_address(0));
	if (ret) {
		area = find_vm_area(ret);
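
Note: the alignment is threaded through __get_vm_area_node(); every existing caller now passes an alignment of 1, so nothing changes for them, while vmalloc_user() passes SHMLBA. An illustrative, hypothetical self-check of the new guarantee (not part of the patch):

	#include <linux/kernel.h>
	#include <linux/vmalloc.h>
	#include <asm/shmparam.h>

	static void check_vmalloc_user_alignment(void)
	{
		void *buf = vmalloc_user(64 * 1024);

		if (buf) {
			/* base kernel virtual address is now SHMLBA-aligned */
			WARN_ON((unsigned long)buf & (SHMLBA - 1));
			vfree(buf);
		}
	}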