
Commit e26b53d0 authored by Shaohua Li, committed by Jens Axboe

percpu_ida: make percpu_ida percpu size/batch configurable

Make the percpu_ida percpu size and batch configurable. The blk-mq tag
code will use it.

After blk-mq uses percpu_ida to manage tags, performance is improved.
My test was done on a 2-socket machine with 12 processes spread across
the two sockets, so any lock contention or IPI traffic should be
stressed heavily. Testing was done with null_blk.

hw_queue_depth	no-patch IOPS	patched IOPS
64		~800k/s		~1470k/s
2048		~4470k/s	~4340k/s
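
As a usage sketch (hypothetical caller code, not part of this commit):
with __percpu_ida_init(), a user whose tag depth is known up front could
size the percpu caches from that depth instead of taking the 48/32
defaults. The function name and sizing heuristic below are illustrative
assumptions only.

#include <linux/percpu_ida.h>

static struct percpu_ida example_pool;

/* Hypothetical: derive a batch from the tag depth, capped at the default. */
static int example_tags_init(unsigned long depth)
{
	unsigned long batch = (depth / 8) ? : 1UL;

	if (batch > IDA_DEFAULT_PCPU_BATCH_MOVE)
		batch = IDA_DEFAULT_PCPU_BATCH_MOVE;

	/* Keep the same 3/2 ratio of cache ceiling to batch as the defaults. */
	return __percpu_ida_init(&example_pool, depth, (batch * 3) / 2, batch);
}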

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 098faf58
include/linux/percpu_ida.h (+17 −1)

@@ -16,6 +16,8 @@ struct percpu_ida {
 	 * percpu_ida_init()
 	 */
 	unsigned			nr_tags;
+	unsigned			percpu_max_size;
+	unsigned			percpu_batch_size;
 
 	struct percpu_ida_cpu __percpu	*tag_cpu;
 
@@ -51,10 +53,24 @@ struct percpu_ida {
 	} ____cacheline_aligned_in_smp;
 };
 
+/*
+ * Number of tags we move between the percpu freelist and the global freelist at
+ * a time
+ */
+#define IDA_DEFAULT_PCPU_BATCH_MOVE	32U
+/* Max size of percpu freelist, */
+#define IDA_DEFAULT_PCPU_SIZE	((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
+
 int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
 void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
 
 void percpu_ida_destroy(struct percpu_ida *pool);
-int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags);
+int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
+	unsigned long max_size, unsigned long batch_size);
+static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
+{
+	return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE,
+		IDA_DEFAULT_PCPU_BATCH_MOVE);
+}
 
 #endif /* __PERCPU_IDA_H__ */
lib/percpu_ida.c (+11 −17)

@@ -30,15 +30,6 @@
 #include <linux/spinlock.h>
 #include <linux/percpu_ida.h>
 
-/*
- * Number of tags we move between the percpu freelist and the global freelist at
- * a time
- */
-#define IDA_PCPU_BATCH_MOVE	32U
-
-/* Max size of percpu freelist, */
-#define IDA_PCPU_SIZE		((IDA_PCPU_BATCH_MOVE * 3) / 2)
-
 struct percpu_ida_cpu {
 	/*
 	 * Even though this is percpu, we need a lock for tag stealing by remote
@@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool,
 	struct percpu_ida_cpu *remote;
 
 	for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
-	     cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2;
+	     cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
 	     cpus_have_tags--) {
 		cpu = cpumask_next(cpu, &pool->cpus_have_tags);
 
@@ -123,7 +114,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
 {
 	move_tags(tags->freelist, &tags->nr_free,
 		  pool->freelist, &pool->nr_free,
-		  min(pool->nr_free, IDA_PCPU_BATCH_MOVE));
+		  min(pool->nr_free, pool->percpu_batch_size));
 }
 
 static inline unsigned alloc_local_tag(struct percpu_ida *pool,
@@ -245,17 +236,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 		wake_up(&pool->wait);
 	}
 
-	if (nr_free == IDA_PCPU_SIZE) {
+	if (nr_free == pool->percpu_max_size) {
 		spin_lock(&pool->lock);
 
 		/*
 		 * Global lock held and irqs disabled, don't need percpu
 		 * lock
 		 */
-		if (tags->nr_free == IDA_PCPU_SIZE) {
+		if (tags->nr_free == pool->percpu_max_size) {
 			move_tags(pool->freelist, &pool->nr_free,
 				  tags->freelist, &tags->nr_free,
-				  IDA_PCPU_BATCH_MOVE);
+				  pool->percpu_batch_size);
 
 			wake_up(&pool->wait);
 		}
@@ -292,7 +283,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy);
  * Allocation is percpu, but sharding is limited by nr_tags - for best
  * performance, the workload should not span more cpus than nr_tags / 128.
  */
-int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
+int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
+	unsigned long max_size, unsigned long batch_size)
 {
 	unsigned i, cpu, order;
 
@@ -301,6 +293,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
 	init_waitqueue_head(&pool->wait);
 	spin_lock_init(&pool->lock);
 	pool->nr_tags = nr_tags;
+	pool->percpu_max_size = max_size;
+	pool->percpu_batch_size = batch_size;
 
 	/* Guard against overflow */
 	if (nr_tags > (unsigned) INT_MAX + 1) {
@@ -319,7 +313,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
 	pool->nr_free = nr_tags;
 
 	pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
-				       IDA_PCPU_SIZE * sizeof(unsigned),
+				       pool->percpu_max_size * sizeof(unsigned),
 				       sizeof(unsigned));
 	if (!pool->tag_cpu)
 		goto err;
@@ -332,4 +326,4 @@ err:
 	percpu_ida_destroy(pool);
 	return -ENOMEM;
 }
-EXPORT_SYMBOL_GPL(percpu_ida_init);
+EXPORT_SYMBOL_GPL(__percpu_ida_init);
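
For completeness, a minimal lifecycle sketch against the declarations in
the header above (hypothetical driver code; the pool name and depth are
made up): percpu_ida_alloc() returns a tag in [0, nr_tags) or a negative
error, and all tags must be freed before percpu_ida_destroy().

#include <linux/gfp.h>
#include <linux/percpu_ida.h>

static struct percpu_ida demo_pool;

static int demo(void)
{
	int tag, err;

	err = percpu_ida_init(&demo_pool, 256);		/* default 48/32 caches */
	if (err)
		return err;

	tag = percpu_ida_alloc(&demo_pool, GFP_KERNEL);	/* may sleep */
	if (tag < 0) {
		percpu_ida_destroy(&demo_pool);
		return tag;
	}

	/* ... use the tag, e.g. as an index into a request array ... */

	percpu_ida_free(&demo_pool, tag);
	percpu_ida_destroy(&demo_pool);
	return 0;
}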