Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 092ced04 authored by Gao Xiang's avatar Gao Xiang Committed by Gao Xiang
Browse files

erofs: introduce multipage per-CPU buffers

To deal the with the cases which inplace decompression is infeasible
for some inplace I/O. Per-CPU buffers was introduced to get rid of page
allocation latency and thrash for low-latency decompression algorithms
such as lz4.

For the big pcluster feature, introduce multipage per-CPU buffers to
keep such inplace I/O pclusters temporarily as well but note that
per-CPU pages are just consecutive virtually.

When a new big pcluster fs is mounted, its max pclustersize will be
read and per-CPU buffers can be growed if needed. Shrinking adjustable
per-CPU buffers is more complex (because we don't know if such size
is still be used), so currently just release them all when unloading.

Link: https://lore.kernel.org/r/20210409190630.19569-1-xiang@kernel.org


Acked-by: default avatarChao Yu <yuchao0@huawei.com>
Signed-off-by: default avatarGao Xiang <hsiangkao@redhat.com>
parent 5eb979af
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0-only

obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
+5 −3
Original line number Diff line number Diff line
@@ -46,7 +46,9 @@ int z_erofs_load_lz4_config(struct super_block *sb,
	EROFS_SB(sb)->lz4.max_distance_pages = distance ?
					DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
					LZ4_MAX_DISTANCE_PAGES;
	return 0;

	/* TODO: use max pclusterblks after bigpcluster is enabled */
	return erofs_pcpubuf_growsize(1);
}

static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
@@ -113,7 +115,7 @@ static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq,
	 * pages should be copied in order to avoid being overlapped.
	 */
	struct page **in = rq->in;
	u8 *const tmp = erofs_get_pcpubuf(0);
	u8 *const tmp = erofs_get_pcpubuf(1);
	u8 *tmpp = tmp;
	unsigned int inlen = rq->inputsize - pageofs_in;
	unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in);
@@ -269,7 +271,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
	 * compressed data is preferred.
	 */
	if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
		dst = erofs_get_pcpubuf(0);
		dst = erofs_get_pcpubuf(1);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

+7 −18
Original line number Diff line number Diff line
@@ -192,9 +192,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)

/* hard limit of pages per compressed cluster */
#define Z_EROFS_CLUSTER_MAX_PAGES       (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
#define EROFS_PCPUBUF_NR_PAGES          Z_EROFS_CLUSTER_MAX_PAGES
#else
#define EROFS_PCPUBUF_NR_PAGES          0
#endif	/* !CONFIG_EROFS_FS_ZIP */

/* we strictly follow PAGE_SIZE and no buffer head yet */
@@ -399,24 +396,16 @@ int erofs_namei(struct inode *dir, struct qstr *name,
/* dir.c */
extern const struct file_operations erofs_dir_fops;

/* pcpubuf.c */
void *erofs_get_pcpubuf(unsigned int requiredpages);
void erofs_put_pcpubuf(void *ptr);
int erofs_pcpubuf_growsize(unsigned int nrpages);
void erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);

/* utils.c / zdata.c */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);

#if (EROFS_PCPUBUF_NR_PAGES > 0)
void *erofs_get_pcpubuf(unsigned int pagenr);
#define erofs_put_pcpubuf(buf) do { \
	(void)&(buf);	\
	preempt_enable();	\
} while (0)
#else
static inline void *erofs_get_pcpubuf(unsigned int pagenr)
{
	return ERR_PTR(-EOPNOTSUPP);
}

#define erofs_put_pcpubuf(buf) do {} while (0)
#endif

#ifdef CONFIG_EROFS_FS_ZIP
int erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,

fs/erofs/pcpubuf.c

0 → 100644
+148 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Gao Xiang <xiang@kernel.org>
 *
 * For low-latency decompression algorithms (e.g. lz4), reserve consecutive
 * per-CPU virtual memory (in pages) in advance to store such inplace I/O
 * data if inplace decompression is failed (due to unmet inplace margin for
 * example).
 */
#include "internal.h"

struct erofs_pcpubuf {
	raw_spinlock_t lock;
	void *ptr;
	struct page **pages;
	unsigned int nrpages;
};

static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);

void *erofs_get_pcpubuf(unsigned int requiredpages)
	__acquires(pcb->lock)
{
	struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);

	raw_spin_lock(&pcb->lock);
	/* check if the per-CPU buffer is too small */
	if (requiredpages > pcb->nrpages) {
		raw_spin_unlock(&pcb->lock);
		put_cpu_var(erofs_pcb);
		/* (for sparse checker) pretend pcb->lock is still taken */
		__acquire(pcb->lock);
		return NULL;
	}
	return pcb->ptr;
}

void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
{
	struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());

	DBG_BUGON(pcb->ptr != ptr);
	raw_spin_unlock(&pcb->lock);
	put_cpu_var(erofs_pcb);
}

/* the next step: support per-CPU page buffers hotplug */
int erofs_pcpubuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(pcb_resize_mutex);
	static unsigned int pcb_nrpages;
	LIST_HEAD(pagepool);
	int delta, cpu, ret, i;

	mutex_lock(&pcb_resize_mutex);
	delta = nrpages - pcb_nrpages;
	ret = 0;
	/* avoid shrinking pcpubuf, since no idea how many fses rely on */
	if (delta <= 0)
		goto out;

	for_each_possible_cpu(cpu) {
		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
		struct page **pages, **oldpages;
		void *ptr, *old_ptr;

		pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
		if (!pages) {
			ret = -ENOMEM;
			break;
		}

		for (i = 0; i < nrpages; ++i) {
			pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
			if (!pages[i]) {
				ret = -ENOMEM;
				oldpages = pages;
				goto free_pagearray;
			}
		}
		ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr) {
			ret = -ENOMEM;
			oldpages = pages;
			goto free_pagearray;
		}
		raw_spin_lock(&pcb->lock);
		old_ptr = pcb->ptr;
		pcb->ptr = ptr;
		oldpages = pcb->pages;
		pcb->pages = pages;
		i = pcb->nrpages;
		pcb->nrpages = nrpages;
		raw_spin_unlock(&pcb->lock);

		if (!oldpages) {
			DBG_BUGON(old_ptr);
			continue;
		}

		if (old_ptr)
			vunmap(old_ptr);
free_pagearray:
		while (i)
			list_add(&oldpages[--i]->lru, &pagepool);
		kfree(oldpages);
		if (ret)
			break;
	}
	pcb_nrpages = nrpages;
	put_pages_list(&pagepool);
out:
	mutex_unlock(&pcb_resize_mutex);
	return ret;
}

void erofs_pcpubuf_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);

		raw_spin_lock_init(&pcb->lock);
	}
}

void erofs_pcpubuf_exit(void)
{
	int cpu, i;

	for_each_possible_cpu(cpu) {
		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);

		if (pcb->ptr) {
			vunmap(pcb->ptr);
			pcb->ptr = NULL;
		}
		if (!pcb->pages)
			continue;

		for (i = 0; i < pcb->nrpages; ++i)
			if (pcb->pages[i])
				put_page(pcb->pages[i]);
		kfree(pcb->pages);
		pcb->pages = NULL;
	}
}
+2 −0
Original line number Diff line number Diff line
@@ -668,6 +668,7 @@ static int __init erofs_module_init(void)
	if (err)
		goto shrinker_err;

	erofs_pcpubuf_init();
	err = z_erofs_init_zip_subsystem();
	if (err)
		goto zip_err;
@@ -697,6 +698,7 @@ static void __exit erofs_module_exit(void)
	/* Ensure all RCU free inodes are safe before cache is destroyed. */
	rcu_barrier();
	kmem_cache_destroy(erofs_inode_cachep);
	erofs_pcpubuf_exit();
}

/* get filesystem statistics */
Loading