Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e4d6726a authored by Gao Xiang, committed by Gao Xiang
Browse files

erofs: introduce physical cluster slab pools

Since multiple pcluster sizes could be used at once, the number of
compressed pages will become a variable factor. It's necessary to
introduce slab pools rather than a single slab cache now.

This limits the pclustersize to 1M (Z_EROFS_PCLUSTER_MAX_SIZE), and
gets rid of the obsolete EROFS_FS_CLUSTER_PAGE_LIMIT, which is no
longer used.

Link: https://lore.kernel.org/r/20210407043927.10623-4-xiang@kernel.org


Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
parent 092ced04
Loading
Loading
Loading
Loading
+0 −14
Original line number Diff line number Diff line
@@ -76,17 +76,3 @@ config EROFS_FS_ZIP

	  If you don't want to enable compression feature, say N.
config EROFS_FS_CLUSTER_PAGE_LIMIT
	int "EROFS Cluster Pages Hard Limit"
	depends on EROFS_FS_ZIP
	range 1 256
	default "1"
	help
	  Indicates maximum # of pages of a compressed
	  physical cluster.

	  For example, if files in an image were compressed
	  into 8k-unit, hard limit should not be configured
	  less than 2. Otherwise, the image will be refused
	  to mount on this kernel.
+3 −0
Original line number Diff line number Diff line
@@ -201,6 +201,9 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
				 e->e_name_len + le16_to_cpu(e->e_value_size));
}

/* maximum supported size of a physical compression cluster */
#define Z_EROFS_PCLUSTER_MAX_SIZE	(1024 * 1024)

/* available compression algorithm types (for h_algorithmtype) */
enum {
	Z_EROFS_COMPRESSION_LZ4	= 0,
+0 −3
Original line number Diff line number Diff line
@@ -189,9 +189,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
	return v;
}
#endif	/* !CONFIG_SMP */

/* hard limit of pages per compressed cluster */
#define Z_EROFS_CLUSTER_MAX_PAGES       (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
#endif	/* !CONFIG_EROFS_FS_ZIP */

/* we strictly follow PAGE_SIZE and no buffer head yet */
+118 −63
Original line number Diff line number Diff line
@@ -9,6 +9,93 @@

#include <trace/events/erofs.h>

/*
 * since pclustersize is variable for big pcluster feature, introduce slab
 * pools implementation for different pcluster sizes.
 */
struct z_erofs_pcluster_slab {
	struct kmem_cache *slab;
	unsigned int maxpages;
	char name[48];
};

/* initializer for one pool entry: only maxpages is set, the rest is zeroed */
#define _PCLP(n) { .maxpages = n }

/*
 * Table of pcluster size classes (in pages).  The allocation and free
 * helpers scan it from the start and use the first entry whose maxpages
 * is large enough, so entries are expected to stay in ascending order.
 * NOTE(review): the last entry is Z_EROFS_PCLUSTER_MAX_PAGES, which depends
 * on PAGE_SIZE -- confirm it is not smaller than 128 on all configurations.
 */
static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
	_PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
	_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
};

static void z_erofs_destroy_pcluster_pool(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
		if (!pcluster_pool[i].slab)
			continue;
		kmem_cache_destroy(pcluster_pool[i].slab);
		pcluster_pool[i].slab = NULL;
	}
}

/*
 * Create one slab cache for each entry of pcluster_pool.
 *
 * Every cache is named "erofs_pcluster-<maxpages>" and its objects are
 * sized to hold a struct z_erofs_pcluster followed by maxpages entries of
 * the trailing compressed_pages[] array.
 *
 * Return: 0 on success, or -ENOMEM if any cache cannot be created; in the
 * failure case all caches created so far are destroyed again.
 */
static int z_erofs_create_pcluster_pool(void)
{
	struct z_erofs_pcluster_slab *pcs;
	struct z_erofs_pcluster *a;	/* never dereferenced, type carrier only */
	unsigned int size;

	for (pcs = pcluster_pool;
	     pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) {
		size = struct_size(a, compressed_pages, pcs->maxpages);

		/* bound the write by the buffer size instead of the format */
		snprintf(pcs->name, sizeof(pcs->name), "erofs_pcluster-%u",
			 pcs->maxpages);
		pcs->slab = kmem_cache_create(pcs->name, size, 0,
					      SLAB_RECLAIM_ACCOUNT, NULL);
		if (pcs->slab)
			continue;

		/* roll back the caches that were already set up */
		z_erofs_destroy_pcluster_pool();
		return -ENOMEM;
	}
	return 0;
}

static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
		struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
		struct z_erofs_pcluster *pcl;

		if (nrpages > pcs->maxpages)
			continue;

		pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS);
		if (!pcl)
			return ERR_PTR(-ENOMEM);
		pcl->pclusterpages = nrpages;
		return pcl;
	}
	return ERR_PTR(-EINVAL);
}

static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
		struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;

		if (pcl->pclusterpages > pcs->maxpages)
			continue;

		kmem_cache_free(pcs->slab, pcl);
		return;
	}
	DBG_BUGON(1);
}

/*
 * a compressed_pages[] placeholder in order to avoid
 * being filled with file pages for in-place decompression.
@@ -36,12 +123,11 @@ typedef tagptr1_t compressed_page_t;
	tagptr_fold(compressed_page_t, page, 1)

static struct workqueue_struct *z_erofs_workqueue __read_mostly;
static struct kmem_cache *pcluster_cachep __read_mostly;

void z_erofs_exit_zip_subsystem(void)
{
	destroy_workqueue(z_erofs_workqueue);
	kmem_cache_destroy(pcluster_cachep);
	z_erofs_destroy_pcluster_pool();
}

static inline int z_erofs_init_workqueue(void)
@@ -58,42 +144,16 @@ static inline int z_erofs_init_workqueue(void)
	return z_erofs_workqueue ? 0 : -ENOMEM;
}

/*
 * One-time initializer for a pcluster object located at @ptr.
 *
 * Sets up the primary collection's mutex, zeroes its nr_pages and vcnt
 * counters, and clears all Z_EROFS_CLUSTER_MAX_PAGES compressed page slots.
 */
static void z_erofs_pcluster_init_once(void *ptr)
{
	struct z_erofs_pcluster *pcl = ptr;
	struct z_erofs_collection *primary = z_erofs_primarycollection(pcl);
	unsigned int slot = 0;

	mutex_init(&primary->lock);
	primary->nr_pages = 0;
	primary->vcnt = 0;

	while (slot < Z_EROFS_CLUSTER_MAX_PAGES) {
		pcl->compressed_pages[slot] = NULL;
		++slot;
	}
}

/*
 * Per-allocation initialization of @pcl: the workgroup refcount is set to
 * 1, then debug checks assert that the primary collection's nr_pages and
 * vcnt counters are still zero.
 */
static void z_erofs_pcluster_init_always(struct z_erofs_pcluster *pcl)
{
	struct z_erofs_collection *primary;

	atomic_set(&pcl->obj.refcount, 1);

	primary = z_erofs_primarycollection(pcl);
	DBG_BUGON(primary->nr_pages);
	DBG_BUGON(primary->vcnt);
}

int __init z_erofs_init_zip_subsystem(void)
{
	pcluster_cachep = kmem_cache_create("erofs_compress",
					    Z_EROFS_WORKGROUP_SIZE, 0,
					    SLAB_RECLAIM_ACCOUNT,
					    z_erofs_pcluster_init_once);
	if (pcluster_cachep) {
		if (!z_erofs_init_workqueue())
			return 0;
	int err = z_erofs_create_pcluster_pool();

		kmem_cache_destroy(pcluster_cachep);
	}
	return -ENOMEM;
	if (err)
		return err;
	err = z_erofs_init_workqueue();
	if (err)
		z_erofs_destroy_pcluster_pool();
	return err;
}

enum z_erofs_collectmode {
@@ -178,7 +238,6 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
				     struct list_head *pagepool)
{
	const struct z_erofs_pcluster *pcl = clt->pcl;
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	struct page **pages = clt->compressedpages;
	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
	bool standalone = true;
@@ -188,7 +247,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
		return;

	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
	for (; pages < pcl->compressed_pages + pcl->pclusterpages; ++pages) {
		struct page *page;
		compressed_page_t t;
		struct page *newpage = NULL;
@@ -248,14 +307,13 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
	struct z_erofs_pcluster *const pcl =
		container_of(grp, struct z_erofs_pcluster, obj);
	struct address_space *const mapping = MNGD_MAPPING(sbi);
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	int i;

	/*
	 * refcount of workgroup is now freezed as 1,
	 * therefore no need to worry about available decompression users.
	 */
	for (i = 0; i < clusterpages; ++i) {
	for (i = 0; i < pcl->pclusterpages; ++i) {
		struct page *page = pcl->compressed_pages[i];

		if (!page)
@@ -283,13 +341,12 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
				  struct page *page)
{
	struct z_erofs_pcluster *const pcl = (void *)page_private(page);
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	int ret = 0;	/* 0 - busy */

	if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
		unsigned int i;

		for (i = 0; i < clusterpages; ++i) {
		for (i = 0; i < pcl->pclusterpages; ++i) {
			if (pcl->compressed_pages[i] == page) {
				WRITE_ONCE(pcl->compressed_pages[i], NULL);
				ret = 1;
@@ -312,9 +369,9 @@ static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
					  struct page *page)
{
	struct z_erofs_pcluster *const pcl = clt->pcl;
	const unsigned int clusterpages = BIT(pcl->clusterbits);

	while (clt->compressedpages < pcl->compressed_pages + clusterpages) {
	while (clt->compressedpages <
	       pcl->compressed_pages + pcl->pclusterpages) {
		if (!cmpxchg(clt->compressedpages++, NULL, page))
			return true;
	}
@@ -434,12 +491,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
	struct z_erofs_collection *cl;
	int err;

	/* no available workgroup, let's allocate one */
	pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS);
	if (!pcl)
		return -ENOMEM;
	/* no available pcluster, let's allocate one */
	pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
	if (IS_ERR(pcl))
		return PTR_ERR(pcl);

	z_erofs_pcluster_init_always(pcl);
	atomic_set(&pcl->obj.refcount, 1);
	pcl->obj.index = map->m_pa >> PAGE_SHIFT;

	pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
@@ -451,8 +508,6 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
	else
		pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;

	pcl->clusterbits = 0;

	/* new pclusters should be claimed as type 1, primary and followed */
	pcl->next = clt->owned_head;
	clt->mode = COLLECT_PRIMARY_FOLLOWED;
@@ -464,12 +519,13 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
	 * lock all primary followed works before visible to others
	 * and mutex_trylock *never* fails for a new pcluster.
	 */
	mutex_trylock(&cl->lock);
	mutex_init(&cl->lock);
	DBG_BUGON(!mutex_trylock(&cl->lock));

	err = erofs_register_workgroup(inode->i_sb, &pcl->obj);
	if (err) {
		mutex_unlock(&cl->lock);
		kmem_cache_free(pcluster_cachep, pcl);
		z_erofs_free_pcluster(pcl);
		return -EAGAIN;
	}
	/* used to check tail merging loop due to corrupted images */
@@ -518,7 +574,7 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,

	clt->compressedpages = clt->pcl->compressed_pages;
	if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */
		clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES;
		clt->compressedpages += clt->pcl->pclusterpages;
	return 0;
}

@@ -531,8 +587,7 @@ static void z_erofs_rcu_callback(struct rcu_head *head)
	struct z_erofs_collection *const cl =
		container_of(head, struct z_erofs_collection, rcu);

	kmem_cache_free(pcluster_cachep,
			container_of(cl, struct z_erofs_pcluster,
	z_erofs_free_pcluster(container_of(cl, struct z_erofs_pcluster,
					   primary_collection));
}

@@ -786,9 +841,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
				       struct list_head *pagepool)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	struct z_erofs_pagevec_ctor ctor;
	unsigned int i, outputsize, llen, nr_pages;
	unsigned int i, inputsize, outputsize, llen, nr_pages;
	struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
	struct page **pages, **compressed_pages, *page;

@@ -868,7 +922,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
	overlapped = false;
	compressed_pages = pcl->compressed_pages;

	for (i = 0; i < clusterpages; ++i) {
	for (i = 0; i < pcl->pclusterpages; ++i) {
		unsigned int pagenr;

		page = compressed_pages[i];
@@ -921,12 +975,13 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
		partial = true;
	}

	inputsize = pcl->pclusterpages * PAGE_SIZE;
	err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
					.sb = sb,
					.in = compressed_pages,
					.out = pages,
					.pageofs_out = cl->pageofs,
					.inputsize = PAGE_SIZE,
					.inputsize = inputsize,
					.outputsize = outputsize,
					.alg = pcl->algorithmformat,
					.inplace_io = overlapped,
@@ -935,7 +990,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,

out:
	/* must handle all compressed pages before ending pages */
	for (i = 0; i < clusterpages; ++i) {
	for (i = 0; i < pcl->pclusterpages; ++i) {
		page = compressed_pages[i];

		if (erofs_page_is_managed(sbi, page))
@@ -1237,7 +1292,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
		pcl = container_of(owned_head, struct z_erofs_pcluster, next);

		cur = pcl->obj.index;
		end = cur + BIT(pcl->clusterbits);
		end = cur + pcl->pclusterpages;

		/* close the main owned chain at first */
		owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+7 −7
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include "internal.h"
#include "zpvec.h"

#define Z_EROFS_PCLUSTER_MAX_PAGES	(Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
#define Z_EROFS_NR_INLINE_PAGEVECS      3

/*
@@ -58,16 +59,17 @@ struct z_erofs_pcluster {
	/* A: point to next chained pcluster or TAILs */
	z_erofs_next_pcluster_t next;

	/* A: compressed pages (including multi-usage pages) */
	struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];

	/* A: lower limit of decompressed length and if full length or not */
	unsigned int length;

	/* I: physical cluster size in pages */
	unsigned short pclusterpages;

	/* I: compression algorithm format */
	unsigned char algorithmformat;
	/* I: bit shift of physical cluster size */
	unsigned char clusterbits;

	/* A: compressed pages (can be cached or inplaced pages) */
	struct page *compressed_pages[];
};

#define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection)
@@ -81,8 +83,6 @@ struct z_erofs_pcluster {

#define Z_EROFS_PCLUSTER_NIL            (NULL)

#define Z_EROFS_WORKGROUP_SIZE  sizeof(struct z_erofs_pcluster)

struct z_erofs_decompressqueue {
	struct super_block *sb;
	atomic_t pending_bios;