Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8e2e0047, authored by Bob Peterson, committed by Steven Whitehouse
Browse files

GFS2: Reduce file fragmentation



This patch reduces GFS2 file fragmentation by pre-reserving blocks. The
resulting improved on disk layout greatly speeds up operations in cases
which would have resulted in interlaced allocation of blocks previously.
A typical example of this is 10 parallel dd processes, each writing to a
file in a common directory.

The implementation uses an rbtree of reservations attached to each
resource group (and each inode).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
parent 294f2ad5
Loading
Loading
Loading
Loading
+3 −0
Original line number Original line Diff line number Diff line
@@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
	if (error)
	if (error)
		goto out_rlist;
		goto out_rlist;


	if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
		gfs2_rs_deltree(ip->i_res);

	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
				 revokes);
				 revokes);
+11 −13
Original line number Original line Diff line number Diff line
@@ -383,6 +383,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	if (ret)
	if (ret)
		return ret;
		return ret;


	atomic_set(&ip->i_res->rs_sizehint,
		   PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
	if (ret)
@@ -571,22 +574,15 @@ static int gfs2_open(struct inode *inode, struct file *file)


static int gfs2_release(struct inode *inode, struct file *file)
static int gfs2_release(struct inode *inode, struct file *file)
{
{
	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
	struct gfs2_file *fp;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_inode *ip = GFS2_I(inode);


	fp = file->private_data;
	kfree(file->private_data);
	file->private_data = NULL;
	file->private_data = NULL;


	if ((file->f_mode & FMODE_WRITE) && ip->i_res &&
	if ((file->f_mode & FMODE_WRITE) &&
	    (atomic_read(&inode->i_writecount) == 1))
	    (atomic_read(&inode->i_writecount) == 1))
		gfs2_rs_delete(ip);
		gfs2_rs_delete(ip);


	if (gfs2_assert_warn(sdp, fp))
		return -EIO;

	kfree(fp);

	return 0;
	return 0;
}
}


@@ -662,14 +658,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
				   unsigned long nr_segs, loff_t pos)
{
{
	struct file *file = iocb->ki_filp;
	struct file *file = iocb->ki_filp;
	size_t writesize = iov_length(iov, nr_segs);
	struct dentry *dentry = file->f_dentry;
	struct dentry *dentry = file->f_dentry;
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	struct gfs2_sbd *sdp;
	int ret;
	int ret;


	sdp = GFS2_SB(file->f_mapping->host);
	ret = gfs2_rs_alloc(ip);
	ret = gfs2_rs_alloc(ip);
	if (ret)
	if (ret)
		return ret;
		return ret;


	atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize);
	if (file->f_flags & O_APPEND) {
	if (file->f_flags & O_APPEND) {
		struct gfs2_holder gh;
		struct gfs2_holder gh;


@@ -795,6 +795,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
	if (unlikely(error))
	if (unlikely(error))
		goto out_uninit;
		goto out_uninit;


	atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize);

	while (len > 0) {
	while (len > 0) {
		if (len < bytes)
		if (len < bytes)
			bytes = len;
			bytes = len;
@@ -803,10 +805,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
			offset += bytes;
			offset += bytes;
			continue;
			continue;
		}
		}
		error = gfs2_rindex_update(sdp);
		if (error)
			goto out_unlock;

		error = gfs2_quota_lock_check(ip);
		error = gfs2_quota_lock_check(ip);
		if (error)
		if (error)
			goto out_unlock;
			goto out_unlock;
+37 −12
Original line number Original line Diff line number Diff line
@@ -84,6 +84,7 @@ struct gfs2_rgrpd {
	u32 rd_data;			/* num of data blocks in rgrp */
	u32 rd_data;			/* num of data blocks in rgrp */
	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
	u32 rd_free;
	u32 rd_free;
	u32 rd_reserved;                /* number of blocks reserved */
	u32 rd_free_clone;
	u32 rd_free_clone;
	u32 rd_dinodes;
	u32 rd_dinodes;
	u64 rd_igeneration;
	u64 rd_igeneration;
@@ -96,6 +97,9 @@ struct gfs2_rgrpd {
#define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
#define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
#define GFS2_RDF_ERROR		0x40000000 /* error in rg */
#define GFS2_RDF_ERROR		0x40000000 /* error in rg */
#define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
#define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
	spinlock_t rd_rsspin;           /* protects reservation related vars */
	struct rb_root rd_rstree;       /* multi-block reservation tree */
	u32 rd_rs_cnt;                  /* count of current reservations */
};
};


enum gfs2_state_bits {
enum gfs2_state_bits {
@@ -233,6 +237,38 @@ struct gfs2_holder {
	unsigned long gh_ip;
	unsigned long gh_ip;
};
};


/* Resource group multi-block reservation, in order of appearance:

   Step 1. Function prepares to write, allocates a multi-block reservation
           (via gfs2_rs_alloc) and sets the size hint.
   Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
   Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
   Step 4. Bits are assigned from the rgrp based on either the reservation
           or wherever it can.

   A pointer to one of these hangs off each inode (gfs2_inode.i_res).
*/

struct gfs2_blkreserv {
	/* components used during write (step 1): */
	atomic_t rs_sizehint;         /* hint of the write size, in fs blocks
					 (callers divide bytes by sb_bsize) */

	/* components used during inplace_reserve (step 2): */
	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */

	/* components used during get_local_rgrp (step 3): */
	struct gfs2_rgrpd *rs_rgd;    /* pointer to the gfs2_rgrpd */
	struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
	struct rb_node rs_node;       /* link to other block reservations;
					 presumably a node in the rgrp's
					 rd_rstree - TODO confirm */

	/* components used during block searches and assignments (step 4): */
	struct gfs2_bitmap *rs_bi;    /* bitmap for the current allocation */
	u32 rs_biblk;                 /* start block relative to the bi */
	u32 rs_free;                  /* how many blocks are still free */

	/* ancillary quota stuff */
	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
	struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
	unsigned int rs_qa_qd_num;    /* NOTE(review): looks like the number of
					 entries in use in the two arrays
					 above - confirm in quota.c */
};

enum {
enum {
	GLF_LOCK			= 1,
	GLF_LOCK			= 1,
	GLF_DEMOTE			= 3,
	GLF_DEMOTE			= 3,
@@ -290,16 +326,6 @@ struct gfs2_glock {


#define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */
#define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */


struct gfs2_blkreserv {
	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
	struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */

	/* ancillary quota stuff */
	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
	struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
	unsigned int rs_qa_qd_num;
};

enum {
enum {
	GIF_INVALID		= 0,
	GIF_INVALID		= 0,
	GIF_QD_LOCKED		= 1,
	GIF_QD_LOCKED		= 1,
@@ -307,7 +333,6 @@ enum {
	GIF_SW_PAGED		= 3,
	GIF_SW_PAGED		= 3,
};
};



struct gfs2_inode {
struct gfs2_inode {
	struct inode i_inode;
	struct inode i_inode;
	u64 i_no_addr;
	u64 i_no_addr;
@@ -318,7 +343,7 @@ struct gfs2_inode {
	struct gfs2_glock *i_gl; /* Move into i_gh? */
	struct gfs2_glock *i_gl; /* Move into i_gh? */
	struct gfs2_holder i_iopen_gh;
	struct gfs2_holder i_iopen_gh;
	struct gfs2_holder i_gh; /* for prepare/commit_write only */
	struct gfs2_holder i_gh; /* for prepare/commit_write only */
	struct gfs2_blkreserv *i_res; /* resource group block reservation */
	struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
	struct gfs2_rgrpd *i_rgd;
	struct gfs2_rgrpd *i_rgd;
	u64 i_goal;	/* goal block for allocations */
	u64 i_goal;	/* goal block for allocations */
	struct rw_semaphore i_rw_mutex;
	struct rw_semaphore i_rw_mutex;
+28 −9
Original line number Original line Diff line number Diff line
@@ -521,6 +521,9 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
	int error;
	int error;


	munge_mode_uid_gid(dip, &mode, &uid, &gid);
	munge_mode_uid_gid(dip, &mode, &uid, &gid);
	error = gfs2_rindex_update(sdp);
	if (error)
		return error;


	error = gfs2_quota_lock(dip, uid, gid);
	error = gfs2_quota_lock(dip, uid, gid);
	if (error)
	if (error)
@@ -551,6 +554,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
	struct buffer_head *dibh;
	struct buffer_head *dibh;
	int error;
	int error;


	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
	if (error)
		goto fail;
		goto fail;
@@ -596,6 +603,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
	gfs2_trans_end(sdp);
	gfs2_trans_end(sdp);


fail_ipreserv:
fail_ipreserv:
	if (alloc_required)
		gfs2_inplace_release(dip);
		gfs2_inplace_release(dip);


fail_quota_locks:
fail_quota_locks:
@@ -647,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
	const struct qstr *name = &dentry->d_name;
	const struct qstr *name = &dentry->d_name;
	struct gfs2_holder ghs[2];
	struct gfs2_holder ghs[2];
	struct inode *inode = NULL;
	struct inode *inode = NULL;
	struct gfs2_inode *dip = GFS2_I(dir);
	struct gfs2_inode *dip = GFS2_I(dir), *ip;
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
	struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
	int error;
	int error;
@@ -657,6 +665,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
	if (!name->len || name->len > GFS2_FNAMESIZE)
	if (!name->len || name->len > GFS2_FNAMESIZE)
		return -ENAMETOOLONG;
		return -ENAMETOOLONG;


	/* We need a reservation to allocate the new dinode block. The
	   directory ip temporarily points to the reservation, but this is
	   being done to get a set of contiguous blocks for the new dinode.
	   Since this is a create, we don't have a sizehint yet, so it will
	   have to use the minimum reservation size. */
	error = gfs2_rs_alloc(dip);
	error = gfs2_rs_alloc(dip);
	if (error)
	if (error)
		return error;
		return error;
@@ -694,24 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
	if (IS_ERR(inode))
	if (IS_ERR(inode))
		goto fail_gunlock2;
		goto fail_gunlock2;


	error = gfs2_inode_refresh(GFS2_I(inode));
	ip = GFS2_I(inode);
	error = gfs2_inode_refresh(ip);
	if (error)
	if (error)
		goto fail_gunlock2;
		goto fail_gunlock2;


	/* the new inode needs a reservation so it can allocate xattrs. */
	/* The newly created inode needs a reservation so it can allocate
	error = gfs2_rs_alloc(GFS2_I(inode));
	   xattrs. At the same time, we want new blocks allocated to the new
	if (error)
	   dinode to be as contiguous as possible. Since we allocated the
		goto fail_gunlock2;
	   dinode block under the directory's reservation, we transfer
	   ownership of that reservation to the new inode. The directory
	   doesn't need a reservation unless it needs a new allocation. */
	ip->i_res = dip->i_res;
	dip->i_res = NULL;


	error = gfs2_acl_create(dip, inode);
	error = gfs2_acl_create(dip, inode);
	if (error)
	if (error)
		goto fail_gunlock2;
		goto fail_gunlock2;


	error = gfs2_security_init(dip, GFS2_I(inode), name);
	error = gfs2_security_init(dip, ip, name);
	if (error)
	if (error)
		goto fail_gunlock2;
		goto fail_gunlock2;


	error = link_dinode(dip, name, GFS2_I(inode));
	error = link_dinode(dip, name, ip);
	if (error)
	if (error)
		goto fail_gunlock2;
		goto fail_gunlock2;


@@ -738,6 +756,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
		iput(inode);
		iput(inode);
	}
	}
fail:
fail:
	gfs2_rs_delete(dip);
	if (bh)
	if (bh)
		brelse(bh);
		brelse(bh);
	return error;
	return error;
+523 −55

File changed.

Preview size limit exceeded, changes collapsed.

Loading