Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 063c4561 authored by Mark Fasheh's avatar Mark Fasheh
Browse files

ocfs2: support for removing file regions



Provide an internal interface for the removal of arbitrary file regions.

ocfs2_remove_inode_range() takes a byte range within a file and will remove
existing extents within that range. Partial clusters will be zeroed so that
any read from within the region will return zeros.

Signed-off-by: default avatarMark Fasheh <mark.fasheh@oracle.com>
parent 35edec1d
Loading
Loading
Loading
Loading
+10 −10
Original line number Diff line number Diff line
@@ -4373,7 +4373,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
	return ret;
}

static int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
			u32 cpos, u32 len, handle_t *handle,
			struct ocfs2_alloc_context *meta_ac,
			struct ocfs2_cached_dealloc_ctxt *dealloc)
@@ -4506,7 +4506,7 @@ static int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
	return ret;
}

static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
{
	struct buffer_head *tl_bh = osb->osb_tl_bh;
	struct ocfs2_dinode *di;
@@ -4539,7 +4539,7 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
	return current_tail == new_start;
}

static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
int ocfs2_truncate_log_append(struct ocfs2_super *osb,
			      handle_t *handle,
			      u64 start_blk,
			      unsigned int num_clusters)
@@ -4698,7 +4698,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
}

/* Expects you to already be holding tl_inode->i_mutex */
static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
{
	int status;
	unsigned int num_to_flush;
+10 −0
Original line number Diff line number Diff line
@@ -41,6 +41,10 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
			      handle_t *handle, u32 cpos, u32 len, u32 phys,
			      struct ocfs2_alloc_context *meta_ac,
			      struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
			u32 cpos, u32 len, handle_t *handle,
			struct ocfs2_alloc_context *meta_ac,
			struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
			   struct inode *inode,
			   struct ocfs2_dinode *fe);
@@ -68,6 +72,12 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
				      struct ocfs2_dinode **tl_copy);
int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
					 struct ocfs2_dinode *tl_copy);
int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb);
int ocfs2_truncate_log_append(struct ocfs2_super *osb,
			      handle_t *handle,
			      u64 start_blk,
			      unsigned int num_clusters);
int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);

/*
 * Process local structure which describes the block unlinks done
+240 −2
Original line number Diff line number Diff line
@@ -541,13 +541,16 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
			  struct ocfs2_alloc_context **data_ac,
			  struct ocfs2_alloc_context **meta_ac)
{
	int ret, num_free_extents;
	int ret = 0, num_free_extents;
	unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	*meta_ac = NULL;
	if (data_ac)
		*data_ac = NULL;

	BUG_ON(clusters_to_add != 0 && data_ac == NULL);

	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
	     "clusters_to_add = %u, extents_to_split = %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
@@ -583,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
		}
	}

	if (clusters_to_add == 0)
		goto out;

	ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
	if (ret < 0) {
		if (ret != -ENOSPC)
@@ -1252,6 +1258,238 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
	return ret;
}

static int __ocfs2_remove_inode_range(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 cpos, u32 phys_cpos, u32 len,
				      struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	handle_t *handle;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	mutex_lock(&tl_inode->i_mutex);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
	if (handle == NULL) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
				  dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	OCFS2_I(inode)->ip_clusters -= len;
	di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);

	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	mutex_unlock(&tl_inode->i_mutex);

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}

/*
 * Truncate a byte range, avoiding pages within partial clusters. This
 * preserves those pages for the zeroing code to write to.
 */
static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
					 u64 byte_len)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	loff_t start, end;
	struct address_space *mapping = inode->i_mapping;

	start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
	end = byte_start + byte_len;
	end = end & ~(osb->s_clustersize - 1);

	if (start < end) {
		unmap_mapping_range(mapping, start, end - start, 0);
		truncate_inode_pages_range(mapping, start, end - 1);
	}
}

static int ocfs2_zero_partial_clusters(struct inode *inode,
				       u64 start, u64 len)
{
	int ret = 0;
	u64 tmpend, end = start + len;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	unsigned int csize = osb->s_clustersize;
	handle_t *handle;

	/*
	 * The "start" and "end" values are NOT necessarily part of
	 * the range whose allocation is being deleted. Rather, this
	 * is what the user passed in with the request. We must zero
	 * partial clusters here. There's no need to worry about
	 * physical allocation - the zeroing code knows to skip holes.
	 */
	mlog(0, "byte start: %llu, end: %llu\n",
	     (unsigned long long)start, (unsigned long long)end);

	/*
	 * If both edges are on a cluster boundary then there's no
	 * zeroing required as the region is part of the allocation to
	 * be truncated.
	 */
	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
		goto out;

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (handle == NULL) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We want to get the byte offset of the end of the 1st cluster.
	 */
	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
	if (tmpend > end)
		tmpend = end;

	mlog(0, "1st range: start: %llu, tmpend: %llu\n",
	     (unsigned long long)start, (unsigned long long)tmpend);

	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
	if (ret)
		mlog_errno(ret);

	if (tmpend < end) {
		/*
		 * This may make start and end equal, but the zeroing
		 * code will skip any work in that case so there's no
		 * need to catch it up here.
		 */
		start = end & ~(osb->s_clustersize - 1);

		mlog(0, "2nd range: start: %llu, end: %llu\n",
		     (unsigned long long)start, (unsigned long long)end);

		ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
		if (ret)
			mlog_errno(ret);
	}

	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}

static int ocfs2_remove_inode_range(struct inode *inode,
				    struct buffer_head *di_bh, u64 byte_start,
				    u64 byte_len)
{
	int ret = 0;
	u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_cached_dealloc_ctxt dealloc;

	ocfs2_init_dealloc_ctxt(&dealloc);

	if (byte_len == 0)
		return 0;

	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
	trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
	if (trunc_len >= trunc_start)
		trunc_len -= trunc_start;
	else
		trunc_len = 0;

	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     (unsigned long long)byte_start,
	     (unsigned long long)byte_len, trunc_start, trunc_len);

	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	cpos = trunc_start;
	while (trunc_len) {
		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
					 &alloc_size, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (alloc_size > trunc_len)
			alloc_size = trunc_len;

		/* Only do work for non-holes */
		if (phys_cpos != 0) {
			ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
							 phys_cpos, alloc_size,
							 &dealloc);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

		cpos += alloc_size;
		trunc_len -= alloc_size;
	}

	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);

out:
	ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &dealloc);

	return ret;
}

static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
					 loff_t *ppos,
					 size_t count,
+2 −0
Original line number Diff line number Diff line
@@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle,
#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE 		      \
					 + OCFS2_TRUNCATE_LOG_UPDATE)

#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)

/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
 * bitmap block for the new bit) */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)