Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f66fd9f0 authored by Yan, Zheng's avatar Yan, Zheng Committed by Ilya Dryomov
Browse files

ceph: pre-allocate data structure that tracks caps flushing



Signed-off-by: default avatarYan, Zheng <zyan@redhat.com>
parent e548e9b9
Loading
Loading
Loading
Loading
+15 −4
Original line number Diff line number Diff line
@@ -1308,12 +1308,17 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	struct inode *inode = file_inode(vma->vm_file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_file_info *fi = vma->vm_file->private_data;
	struct ceph_cap_flush *prealloc_cf;
	struct page *page = vmf->page;
	loff_t off = page_offset(page);
	loff_t size = i_size_read(inode);
	size_t len;
	int want, got, ret;

	prealloc_cf = ceph_alloc_cap_flush();
	if (!prealloc_cf)
		return VM_FAULT_SIGBUS;

	if (ci->i_inline_version != CEPH_INLINE_NONE) {
		struct page *locked_page = NULL;
		if (off == 0) {
@@ -1323,8 +1328,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
		ret = ceph_uninline_data(vma->vm_file, locked_page);
		if (locked_page)
			unlock_page(locked_page);
		if (ret < 0)
			return VM_FAULT_SIGBUS;
		if (ret < 0) {
			ret = VM_FAULT_SIGBUS;
			goto out_free;
		}
	}

	if (off + PAGE_CACHE_SIZE <= size)
@@ -1346,7 +1353,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
			break;
		if (ret != -ERESTARTSYS) {
			WARN_ON(1);
			return VM_FAULT_SIGBUS;
			ret = VM_FAULT_SIGBUS;
			goto out_free;
		}
	}
	dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
@@ -1381,7 +1389,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
		int dirty;
		spin_lock(&ci->i_ceph_lock);
		ci->i_inline_version = CEPH_INLINE_NONE;
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
					       &prealloc_cf);
		spin_unlock(&ci->i_ceph_lock);
		if (dirty)
			__mark_inode_dirty(inode, dirty);
@@ -1390,6 +1399,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
	     inode, off, len, ceph_cap_string(got), ret);
	ceph_put_cap_refs(ci, got);
out_free:
	ceph_free_cap_flush(prealloc_cf);

	return ret;
}
+22 −4
Original line number Diff line number Diff line
@@ -1356,7 +1356,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 * Caller is then responsible for calling __mark_inode_dirty with the
 * returned flags value.
 */
int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
			   struct ceph_cap_flush **pcf)
{
	struct ceph_mds_client *mdsc =
		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1376,6 +1377,9 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
	     ceph_cap_string(was | mask));
	ci->i_dirty_caps |= mask;
	if (was == 0) {
		WARN_ON_ONCE(ci->i_prealloc_cap_flush);
		swap(ci->i_prealloc_cap_flush, *pcf);

		if (!ci->i_head_snapc) {
			WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem));
			ci->i_head_snapc = ceph_get_snap_context(
@@ -1391,6 +1395,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
			ihold(inode);
			dirty |= I_DIRTY_SYNC;
		}
	} else {
		WARN_ON_ONCE(!ci->i_prealloc_cap_flush);
	}
	BUG_ON(list_empty(&ci->i_dirty_item));
	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
@@ -1446,6 +1452,17 @@ static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc,
	rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree);
}

struct ceph_cap_flush *ceph_alloc_cap_flush(void)
{
	return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
}

void ceph_free_cap_flush(struct ceph_cap_flush *cf)
{
	if (cf)
		kmem_cache_free(ceph_cap_flush_cachep, cf);
}

static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc)
{
	struct rb_node *n = rb_first(&mdsc->cap_flush_tree);
@@ -1469,11 +1486,12 @@ static int __mark_caps_flushing(struct inode *inode,
{
	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_cap_flush *cf;
	struct ceph_cap_flush *cf = NULL;
	int flushing;

	BUG_ON(ci->i_dirty_caps == 0);
	BUG_ON(list_empty(&ci->i_dirty_item));
	BUG_ON(!ci->i_prealloc_cap_flush);

	flushing = ci->i_dirty_caps;
	dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
@@ -1484,7 +1502,7 @@ static int __mark_caps_flushing(struct inode *inode,
	ci->i_dirty_caps = 0;
	dout(" inode %p now !dirty\n", inode);

	cf = kmalloc(sizeof(*cf), GFP_ATOMIC);
	swap(cf, ci->i_prealloc_cap_flush);
	cf->caps = flushing;
	cf->kick = false;

@@ -3075,7 +3093,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
		cf = list_first_entry(&to_remove,
				      struct ceph_cap_flush, list);
		list_del(&cf->list);
		kfree(cf);
		ceph_free_cap_flush(cf);
	}
	if (drop)
		iput(inode);
+16 −2
Original line number Diff line number Diff line
@@ -939,6 +939,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_osd_client *osdc =
		&ceph_sb_to_client(inode->i_sb)->client->osdc;
	struct ceph_cap_flush *prealloc_cf;
	ssize_t count, written = 0;
	int err, want, got;
	loff_t pos;
@@ -946,6 +947,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
	if (ceph_snap(inode) != CEPH_NOSNAP)
		return -EROFS;

	prealloc_cf = ceph_alloc_cap_flush();
	if (!prealloc_cf)
		return -ENOMEM;

	mutex_lock(&inode->i_mutex);

	/* We can write back this queue in page reclaim */
@@ -1050,7 +1055,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
		int dirty;
		spin_lock(&ci->i_ceph_lock);
		ci->i_inline_version = CEPH_INLINE_NONE;
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
					       &prealloc_cf);
		spin_unlock(&ci->i_ceph_lock);
		if (dirty)
			__mark_inode_dirty(inode, dirty);
@@ -1074,6 +1080,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
out:
	mutex_unlock(&inode->i_mutex);
out_unlocked:
	ceph_free_cap_flush(prealloc_cf);
	current->backing_dev_info = NULL;
	return written ? written : err;
}
@@ -1270,6 +1277,7 @@ static long ceph_fallocate(struct file *file, int mode,
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_osd_client *osdc =
		&ceph_inode_to_client(inode)->client->osdc;
	struct ceph_cap_flush *prealloc_cf;
	int want, got = 0;
	int dirty;
	int ret = 0;
@@ -1282,6 +1290,10 @@ static long ceph_fallocate(struct file *file, int mode,
	if (!S_ISREG(inode->i_mode))
		return -EOPNOTSUPP;

	prealloc_cf = ceph_alloc_cap_flush();
	if (!prealloc_cf)
		return -ENOMEM;

	mutex_lock(&inode->i_mutex);

	if (ceph_snap(inode) != CEPH_NOSNAP) {
@@ -1328,7 +1340,8 @@ static long ceph_fallocate(struct file *file, int mode,
	if (!ret) {
		spin_lock(&ci->i_ceph_lock);
		ci->i_inline_version = CEPH_INLINE_NONE;
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
					       &prealloc_cf);
		spin_unlock(&ci->i_ceph_lock);
		if (dirty)
			__mark_inode_dirty(inode, dirty);
@@ -1337,6 +1350,7 @@ static long ceph_fallocate(struct file *file, int mode,
	ceph_put_cap_refs(ci, got);
unlock:
	mutex_unlock(&inode->i_mutex);
	ceph_free_cap_flush(prealloc_cf);
	return ret;
}

+13 −2
Original line number Diff line number Diff line
@@ -416,6 +416,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
	ci->i_flushing_caps = 0;
	INIT_LIST_HEAD(&ci->i_dirty_item);
	INIT_LIST_HEAD(&ci->i_flushing_item);
	ci->i_prealloc_cap_flush = NULL;
	ci->i_cap_flush_tree = RB_ROOT;
	init_waitqueue_head(&ci->i_cap_wq);
	ci->i_hold_caps_min = 0;
@@ -1720,6 +1721,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
	const unsigned int ia_valid = attr->ia_valid;
	struct ceph_mds_request *req;
	struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
	struct ceph_cap_flush *prealloc_cf;
	int issued;
	int release = 0, dirtied = 0;
	int mask = 0;
@@ -1734,10 +1736,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
	if (err != 0)
		return err;

	prealloc_cf = ceph_alloc_cap_flush();
	if (!prealloc_cf)
		return -ENOMEM;

	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR,
				       USE_AUTH_MDS);
	if (IS_ERR(req))
	if (IS_ERR(req)) {
		ceph_free_cap_flush(prealloc_cf);
		return PTR_ERR(req);
	}

	spin_lock(&ci->i_ceph_lock);
	issued = __ceph_caps_issued(ci, NULL);
@@ -1895,7 +1903,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
		dout("setattr %p ATTR_FILE ... hrm!\n", inode);

	if (dirtied) {
		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
							   &prealloc_cf);
		inode->i_ctime = CURRENT_TIME;
	}

@@ -1927,9 +1936,11 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
	ceph_mdsc_put_request(req);
	if (mask & CEPH_SETATTR_SIZE)
		__ceph_do_pending_vmtruncate(inode);
	ceph_free_cap_flush(prealloc_cf);
	return err;
out_put:
	ceph_mdsc_put_request(req);
	ceph_free_cap_flush(prealloc_cf);
	return err;
}

+5 −1
Original line number Diff line number Diff line
@@ -1189,6 +1189,10 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
		}
		spin_unlock(&mdsc->cap_dirty_lock);

		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
			list_add(&ci->i_prealloc_cap_flush->list, &to_remove);
			ci->i_prealloc_cap_flush = NULL;
		}
	}
	spin_unlock(&ci->i_ceph_lock);
	while (!list_empty(&to_remove)) {
@@ -1196,7 +1200,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
		cf = list_first_entry(&to_remove,
				      struct ceph_cap_flush, list);
		list_del(&cf->list);
		kfree(cf);
		ceph_free_cap_flush(cf);
	}
	while (drop--)
		iput(inode);
Loading