Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3bf7878f authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ceph-for-4.13-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The main item here is support for v12.y.z ("Luminous") clusters:
  RESEND_ON_SPLIT, RADOS_BACKOFF, OSDMAP_PG_UPMAP and CRUSH_CHOOSE_ARGS
  feature bits, and various other changes in the RADOS client protocol.

  On top of that we have a new fsc mount option to allow supplying
  fscache uniquifier (similar to NFS) and the usual pile of filesystem
  fixes from Zheng"

* tag 'ceph-for-4.13-rc1' of git://github.com/ceph/ceph-client: (44 commits)
  libceph: advertise support for NEW_OSDOP_ENCODING and SERVER_LUMINOUS
  libceph: osd_state is 32 bits wide in luminous
  crush: remove an obsolete comment
  crush: crush_init_workspace starts with struct crush_work
  libceph, crush: per-pool crush_choose_arg_map for crush_do_rule()
  crush: implement weight and id overrides for straw2
  libceph: apply_upmap()
  libceph: compute actual pgid in ceph_pg_to_up_acting_osds()
  libceph: pg_upmap[_items] infrastructure
  libceph: ceph_decode_skip_* helpers
  libceph: kill __{insert,lookup,remove}_pg_mapping()
  libceph: introduce and switch to decode_pg_mapping()
  libceph: don't pass pgid by value
  libceph: respect RADOS_BACKOFF backoffs
  libceph: make DEFINE_RB_* helpers more general
  libceph: avoid unnecessary pi lookups in calc_target()
  libceph: use target pi for calc_target() calculations
  libceph: always populate t->target_{oid,oloc} in calc_target()
  libceph: make sure need_resend targets reflect latest map
  libceph: delete from need_resend_linger before check_linger_pool_dne()
  ...
parents 07d306c8 33e9c8db
Loading
Loading
Loading
Loading
+9 −12
Original line number Diff line number Diff line
@@ -530,14 +530,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
	long writeback_stat;
	u64 truncate_size;
	u32 truncate_seq;
	int err = 0, len = PAGE_SIZE;
	int err, len = PAGE_SIZE;

	dout("writepage %p idx %lu\n", page, page->index);

	if (!page->mapping || !page->mapping->host) {
		dout("writepage %p - no mapping\n", page);
		return -EFAULT;
	}
	inode = page->mapping->host;
	ci = ceph_inode(inode);
	fsc = ceph_inode_to_client(inode);
@@ -547,7 +543,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
	snapc = page_snap_context(page);
	if (snapc == NULL) {
		dout("writepage %p page %p not dirty?\n", inode, page);
		goto out;
		return 0;
	}
	oldest = get_oldest_context(inode, &snap_size,
				    &truncate_size, &truncate_seq);
@@ -555,9 +551,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
		dout("writepage %p page %p snapc %p not writeable - noop\n",
		     inode, page, snapc);
		/* we should only noop if called by kswapd */
		WARN_ON((current->flags & PF_MEMALLOC) == 0);
		WARN_ON(!(current->flags & PF_MEMALLOC));
		ceph_put_snap_context(oldest);
		goto out;
		redirty_page_for_writepage(wbc, page);
		return 0;
	}
	ceph_put_snap_context(oldest);

@@ -567,8 +564,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
	/* is this a partial page at end of file? */
	if (page_off >= snap_size) {
		dout("%p page eof %llu\n", page, snap_size);
		goto out;
		return 0;
	}

	if (snap_size < page_off + len)
		len = snap_size - page_off;

@@ -595,7 +593,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
			dout("writepage interrupted page %p\n", page);
			redirty_page_for_writepage(wbc, page);
			end_page_writeback(page);
			goto out;
			return err;
		}
		dout("writepage setting page/mapping error %d %p\n",
		     err, page);
@@ -611,7 +609,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
	end_page_writeback(page);
	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
	ceph_put_snap_context(snapc);  /* page's reference */
out:
	return err;
}

@@ -1318,7 +1315,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
			  struct page *page, void *fsdata)
{
	struct inode *inode = file_inode(file);
	int check_cap = 0;
	bool check_cap = false;

	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
	     inode, page, (int)pos, (int)copied, (int)len);
+83 −9
Original line number Diff line number Diff line
@@ -35,18 +35,34 @@ struct fscache_netfs ceph_cache_netfs = {
	.version	= 0,
};

static DEFINE_MUTEX(ceph_fscache_lock);
static LIST_HEAD(ceph_fscache_list);

struct ceph_fscache_entry {
	struct list_head list;
	struct fscache_cookie *fscache;
	struct ceph_fsid fsid;
	size_t uniq_len;
	char uniquifier[0];
};

static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
					     void *buffer, uint16_t maxbuf)
{
	const struct ceph_fs_client* fsc = cookie_netfs_data;
	uint16_t klen;
	const char *fscache_uniq = fsc->mount_options->fscache_uniq;
	uint16_t fsid_len, uniq_len;

	klen = sizeof(fsc->client->fsid);
	if (klen > maxbuf)
	fsid_len = sizeof(fsc->client->fsid);
	uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
	if (fsid_len + uniq_len > maxbuf)
		return 0;

	memcpy(buffer, &fsc->client->fsid, klen);
	return klen;
	memcpy(buffer, &fsc->client->fsid, fsid_len);
	if (uniq_len)
		memcpy(buffer + fsid_len, fscache_uniq, uniq_len);

	return fsid_len + uniq_len;
}

static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
@@ -67,13 +83,54 @@ void ceph_fscache_unregister(void)

int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
{
	const struct ceph_fsid *fsid = &fsc->client->fsid;
	const char *fscache_uniq = fsc->mount_options->fscache_uniq;
	size_t uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
	struct ceph_fscache_entry *ent;
	int err = 0;

	mutex_lock(&ceph_fscache_lock);
	list_for_each_entry(ent, &ceph_fscache_list, list) {
		if (memcmp(&ent->fsid, fsid, sizeof(*fsid)))
			continue;
		if (ent->uniq_len != uniq_len)
			continue;
		if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len))
			continue;

		pr_err("fscache cookie already registered for fsid %pU\n", fsid);
		pr_err("  use fsc=%%s mount option to specify a uniquifier\n");
		err = -EBUSY;
		goto out_unlock;
	}

	ent = kzalloc(sizeof(*ent) + uniq_len, GFP_KERNEL);
	if (!ent) {
		err = -ENOMEM;
		goto out_unlock;
	}

	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
					      &ceph_fscache_fsid_object_def,
					      fsc, true);
	if (!fsc->fscache)
		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);

	return 0;
	if (fsc->fscache) {
		memcpy(&ent->fsid, fsid, sizeof(*fsid));
		if (uniq_len > 0) {
			memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
			ent->uniq_len = uniq_len;
		}
		ent->fscache = fsc->fscache;
		list_add_tail(&ent->list, &ceph_fscache_list);
	} else {
		kfree(ent);
		pr_err("unable to register fscache cookie for fsid %pU\n",
		       fsid);
		/* all other fs ignore this error */
	}
out_unlock:
	mutex_unlock(&ceph_fscache_lock);
	return err;
}

static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
@@ -349,7 +406,24 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)

void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
{
	fscache_relinquish_cookie(fsc->fscache, 0);
	if (fscache_cookie_valid(fsc->fscache)) {
		struct ceph_fscache_entry *ent;
		bool found = false;

		mutex_lock(&ceph_fscache_lock);
		list_for_each_entry(ent, &ceph_fscache_list, list) {
			if (ent->fscache == fsc->fscache) {
				list_del(&ent->list);
				kfree(ent);
				found = true;
				break;
			}
		}
		WARN_ON_ONCE(!found);
		mutex_unlock(&ceph_fscache_lock);

		__fscache_relinquish_cookie(fsc->fscache, 0);
	}
	fsc->fscache = NULL;
}

+33 −7
Original line number Diff line number Diff line
@@ -1653,6 +1653,21 @@ static int try_nonblocking_invalidate(struct inode *inode)
	return -1;
}

bool __ceph_should_report_size(struct ceph_inode_info *ci)
{
	loff_t size = ci->vfs_inode.i_size;
	/* mds will adjust max size according to the reported size */
	if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
		return false;
	if (size >= ci->i_max_size)
		return true;
	/* half of previous max_size increment has been used */
	if (ci->i_max_size > ci->i_reported_size &&
	    (size << 1) >= ci->i_max_size + ci->i_reported_size)
		return true;
	return false;
}

/*
 * Swiss army knife function to examine currently used and wanted
 * versus held caps.  Release, flush, ack revoked caps to mds as
@@ -1806,8 +1821,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
			}

			/* approaching file_max? */
			if ((inode->i_size << 1) >= ci->i_max_size &&
			    (ci->i_reported_size << 1) < ci->i_max_size) {
			if (__ceph_should_report_size(ci)) {
				dout("i_size approaching max_size\n");
				goto ack;
			}
@@ -3027,8 +3041,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
					le32_to_cpu(grant->truncate_seq),
					le64_to_cpu(grant->truncate_size),
					size);
		/* max size increase? */
		if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
	}

	if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) {
		if (max_size != ci->i_max_size) {
			dout("max_size %lld -> %llu\n",
			     ci->i_max_size, max_size);
			ci->i_max_size = max_size;
@@ -3037,6 +3053,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
				ci->i_requested_max_size = 0;
			}
			wake = true;
		} else if (ci->i_wanted_max_size > ci->i_max_size &&
			   ci->i_wanted_max_size > ci->i_requested_max_size) {
			/* CEPH_CAP_OP_IMPORT */
			wake = true;
		}
	}

@@ -3554,7 +3574,6 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
	}

	/* make sure we re-request max_size, if necessary */
	ci->i_wanted_max_size = 0;
	ci->i_requested_max_size = 0;

	*old_issued = issued;
@@ -3790,6 +3809,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 */
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
	struct inode *inode;
	struct ceph_inode_info *ci;
	int flags = CHECK_CAPS_NODELAY;

@@ -3805,9 +3825,15 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
		    time_before(jiffies, ci->i_hold_caps_max))
			break;
		list_del_init(&ci->i_cap_delay_list);

		inode = igrab(&ci->vfs_inode);
		spin_unlock(&mdsc->cap_delay_lock);
		dout("check_delayed_caps on %p\n", &ci->vfs_inode);

		if (inode) {
			dout("check_delayed_caps on %p\n", inode);
			ceph_check_caps(ci, flags, NULL);
			iput(inode);
		}
	}
	spin_unlock(&mdsc->cap_delay_lock);
}
+1 −1
Original line number Diff line number Diff line
@@ -1040,8 +1040,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
	int num_pages;
	int written = 0;
	int flags;
	int check_caps = 0;
	int ret;
	bool check_caps = false;
	struct timespec mtime = current_time(inode);
	size_t count = iov_iter_count(from);

+10 −8
Original line number Diff line number Diff line
@@ -1016,6 +1016,7 @@ static void update_dentry_lease(struct dentry *dentry,
	long unsigned ttl = from_time + (duration * HZ) / 1000;
	long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
	struct inode *dir;
	struct ceph_mds_session *old_lease_session = NULL;

	/*
	 * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
@@ -1051,8 +1052,10 @@ static void update_dentry_lease(struct dentry *dentry,
	    time_before(ttl, di->time))
		goto out_unlock;  /* we already have a newer lease. */

	if (di->lease_session && di->lease_session != session)
		goto out_unlock;
	if (di->lease_session && di->lease_session != session) {
		old_lease_session = di->lease_session;
		di->lease_session = NULL;
	}

	ceph_dentry_lru_touch(dentry);

@@ -1065,6 +1068,8 @@ static void update_dentry_lease(struct dentry *dentry,
	di->time = ttl;
out_unlock:
	spin_unlock(&dentry->d_lock);
	if (old_lease_session)
		ceph_put_mds_session(old_lease_session);
	return;
}

@@ -1653,20 +1658,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
	return err;
}

int ceph_inode_set_size(struct inode *inode, loff_t size)
bool ceph_inode_set_size(struct inode *inode, loff_t size)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int ret = 0;
	bool ret;

	spin_lock(&ci->i_ceph_lock);
	dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
	i_size_write(inode, size);
	inode->i_blocks = calc_inode_blocks(size);

	/* tell the MDS if we are approaching max_size */
	if ((size << 1) >= ci->i_max_size &&
	    (ci->i_reported_size << 1) < ci->i_max_size)
		ret = 1;
	ret = __ceph_should_report_size(ci);

	spin_unlock(&ci->i_ceph_lock);
	return ret;
Loading