Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ba5b56cb authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (23 commits)
  ceph: document unlocked d_parent accesses
  ceph: explicitly reference rename old_dentry parent dir in request
  ceph: document locking for ceph_set_dentry_offset
  ceph: avoid d_parent in ceph_dentry_hash; fix ceph_encode_fh() hashing bug
  ceph: protect d_parent access in ceph_d_revalidate
  ceph: protect access to d_parent
  ceph: handle racing calls to ceph_init_dentry
  ceph: set dir complete frag after adding capability
  rbd: set blk_queue request sizes to object size
  ceph: set up readahead size when rsize is not passed
  rbd: cancel watch request when releasing the device
  ceph: ignore lease mask
  ceph: fix ceph_lookup_open intent usage
  ceph: only link open operations to directory unsafe list if O_CREAT|O_TRUNC
  ceph: fix bad parent_inode calc in ceph_lookup_open
  ceph: avoid carrying Fw cap during write into page cache
  libceph: don't time out osd requests that haven't been received
  ceph: report f_bfree based on kb_avail rather than diffing.
  ceph: only queue capsnap if caps are dirty
  ceph: fix snap writeback when racing with writes
  ...
parents 243dd280 d79698da
Loading
Loading
Loading
Loading
+45 −1
Original line number Diff line number Diff line
@@ -629,6 +629,14 @@ static int rbd_get_num_segments(struct rbd_image_header *header,
	return end_seg - start_seg + 1;
}

/*
 * returns the size of an object in the image
 */
static u64 rbd_obj_bytes(struct rbd_image_header *header)
{
	return 1 << header->obj_order;
}

/*
 * bio helpers
 */
@@ -1253,6 +1261,35 @@ fail:
	return ret;
}

/*
 * Request sync osd unwatch
 */
static int rbd_req_sync_unwatch(struct rbd_device *dev,
				const char *obj)
{
	struct ceph_osd_req_op *ops;

	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
	if (ret < 0)
		return ret;

	ops[0].watch.ver = 0;
	ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
	ops[0].watch.flag = 0;

	ret = rbd_req_sync_op(dev, NULL,
			      CEPH_NOSNAP,
			      0,
			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
			      ops,
			      1, obj, 0, 0, NULL, NULL, NULL);

	rbd_destroy_ops(ops);
	ceph_osdc_cancel_event(dev->watch_event);
	dev->watch_event = NULL;
	return ret;
}

struct rbd_notify_info {
	struct rbd_device *dev;
};
@@ -1736,6 +1773,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
	q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
	if (!q)
		goto out_disk;

	/* set io sizes to object size */
	blk_queue_max_hw_sectors(q, rbd_obj_bytes(&rbd_dev->header) / 512ULL);
	blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header));
	blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header));
	blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header));

	blk_queue_merge_bvec(q, rbd_merge_bvec);
	disk->queue = q;

@@ -2290,7 +2334,7 @@ static void rbd_dev_release(struct device *dev)
		ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc,
						    rbd_dev->watch_request);
	if (rbd_dev->watch_event)
		ceph_osdc_cancel_event(rbd_dev->watch_event);
		rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);

	rbd_put_client(rbd_dev);

+1 −1
Original line number Diff line number Diff line
@@ -102,7 +102,7 @@ static int mdsc_show(struct seq_file *s, void *p)
				path = NULL;
			spin_lock(&req->r_old_dentry->d_lock);
			seq_printf(s, " #%llx/%.*s (%s)",
			   ceph_ino(req->r_old_dentry->d_parent->d_inode),
			   ceph_ino(req->r_old_dentry_dir),
				   req->r_old_dentry->d_name.len,
				   req->r_old_dentry->d_name.name,
				   path ? path : "");
+73 −43
Original line number Diff line number Diff line
@@ -40,14 +40,6 @@ int ceph_init_dentry(struct dentry *dentry)
	if (dentry->d_fsdata)
		return 0;

	if (dentry->d_parent == NULL ||   /* nfs fh_to_dentry */
	    ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
		d_set_d_op(dentry, &ceph_dentry_ops);
	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
		d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
	else
		d_set_d_op(dentry, &ceph_snap_dentry_ops);

	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
	if (!di)
		return -ENOMEM;          /* oh well */
@@ -58,16 +50,42 @@ int ceph_init_dentry(struct dentry *dentry)
		kmem_cache_free(ceph_dentry_cachep, di);
		goto out_unlock;
	}

	if (dentry->d_parent == NULL ||   /* nfs fh_to_dentry */
	    ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
		d_set_d_op(dentry, &ceph_dentry_ops);
	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
		d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
	else
		d_set_d_op(dentry, &ceph_snap_dentry_ops);

	di->dentry = dentry;
	di->lease_session = NULL;
	dentry->d_fsdata = di;
	dentry->d_time = jiffies;
	/* avoid reordering d_fsdata setup so that the check above is safe */
	smp_mb();
	dentry->d_fsdata = di;
	ceph_dentry_lru_add(dentry);
out_unlock:
	spin_unlock(&dentry->d_lock);
	return 0;
}

struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
{
	struct inode *inode = NULL;

	if (!dentry)
		return NULL;

	spin_lock(&dentry->d_lock);
	if (dentry->d_parent) {
		inode = dentry->d_parent->d_inode;
		ihold(inode);
	}
	spin_unlock(&dentry->d_lock);
	return inode;
}


/*
@@ -133,7 +151,7 @@ more:
		     d_unhashed(dentry) ? "!hashed" : "hashed",
		     parent->d_subdirs.prev, parent->d_subdirs.next);
		if (p == &parent->d_subdirs) {
			fi->at_end = 1;
			fi->flags |= CEPH_F_ATEND;
			goto out_unlock;
		}
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -234,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
	const int max_bytes = fsc->mount_options->max_readdir_bytes;

	dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
	if (fi->at_end)
	if (fi->flags & CEPH_F_ATEND)
		return 0;

	/* always start with . and .. */
@@ -403,7 +421,7 @@ more:
		dout("readdir next frag is %x\n", frag);
		goto more;
	}
	fi->at_end = 1;
	fi->flags |= CEPH_F_ATEND;

	/*
	 * if dir_release_count still matches the dir, no dentries
@@ -435,7 +453,7 @@ static void reset_readdir(struct ceph_file_info *fi)
		dput(fi->dentry);
		fi->dentry = NULL;
	}
	fi->at_end = 0;
	fi->flags &= ~CEPH_F_ATEND;
}

static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
@@ -463,7 +481,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
			fi->at_end = 0;
			fi->flags &= ~CEPH_F_ATEND;
		}
		retval = offset;

@@ -488,21 +506,13 @@ out:
}

/*
 * Process result of a lookup/open request.
 *
 * Mainly, make sure we return the final req->r_dentry (if it already
 * existed) in place of the original VFS-provided dentry when they
 * differ.
 *
 * Gracefully handle the case where the MDS replies with -ENOENT and
 * no trace (which it may do, at its discretion, e.g., if it doesn't
 * care to issue a lease on the negative dentry).
 * Handle lookups for the hidden .snap directory.
 */
struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
int ceph_handle_snapdir(struct ceph_mds_request *req,
			struct dentry *dentry, int err)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
	struct inode *parent = dentry->d_parent->d_inode;
	struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */

	/* .snap dir? */
	if (err == -ENOENT &&
@@ -516,7 +526,23 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
		d_add(dentry, inode);
		err = 0;
	}
	return err;
}

/*
 * Figure out final result of a lookup/open request.
 *
 * Mainly, make sure we return the final req->r_dentry (if it already
 * existed) in place of the original VFS-provided dentry when they
 * differ.
 *
 * Gracefully handle the case where the MDS replies with -ENOENT and
 * no trace (which it may do, at its discretion, e.g., if it doesn't
 * care to issue a lease on the negative dentry).
 */
struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
				  struct dentry *dentry, int err)
{
	if (err == -ENOENT) {
		/* no trace? */
		err = 0;
@@ -610,6 +636,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
	req->r_locked_dir = dir;
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	err = ceph_handle_snapdir(req, dentry, err);
	dentry = ceph_finish_lookup(req, dentry, err);
	ceph_mdsc_put_request(req);  /* will dput(dentry) */
	dout("lookup result=%p\n", dentry);
@@ -789,6 +816,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
	req->r_locked_dir = dir;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -887,6 +915,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
	req->r_dentry = dget(new_dentry);
	req->r_num_caps = 2;
	req->r_old_dentry = dget(old_dentry);
	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
	req->r_locked_dir = new_dir;
	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -1002,36 +1031,38 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
 */
static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	int valid = 0;
	struct inode *dir;

	if (nd && nd->flags & LOOKUP_RCU)
		return -ECHILD;

	dir = dentry->d_parent->d_inode;

	dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
	     dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
	     ceph_dentry(dentry)->offset);

	dir = ceph_get_dentry_parent_inode(dentry);

	/* always trust cached snapped dentries, snapdir dentry */
	if (ceph_snap(dir) != CEPH_NOSNAP) {
		dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
		     dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
		goto out_touch;
		valid = 1;
	} else if (dentry->d_inode &&
		   ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
		valid = 1;
	} else if (dentry_lease_is_valid(dentry) ||
		   dir_lease_is_valid(dir, dentry)) {
		valid = 1;
	}
	if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR)
		goto out_touch;

	if (dentry_lease_is_valid(dentry) ||
	    dir_lease_is_valid(dir, dentry))
		goto out_touch;

	dout("d_revalidate %p invalid\n", dentry);
	d_drop(dentry);
	return 0;
out_touch:
	dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
	if (valid)
		ceph_dentry_lru_touch(dentry);
	return 1;
	else
		d_drop(dentry);
	iput(dir);
	return valid;
}

/*
@@ -1228,9 +1259,8 @@ void ceph_dentry_lru_del(struct dentry *dn)
 * Return name hash for a given dentry.  This is dependent on
 * the parent directory's hash function.
 */
unsigned ceph_dentry_hash(struct dentry *dn)
unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
	struct inode *dir = dn->d_parent->d_inode;
	struct ceph_inode_info *dci = ceph_inode(dir);

	switch (dci->i_dir_layout.dl_dir_hash) {
+15 −9
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
	int type;
	struct ceph_nfs_fh *fh = (void *)rawfh;
	struct ceph_nfs_confh *cfh = (void *)rawfh;
	struct dentry *parent = dentry->d_parent;
	struct dentry *parent;
	struct inode *inode = dentry->d_inode;
	int connected_handle_length = sizeof(*cfh)/4;
	int handle_length = sizeof(*fh)/4;
@@ -55,26 +55,33 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
	if (ceph_snap(inode) != CEPH_NOSNAP)
		return -EINVAL;

	spin_lock(&dentry->d_lock);
	parent = dget(dentry->d_parent);
	spin_unlock(&dentry->d_lock);

	if (*max_len >= connected_handle_length) {
		dout("encode_fh %p connectable\n", dentry);
		cfh->ino = ceph_ino(dentry->d_inode);
		cfh->parent_ino = ceph_ino(parent->d_inode);
		cfh->parent_name_hash = ceph_dentry_hash(parent);
		cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
							 dentry);
		*max_len = connected_handle_length;
		type = 2;
	} else if (*max_len >= handle_length) {
		if (connectable) {
			*max_len = connected_handle_length;
			return 255;
		}
			type = 255;
		} else {
			dout("encode_fh %p\n", dentry);
			fh->ino = ceph_ino(dentry->d_inode);
			*max_len = handle_length;
			type = 1;
		}
	} else {
		*max_len = handle_length;
		return 255;
		type = 255;
	}
	dput(parent);
	return type;
}

@@ -123,7 +130,6 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
		return dentry;
	}
	err = ceph_init_dentry(dentry);

	if (err < 0) {
		iput(inode);
		return ERR_PTR(err);
+45 −16
Original line number Diff line number Diff line
@@ -122,7 +122,7 @@ int ceph_open(struct inode *inode, struct file *file)
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	struct ceph_file_info *cf = file->private_data;
	struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
	struct inode *parent_inode = NULL;
	int err;
	int flags, fmode, wanted;

@@ -194,7 +194,10 @@ int ceph_open(struct inode *inode, struct file *file)
	req->r_inode = inode;
	ihold(inode);
	req->r_num_caps = 1;
	if (flags & (O_CREAT|O_TRUNC))
		parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
	iput(parent_inode);
	if (!err)
		err = ceph_init_file(inode, file, req->r_fmode);
	ceph_mdsc_put_request(req);
@@ -222,9 +225,9 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct file *file = nd->intent.open.file;
	struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
	struct file *file;
	struct ceph_mds_request *req;
	struct dentry *ret;
	int err;
	int flags = nd->intent.open.flags;

@@ -242,16 +245,24 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
		req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	}
	req->r_locked_dir = dir;           /* caller holds dir->i_mutex */
	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
	dentry = ceph_finish_lookup(req, dentry, err);
	if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
	err = ceph_mdsc_do_request(mdsc,
				   (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
				   req);
	err = ceph_handle_snapdir(req, dentry, err);
	if (err)
		goto out;
	if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);
	if (!err)
		err = ceph_init_file(req->r_dentry->d_inode, file,
				     req->r_fmode);
	if (err)
		goto out;
	file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
	if (IS_ERR(file))
		err = PTR_ERR(file);
out:
	ret = ceph_finish_lookup(req, dentry, err);
	ceph_mdsc_put_request(req);
	dout("ceph_lookup_open result=%p\n", dentry);
	return dentry;
	dout("ceph_lookup_open result=%p\n", ret);
	return ret;
}

int ceph_release(struct inode *inode, struct file *file)
@@ -643,7 +654,8 @@ again:

	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
	    (iocb->ki_filp->f_flags & O_DIRECT) ||
	    (inode->i_sb->s_flags & MS_SYNCHRONOUS))
	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
	    (fi->flags & CEPH_F_SYNC))
		/* hmm, this isn't really async... */
		ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
	else
@@ -712,7 +724,7 @@ retry_snap:
		want = CEPH_CAP_FILE_BUFFER;
	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
	if (ret < 0)
		goto out;
		goto out_put;

	dout("aio_write %p %llx.%llx %llu~%u  got cap refs on %s\n",
	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
@@ -720,12 +732,23 @@ retry_snap:

	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
	    (iocb->ki_filp->f_flags & O_DIRECT) ||
	    (inode->i_sb->s_flags & MS_SYNCHRONOUS)) {
	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
	    (fi->flags & CEPH_F_SYNC)) {
		ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
			&iocb->ki_pos);
	} else {
		ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
		/*
		 * buffered write; drop Fw early to avoid slow
		 * revocation if we get stuck on balance_dirty_pages
		 */
		int dirty;

		spin_lock(&inode->i_lock);
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
		spin_unlock(&inode->i_lock);
		ceph_put_cap_refs(ci, got);

		ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
		if ((ret >= 0 || ret == -EIOCBQUEUED) &&
		    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
		     || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
@@ -733,7 +756,12 @@ retry_snap:
			if (err < 0)
				ret = err;
		}

		if (dirty)
			__mark_inode_dirty(inode, dirty);
		goto out;
	}

	if (ret >= 0) {
		int dirty;
		spin_lock(&inode->i_lock);
@@ -743,12 +771,13 @@ retry_snap:
			__mark_inode_dirty(inode, dirty);
	}

out:
out_put:
	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
	     ceph_cap_string(got));
	ceph_put_cap_refs(ci, got);

out:
	if (ret == -EOLDSNAPC) {
		dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
		     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
Loading