Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 96e35b40 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: use separate class for ceph sockets' sk_lock
  ceph: reserve one more caps space when doing readdir
  ceph: queue_cap_snap should always queue dirty context
  ceph: fix dentry reference leak in dcache readdir
  ceph: decode v5 of osdmap (pool names) [protocol change]
  ceph: fix ack counter reset on connection reset
  ceph: fix leaked inode ref due to snap metadata writeback race
  ceph: fix snap context reference leaks
  ceph: allow writeback of snapped pages older than 'oldest' snapc
  ceph: fix dentry rehashing on virtual .snap dir
parents f5c07a2d a6a5349d
Loading
Loading
Loading
Loading
+30 −32
Original line number Diff line number Diff line
@@ -337,16 +337,15 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
/*
 * Get ref for the oldest snapc for an inode with dirty data... that is, the
 * only snap context we are allowed to write back.
 *
 * Caller holds i_lock.
 */
static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
static struct ceph_snap_context *get_oldest_context(struct inode *inode,
						    u64 *snap_size)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_snap_context *snapc = NULL;
	struct ceph_cap_snap *capsnap = NULL;

	spin_lock(&inode->i_lock);
	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
		     capsnap->context, capsnap->dirty_pages);
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
			break;
		}
	}
	if (!snapc && ci->i_snap_realm) {
		snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
	if (!snapc && ci->i_head_snapc) {
		snapc = ceph_get_snap_context(ci->i_head_snapc);
		dout(" head snapc %p has %d dirty pages\n",
		     snapc, ci->i_wrbuffer_ref_head);
	}
	return snapc;
}

static struct ceph_snap_context *get_oldest_context(struct inode *inode,
						    u64 *snap_size)
{
	struct ceph_snap_context *snapc = NULL;

	spin_lock(&inode->i_lock);
	snapc = __get_oldest_context(inode, snap_size);
	spin_unlock(&inode->i_lock);
	return snapc;
}
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
	int len = PAGE_CACHE_SIZE;
	loff_t i_size;
	int err = 0;
	struct ceph_snap_context *snapc;
	struct ceph_snap_context *snapc, *oldest;
	u64 snap_size = 0;
	long writeback_stat;

@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
		dout("writepage %p page %p not dirty?\n", inode, page);
		goto out;
	}
	if (snapc != get_oldest_context(inode, &snap_size)) {
	oldest = get_oldest_context(inode, &snap_size);
	if (snapc->seq > oldest->seq) {
		dout("writepage %p page %p snapc %p not writeable - noop\n",
		     inode, page, (void *)page->private);
		/* we should only noop if called by kswapd */
		WARN_ON((current->flags & PF_MEMALLOC) == 0);
		ceph_put_snap_context(oldest);
		goto out;
	}
	ceph_put_snap_context(oldest);

	/* is this a partial page at end of file? */
	if (snap_size)
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
	ClearPagePrivate(page);
	end_page_writeback(page);
	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
	ceph_put_snap_context(snapc);
	ceph_put_snap_context(snapc);  /* page's reference */
out:
	return err;
}
@@ -558,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
			dout("inode %p skipping page %p\n", inode, page);
			wbc->pages_skipped++;
		}
		ceph_put_snap_context((void *)page->private);
		page->private = 0;
		ClearPagePrivate(page);
		ceph_put_snap_context(snapc);
		dout("unlocking %d %p\n", i, page);
		end_page_writeback(page);

@@ -618,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
	int range_whole = 0;
	int should_loop = 1;
	pgoff_t max_pages = 0, max_pages_ever = 0;
	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL;
	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
	struct pagevec pvec;
	int done = 0;
	int rc = 0;
@@ -770,9 +762,10 @@ static int ceph_writepages_start(struct address_space *mapping,
			}

			/* only if matching snap context */
			if (snapc != (void *)page->private) {
				dout("page snapc %p != oldest %p\n",
				     (void *)page->private, snapc);
			pgsnapc = (void *)page->private;
			if (pgsnapc->seq > snapc->seq) {
				dout("page snapc %p %lld > oldest %p %lld\n",
				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
				unlock_page(page);
				if (!locked_pages)
					continue; /* keep looking for snap */
@@ -914,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode,
					   struct ceph_snap_context *snapc)
{
	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
	return !oldest || snapc->seq <= oldest->seq;
	int ret = !oldest || snapc->seq <= oldest->seq;

	ceph_put_snap_context(oldest);
	return ret;
}

/*
@@ -936,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
	int pos_in_page = pos & ~PAGE_CACHE_MASK;
	int end_in_page = pos_in_page + len;
	loff_t i_size;
	struct ceph_snap_context *snapc;
	int r;
	struct ceph_snap_context *snapc, *oldest;

retry_locked:
	/* writepages currently holds page lock, but if we change that later, */
@@ -947,23 +943,24 @@ static int ceph_update_writeable_page(struct file *file,
	BUG_ON(!ci->i_snap_realm);
	down_read(&mdsc->snap_rwsem);
	BUG_ON(!ci->i_snap_realm->cached_context);
	if (page->private &&
	    (void *)page->private != ci->i_snap_realm->cached_context) {
	snapc = (void *)page->private;
	if (snapc && snapc != ci->i_head_snapc) {
		/*
		 * this page is already dirty in another (older) snap
		 * context!  is it writeable now?
		 */
		snapc = get_oldest_context(inode, NULL);
		oldest = get_oldest_context(inode, NULL);
		up_read(&mdsc->snap_rwsem);

		if (snapc != (void *)page->private) {
		if (snapc->seq > oldest->seq) {
			ceph_put_snap_context(oldest);
			dout(" page %p snapc %p not current or oldest\n",
			     page, (void *)page->private);
			     page, snapc);
			/*
			 * queue for writeback, and wait for snapc to
			 * be writeable or written
			 */
			snapc = ceph_get_snap_context((void *)page->private);
			snapc = ceph_get_snap_context(snapc);
			unlock_page(page);
			ceph_queue_writeback(inode);
			r = wait_event_interruptible(ci->i_cap_wq,
@@ -973,6 +970,7 @@ static int ceph_update_writeable_page(struct file *file,
				return r;
			return -EAGAIN;
		}
		ceph_put_snap_context(oldest);

		/* yay, writeable, do it now (without dropping page lock) */
		dout(" page %p snapc %p not current, but oldest\n",
+32 −10
Original line number Diff line number Diff line
@@ -1205,6 +1205,12 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
		if (capsnap->dirty_pages || capsnap->writing)
			continue;

		/*
		 * if cap writeback already occurred, we should have dropped
		 * the capsnap in ceph_put_wrbuffer_cap_refs.
		 */
		BUG_ON(capsnap->dirty == 0);

		/* pick mds, take s_mutex */
		mds = __ceph_get_cap_mds(ci, &mseq);
		if (session && session->s_mds != mds) {
@@ -2118,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
		}
	spin_unlock(&inode->i_lock);

	dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
	     last ? "last" : "");
	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
	     last ? " last" : "", put ? " put" : "");

	if (last && !flushsnaps)
		ceph_check_caps(ci, 0, NULL);
@@ -2143,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
{
	struct inode *inode = &ci->vfs_inode;
	int last = 0;
	int last_snap = 0;
	int complete_capsnap = 0;
	int drop_capsnap = 0;
	int found = 0;
	struct ceph_cap_snap *capsnap = NULL;

@@ -2166,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
			if (capsnap->context == snapc) {
				found = 1;
				capsnap->dirty_pages -= nr;
				last_snap = !capsnap->dirty_pages;
				break;
			}
		}
		BUG_ON(!found);
		capsnap->dirty_pages -= nr;
		if (capsnap->dirty_pages == 0) {
			complete_capsnap = 1;
			if (capsnap->dirty == 0)
				/* cap writeback completed before we created
				 * the cap_snap; no FLUSHSNAP is needed */
				drop_capsnap = 1;
		}
		dout("put_wrbuffer_cap_refs on %p cap_snap %p "
		     " snap %lld %d/%d -> %d/%d %s%s\n",
		     " snap %lld %d/%d -> %d/%d %s%s%s\n",
		     inode, capsnap, capsnap->context->seq,
		     ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
		     ci->i_wrbuffer_ref, capsnap->dirty_pages,
		     last ? " (wrbuffer last)" : "",
		     last_snap ? " (capsnap last)" : "");
		     complete_capsnap ? " (complete capsnap)" : "",
		     drop_capsnap ? " (drop capsnap)" : "");
		if (drop_capsnap) {
			ceph_put_snap_context(capsnap->context);
			list_del(&capsnap->ci_item);
			list_del(&capsnap->flushing_item);
			ceph_put_cap_snap(capsnap);
		}
	}

	spin_unlock(&inode->i_lock);
@@ -2186,10 +2206,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
	if (last) {
		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
		iput(inode);
	} else if (last_snap) {
	} else if (complete_capsnap) {
		ceph_flush_snaps(ci);
		wake_up(&ci->i_cap_wq);
	}
	if (drop_capsnap)
		iput(inode);
}

/*
@@ -2465,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
				break;
			}
			WARN_ON(capsnap->dirty_pages || capsnap->writing);
			dout(" removing cap_snap %p follows %lld\n",
			     capsnap, follows);
			dout(" removing %p cap_snap %p follows %lld\n",
			     inode, capsnap, follows);
			ceph_put_snap_context(capsnap->context);
			list_del(&capsnap->ci_item);
			list_del(&capsnap->flushing_item);
+4 −3
Original line number Diff line number Diff line
@@ -171,11 +171,11 @@ static int __dcache_readdir(struct file *filp,
	spin_lock(&inode->i_lock);
	spin_lock(&dcache_lock);

	last = dentry;

	if (err < 0)
		goto out_unlock;

	last = dentry;

	p = p->prev;
	filp->f_pos++;

@@ -312,7 +312,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
		req->r_readdir_offset = fi->next_offset;
		req->r_args.readdir.frag = cpu_to_le32(frag);
		req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
		req->r_num_caps = max_entries;
		req->r_num_caps = max_entries + 1;
		err = ceph_mdsc_do_request(mdsc, NULL, req);
		if (err < 0) {
			ceph_mdsc_put_request(req);
@@ -489,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
		struct inode *inode = ceph_get_snapdir(parent);
		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
		     dentry, dentry->d_name.len, dentry->d_name.name, inode);
		BUG_ON(!d_unhashed(dentry));
		d_add(dentry, inode);
		err = 0;
	}
+9 −1
Original line number Diff line number Diff line
@@ -886,6 +886,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
	struct inode *in = NULL;
	struct ceph_mds_reply_inode *ininfo;
	struct ceph_vino vino;
	struct ceph_client *client = ceph_sb_to_client(sb);
	int i = 0;
	int err = 0;

@@ -949,7 +950,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
			return err;
	}

	if (rinfo->head->is_dentry && !req->r_aborted) {
	/*
	 * ignore null lease/binding on snapdir ENOENT, or else we
	 * will have trouble splicing in the virtual snapdir later
	 */
	if (rinfo->head->is_dentry && !req->r_aborted &&
	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
					       client->mount_args->snapdir_name,
					       req->r_dentry->d_name.len))) {
		/*
		 * lookup link rename   : null -> possibly existing inode
		 * mknod symlink mkdir  : null -> new inode
+9 −0
Original line number Diff line number Diff line
@@ -30,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;

#ifdef CONFIG_LOCKDEP
static struct lock_class_key socket_class;
#endif


static void queue_con(struct ceph_connection *con);
static void con_work(struct work_struct *);
@@ -228,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
	con->sock = sock;
	sock->sk->sk_allocation = GFP_NOFS;

#ifdef CONFIG_LOCKDEP
	lockdep_set_class(&sock->sk->sk_lock, &socket_class);
#endif

	set_sock_callbacks(sock, con);

	dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
@@ -333,6 +341,7 @@ static void reset_connection(struct ceph_connection *con)
		con->out_msg = NULL;
	}
	con->in_seq = 0;
	con->in_seq_acked = 0;
}

/*
Loading