Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit aa958874 authored by Mark Fasheh
Browse files

ocfs2: implement directory read-ahead



Uptodate.c now knows about read-ahead buffers. Use some more aggressive
logic in ocfs2_readdir().

The two functions which currently use directory read-ahead are
ocfs2_find_entry() and ocfs2_readdir().

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
parent e0b4096d
Loading
Loading
Loading
Loading
+72 −23
Original line number Original line Diff line number Diff line
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
		   (unsigned long long)block, nr, flags, inode);
		   (unsigned long long)block, nr, flags, inode);


	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
	       (!inode || !(flags & OCFS2_BH_CACHED)));

	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
		status = -EINVAL;
		status = -EINVAL;
		mlog_errno(status);
		mlog_errno(status);
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
		bh = bhs[i];
		bh = bhs[i];
		ignore_cache = 0;
		ignore_cache = 0;


		/* There are three read-ahead cases here which we need to
		 * be concerned with. All three assume a buffer has
		 * previously been submitted with OCFS2_BH_READAHEAD
		 * and it hasn't yet completed I/O.
		 *
		 * 1) The current request is sync to disk. This rarely
		 *    happens these days, and never when performance
		 *    matters - the code can just wait on the buffer
		 *    lock and re-submit.
		 *
		 * 2) The current request is cached, but not
		 *    readahead. ocfs2_buffer_uptodate() will return
		 *    false anyway, so we'll wind up waiting on the
		 *    buffer lock to do I/O. We re-check the request
		 *    after getting the lock to avoid a re-submit.
		 *
		 * 3) The current request is readahead (and so must
		 *    also be a caching one). We short circuit if the
		 *    buffer is locked (under I/O) and if it's in the
		 *    uptodate cache. The re-check from #2 catches the
		 *    case that the previous read-ahead completes just
		 *    before our is-it-in-flight check.
		 */

		if (flags & OCFS2_BH_CACHED &&
		if (flags & OCFS2_BH_CACHED &&
		    !ocfs2_buffer_uptodate(inode, bh)) {
		    !ocfs2_buffer_uptodate(inode, bh)) {
			mlog(ML_UPTODATE,
			mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
				continue;
				continue;
			}
			}


			/* A read-ahead request was made - if the
			 * buffer is already under read-ahead from a
			 * previously submitted request then we are
			 * done here. */
			if ((flags & OCFS2_BH_READAHEAD)
			    && ocfs2_buffer_read_ahead(inode, bh))
				continue;

			lock_buffer(bh);
			lock_buffer(bh);
			if (buffer_jbd(bh)) {
			if (buffer_jbd(bh)) {
#ifdef CATCH_BH_JBD_RACES
#ifdef CATCH_BH_JBD_RACES
@@ -181,12 +216,21 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
				continue;
				continue;
#endif
#endif
			}
			}

			/* Re-check ocfs2_buffer_uptodate() as a
			 * previously read-ahead buffer may have
			 * completed I/O while we were waiting for the
			 * buffer lock. */
			if ((flags & OCFS2_BH_CACHED)
			    && !(flags & OCFS2_BH_READAHEAD)
			    && ocfs2_buffer_uptodate(inode, bh)) {
				unlock_buffer(bh);
				continue;
			}

			clear_buffer_uptodate(bh);
			clear_buffer_uptodate(bh);
			get_bh(bh); /* for end_buffer_read_sync() */
			get_bh(bh); /* for end_buffer_read_sync() */
			bh->b_end_io = end_buffer_read_sync;
			bh->b_end_io = end_buffer_read_sync;
			if (flags & OCFS2_BH_READAHEAD)
				submit_bh(READA, bh);
			else
			submit_bh(READ, bh);
			submit_bh(READ, bh);
			continue;
			continue;
		}
		}
@@ -197,6 +241,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
	for (i = (nr - 1); i >= 0; i--) {
	for (i = (nr - 1); i >= 0; i--) {
		bh = bhs[i];
		bh = bhs[i];


		if (!(flags & OCFS2_BH_READAHEAD)) {
			/* We know this can't have changed as we hold the
			/* We know this can't have changed as we hold the
			 * inode sem. Avoid doing any work on the bh if the
			 * inode sem. Avoid doing any work on the bh if the
			 * journal has it. */
			 * journal has it. */
@@ -215,16 +260,20 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
				bhs[i] = NULL;
				bhs[i] = NULL;
				continue;
				continue;
			}
			}
		}


		/* Always set the buffer in the cache, even if it was
		 * a forced read, or read-ahead which hasn't yet
		 * completed. */
		if (inode)
		if (inode)
			ocfs2_set_buffer_uptodate(inode, bh);
			ocfs2_set_buffer_uptodate(inode, bh);
	}
	}
	if (inode)
	if (inode)
		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);


	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 
	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
	     (unsigned long long)block, nr,
	     (unsigned long long)block, nr,
	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);


bail:
bail:


+1 −1
Original line number Original line Diff line number Diff line
@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb,




#define OCFS2_BH_CACHED            1
#define OCFS2_BH_CACHED            1
#define OCFS2_BH_READAHEAD         8	/* use this to pass READA down to submit_bh */
#define OCFS2_BH_READAHEAD         8


static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
				   struct buffer_head **bh, int flags,
				   struct buffer_head **bh, int flags,
+16 −12
Original line number Original line Diff line number Diff line
@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
{
	int error = 0;
	int error = 0;
	unsigned long offset, blk;
	unsigned long offset, blk, last_ra_blk = 0;
	int i, num, stored;
	int i, stored;
	struct buffer_head * bh, * tmp;
	struct buffer_head * bh, * tmp;
	struct ocfs2_dir_entry * de;
	struct ocfs2_dir_entry * de;
	int err;
	int err;
	struct inode *inode = filp->f_dentry->d_inode;
	struct inode *inode = filp->f_dentry->d_inode;
	struct super_block * sb = inode->i_sb;
	struct super_block * sb = inode->i_sb;
	int have_disk_lock = 0;
	unsigned int ra_sectors = 16;


	mlog_entry("dirino=%llu\n",
	mlog_entry("dirino=%llu\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
			mlog_errno(error);
			mlog_errno(error);
		/* we haven't got any yet, so propagate the error. */
		/* we haven't got any yet, so propagate the error. */
		stored = error;
		stored = error;
		goto bail;
		goto bail_nolock;
	}
	}
	have_disk_lock = 1;


	offset = filp->f_pos & (sb->s_blocksize - 1);
	offset = filp->f_pos & (sb->s_blocksize - 1);


@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
			continue;
			continue;
		}
		}


		/*
		/* The idea here is to begin with 8k read-ahead and to stay
		 * Do the readahead (8k)
		 * 4k ahead of our current position.
		 */
		 *
		if (!offset) {
		 * TODO: Use the pagecache for this. We just need to
			for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
		 * make sure it's cluster-safe... */
		if (!last_ra_blk
		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
			     i > 0; i--) {
			     i > 0; i--) {
				tmp = ocfs2_bread(inode, ++blk, &err, 1);
				tmp = ocfs2_bread(inode, ++blk, &err, 1);
				if (tmp)
				if (tmp)
					brelse(tmp);
					brelse(tmp);
			}
			}
			last_ra_blk = blk;
			ra_sectors = 8;
		}
		}


revalidate:
revalidate:
@@ -194,9 +198,9 @@ revalidate:


	stored = 0;
	stored = 0;
bail:
bail:
	if (have_disk_lock)
	ocfs2_meta_unlock(inode, 0);
	ocfs2_meta_unlock(inode, 0);


bail_nolock:
	mlog_exit(stored);
	mlog_exit(stored);


	return stored;
	return stored;
+0 −4
Original line number Original line Diff line number Diff line
@@ -1050,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
	u64 p_blkno;
	u64 p_blkno;
	int readflags = OCFS2_BH_CACHED;
	int readflags = OCFS2_BH_CACHED;


#if 0
	/* only turn this on if we know we can deal with read_block
	 * returning nothing */
	if (reada)
	if (reada)
		readflags |= OCFS2_BH_READAHEAD;
		readflags |= OCFS2_BH_READAHEAD;
#endif


	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
	    i_size_read(inode)) {
+5 −5
Original line number Original line Diff line number Diff line
@@ -56,6 +56,7 @@
#include "journal.h"
#include "journal.h"
#include "namei.h"
#include "namei.h"
#include "suballoc.h"
#include "suballoc.h"
#include "super.h"
#include "symlink.h"
#include "symlink.h"
#include "sysfile.h"
#include "sysfile.h"
#include "uptodate.h"
#include "uptodate.h"
@@ -1962,13 +1963,8 @@ restart:
				}
				}
				num++;
				num++;


				/* XXX: questionable readahead stuff here */
				bh = ocfs2_bread(dir, b++, &err, 1);
				bh = ocfs2_bread(dir, b++, &err, 1);
				bh_use[ra_max] = bh;
				bh_use[ra_max] = bh;
#if 0		// ???
				if (bh)
					ll_rw_block(READ, 1, &bh);
#endif
			}
			}
		}
		}
		if ((bh = bh_use[ra_ptr++]) == NULL)
		if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1976,6 +1972,10 @@ restart:
		wait_on_buffer(bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
			/* read error, skip block & hope for the best */
			ocfs2_error(dir->i_sb, "reading directory %llu, "
				    "offset %lu\n",
				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
				    block);
			brelse(bh);
			brelse(bh);
			goto next;
			goto next;
		}
		}
Loading