Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 60c7b4df authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs

Pull xfs update from Ben Myers:
 "Several enhancements and cleanups:

   - make inode32 and inode64 remountable options
   - SEEK_HOLE/SEEK_DATA enhancements
   - cleanup struct declarations in xfs_mount.h"

* tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs:
  xfs: Make inode32 a remountable option
  xfs: add inode64->inode32 transition into xfs_set_inode32()
  xfs: Fix mp->m_maxagi update during inode64 remount
  xfs: reduce code duplication handling inode32/64 options
  xfs: make inode64 as the default allocation mode
  xfs: Fix m_agirotor reset during AG selection
  Make inode64 a remountable option
  xfs: stop the sync worker before xfs_unmountfs
  xfs: xfs_seek_hole() refinement with hole searching from page cache for unwritten extents
  xfs: xfs_seek_data() refinement with unwritten extents check up from page cache
  xfs: Introduce a helper routine to probe data or hole offset from page cache
  xfs: Remove type argument from xfs_seek_data()/xfs_seek_hole()
  xfs: fix race while discarding buffers [V4]
  xfs: check for possible overflow in xfs_ioc_trim
  xfs: unlock the AGI buffer when looping in xfs_dialloc
  xfs: kill struct declarations in xfs_mount.h
  xfs: fix uninitialised variable in xfs_rtbuf_get()
parents aab174f0 2ea03929
Loading
Loading
Loading
Loading
+344 −35
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@

#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>

static const struct vm_operations_struct xfs_file_vm_ops;

@@ -959,17 +960,232 @@ xfs_vm_page_mkwrite(
	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
}

/*
 * This type is designed to indicate the type of offset we would like
 * to search from page cache for either xfs_seek_data() or xfs_seek_hole().
 */
enum {
	HOLE_OFF = 0,
	DATA_OFF,
};

/*
 * Lookup the desired type of offset from the given page.
 *
 * On success, return true and the offset argument will point to the
 * start of the region that was found.  Otherwise this function will
 * return false and keep the offset argument unchanged.
 */
STATIC bool
xfs_lookup_buffer_offset(
	struct page		*page,
	loff_t			*offset,
	unsigned int		type)
{
	loff_t			lastoff = page_offset(page);
	bool			found = false;
	struct buffer_head	*bh, *head;

	bh = head = page_buffers(page);
	do {
		/*
		 * Unwritten extents that have data in the page
		 * cache covering them can be identified by the
		 * BH_Unwritten state flag.  Pages with multiple
		 * buffers might have a mix of holes, data and
		 * unwritten extents - any buffer with valid
		 * data in it should have BH_Uptodate flag set
		 * on it.
		 */
		if (buffer_unwritten(bh) ||
		    buffer_uptodate(bh)) {
			if (type == DATA_OFF)
				found = true;
		} else {
			if (type == HOLE_OFF)
				found = true;
		}

		if (found) {
			*offset = lastoff;
			break;
		}
		lastoff += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	return found;
}

/*
 * This routine is called to find out and return a data or hole offset
 * from the page cache for unwritten extents according to the desired
 * type for xfs_seek_data() or xfs_seek_hole().
 *
 * The argument offset is used to tell where we start to search from the
 * page cache.  Map is used to figure out the end points of the range to
 * lookup pages.
 *
 * Return true if the desired type of offset was found, and the argument
 * offset is filled with that address.  Otherwise, return false and keep
 * offset unchanged.
 */
STATIC bool
xfs_find_get_desired_pgoff(
	struct inode		*inode,
	struct xfs_bmbt_irec	*map,
	unsigned int		type,
	loff_t			*offset)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct pagevec		pvec;
	pgoff_t			index;
	pgoff_t			end;
	loff_t			endoff;
	loff_t			startoff = *offset;
	loff_t			lastoff = startoff;
	bool			found = false;

	pagevec_init(&pvec, 0);

	index = startoff >> PAGE_CACHE_SHIFT;
	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
	end = endoff >> PAGE_CACHE_SHIFT;
	do {
		int		want;
		unsigned	nr_pages;
		unsigned int	i;

		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
					  want);
		/*
		 * No page mapped into given range.  If we are searching holes
		 * and if this is the first time we got into the loop, it means
		 * that the given offset is landed in a hole, return it.
		 *
		 * If we have already stepped through some block buffers to find
		 * holes but they all contains data.  In this case, the last
		 * offset is already updated and pointed to the end of the last
		 * mapped page, if it does not reach the endpoint to search,
		 * that means there should be a hole between them.
		 */
		if (nr_pages == 0) {
			/* Data search found nothing */
			if (type == DATA_OFF)
				break;

			ASSERT(type == HOLE_OFF);
			if (lastoff == startoff || lastoff < endoff) {
				found = true;
				*offset = lastoff;
			}
			break;
		}

		/*
		 * At lease we found one page.  If this is the first time we
		 * step into the loop, and if the first page index offset is
		 * greater than the given search offset, a hole was found.
		 */
		if (type == HOLE_OFF && lastoff == startoff &&
		    lastoff < page_offset(pvec.pages[0])) {
			found = true;
			break;
		}

		for (i = 0; i < nr_pages; i++) {
			struct page	*page = pvec.pages[i];
			loff_t		b_offset;

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL),
			 * or even swizzled back from swapper_space to tmpfs
			 * file mapping. However, page->index will not change
			 * because we have a reference on the page.
			 *
			 * Searching done if the page index is out of range.
			 * If the current offset is not reaches the end of
			 * the specified search range, there should be a hole
			 * between them.
			 */
			if (page->index > end) {
				if (type == HOLE_OFF && lastoff < endoff) {
					*offset = lastoff;
					found = true;
				}
				goto out;
			}

			lock_page(page);
			/*
			 * Page truncated or invalidated(page->mapping == NULL).
			 * We can freely skip it and proceed to check the next
			 * page.
			 */
			if (unlikely(page->mapping != inode->i_mapping)) {
				unlock_page(page);
				continue;
			}

			if (!page_has_buffers(page)) {
				unlock_page(page);
				continue;
			}

			found = xfs_lookup_buffer_offset(page, &b_offset, type);
			if (found) {
				/*
				 * The found offset may be less than the start
				 * point to search if this is the first time to
				 * come here.
				 */
				*offset = max_t(loff_t, startoff, b_offset);
				unlock_page(page);
				goto out;
			}

			/*
			 * We either searching data but nothing was found, or
			 * searching hole but found a data buffer.  In either
			 * case, probably the next page contains the desired
			 * things, update the last offset to it so.
			 */
			lastoff = page_offset(page) + PAGE_SIZE;
			unlock_page(page);
		}

		/*
		 * The number of returned pages less than our desired, search
		 * done.  In this case, nothing was found for searching data,
		 * but we found a hole behind the last offset.
		 */
		if (nr_pages < want) {
			if (type == HOLE_OFF) {
				*offset = lastoff;
				found = true;
			}
			break;
		}

		index = pvec.pages[i - 1]->index + 1;
		pagevec_release(&pvec);
	} while (index <= end);

out:
	pagevec_release(&pvec);
	return found;
}

STATIC loff_t
xfs_seek_data(
	struct file		*file,
	loff_t			start,
	u32			type)
	loff_t			start)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	map[2];
	int			nmap = 2;
	loff_t			uninitialized_var(offset);
	xfs_fsize_t		isize;
	xfs_fileoff_t		fsbno;
@@ -985,36 +1201,74 @@ xfs_seek_data(
		goto out_unlock;
	}

	fsbno = XFS_B_TO_FSBT(mp, start);

	/*
	 * Try to read extents from the first block indicated
	 * by fsbno to the end block of the file.
	 */
	fsbno = XFS_B_TO_FSBT(mp, start);
	end = XFS_B_TO_FSB(mp, isize);
	for (;;) {
		struct xfs_bmbt_irec	map[2];
		int			nmap = 2;
		unsigned int		i;

		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
				       XFS_BMAPI_ENTIRE);
		if (error)
			goto out_unlock;

		/* No extents at given offset, must be beyond EOF */
		if (nmap == 0) {
			error = ENXIO;
			goto out_unlock;
		}

		for (i = 0; i < nmap; i++) {
			offset = max_t(loff_t, start,
				       XFS_FSB_TO_B(mp, map[i].br_startoff));

			/* Landed in a data extent */
			if (map[i].br_startblock == DELAYSTARTBLOCK ||
			    (map[i].br_state == XFS_EXT_NORM &&
			     !isnullstartblock(map[i].br_startblock)))
				goto out;

			/*
	 * Treat unwritten extent as data extent since it might
	 * contains dirty data in page cache.
			 * Landed in an unwritten extent, try to search data
			 * from page cache.
			 */
			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
				if (xfs_find_get_desired_pgoff(inode, &map[i],
							DATA_OFF, &offset))
					goto out;
			}
		}

		/*
		 * map[0] is hole or its an unwritten extent but
		 * without data in page cache.  Probably means that
		 * we are reading after EOF if nothing in map[1].
		 */
	if (map[0].br_startblock != HOLESTARTBLOCK) {
		offset = max_t(loff_t, start,
			       XFS_FSB_TO_B(mp, map[0].br_startoff));
	} else {
		if (nmap == 1) {
			error = ENXIO;
			goto out_unlock;
		}

		offset = max_t(loff_t, start,
			       XFS_FSB_TO_B(mp, map[1].br_startoff));
		ASSERT(i > 1);

		/*
		 * Nothing was found, proceed to the next round of search
		 * if reading offset not beyond or hit EOF.
		 */
		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
		start = XFS_FSB_TO_B(mp, fsbno);
		if (start >= isize) {
			error = ENXIO;
			goto out_unlock;
		}
	}

out:
	if (offset != file->f_pos)
		file->f_pos = offset;

@@ -1029,16 +1283,15 @@ out_unlock:
STATIC loff_t
xfs_seek_hole(
	struct file		*file,
	loff_t			start,
	u32			type)
	loff_t			start)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	loff_t			uninitialized_var(offset);
	loff_t			holeoff;
	xfs_fsize_t		isize;
	xfs_fileoff_t		fsbno;
	xfs_filblks_t		end;
	uint			lock;
	int			error;

@@ -1054,21 +1307,77 @@ xfs_seek_hole(
	}

	fsbno = XFS_B_TO_FSBT(mp, start);
	error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
	end = XFS_B_TO_FSB(mp, isize);

	for (;;) {
		struct xfs_bmbt_irec	map[2];
		int			nmap = 2;
		unsigned int		i;

		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
				       XFS_BMAPI_ENTIRE);
		if (error)
			goto out_unlock;

	holeoff = XFS_FSB_TO_B(mp, fsbno);
	if (holeoff <= start)
		offset = start;
	else {
		/* No extents at given offset, must be beyond EOF */
		if (nmap == 0) {
			error = ENXIO;
			goto out_unlock;
		}

		for (i = 0; i < nmap; i++) {
			offset = max_t(loff_t, start,
				       XFS_FSB_TO_B(mp, map[i].br_startoff));

			/* Landed in a hole */
			if (map[i].br_startblock == HOLESTARTBLOCK)
				goto out;

			/*
		 * xfs_bmap_first_unused() could return a value bigger than
		 * isize if there are no more holes past the supplied offset.
			 * Landed in an unwritten extent, try to search hole
			 * from page cache.
			 */
		offset = min_t(loff_t, holeoff, isize);
			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
				if (xfs_find_get_desired_pgoff(inode, &map[i],
							HOLE_OFF, &offset))
					goto out;
			}
		}

		/*
		 * map[0] contains data or its unwritten but contains
		 * data in page cache, probably means that we are
		 * reading after EOF.  We should fix offset to point
		 * to the end of the file(i.e., there is an implicit
		 * hole at the end of any file).
		 */
		if (nmap == 1) {
			offset = isize;
			break;
		}

		ASSERT(i > 1);

		/*
		 * Both mappings contains data, proceed to the next round of
		 * search if the current reading offset not beyond or hit EOF.
		 */
		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
		start = XFS_FSB_TO_B(mp, fsbno);
		if (start >= isize) {
			offset = isize;
			break;
		}
	}

out:
	/*
	 * At this point, we must have found a hole.  However, the returned
	 * offset may be bigger than the file size as it may be aligned to
	 * page boundary for unwritten extents, we need to deal with this
	 * situation in particular.
	 */
	offset = min_t(loff_t, offset, isize);
	if (offset != file->f_pos)
		file->f_pos = offset;

@@ -1092,9 +1401,9 @@ xfs_file_llseek(
	case SEEK_SET:
		return generic_file_llseek(file, offset, origin);
	case SEEK_DATA:
		return xfs_seek_data(file, offset, origin);
		return xfs_seek_data(file, offset);
	case SEEK_HOLE:
		return xfs_seek_hole(file, offset, origin);
		return xfs_seek_hole(file, offset);
	default:
		return -EINVAL;
	}
+1 −1
Original line number Diff line number Diff line
@@ -431,7 +431,7 @@ xfs_ialloc_next_ag(

	spin_lock(&mp->m_agirotor_lock);
	agno = mp->m_agirotor;
	if (++mp->m_agirotor == mp->m_maxagi)
	if (++mp->m_agirotor >= mp->m_maxagi)
		mp->m_agirotor = 0;
	spin_unlock(&mp->m_agirotor_lock);

+5 −38
Original line number Diff line number Diff line
@@ -440,7 +440,7 @@ xfs_initialize_perag(
	xfs_agnumber_t	agcount,
	xfs_agnumber_t	*maxagi)
{
	xfs_agnumber_t	index, max_metadata;
	xfs_agnumber_t	index;
	xfs_agnumber_t	first_initialised = 0;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;
@@ -500,43 +500,10 @@ xfs_initialize_perag(
	else
		mp->m_flags &= ~XFS_MOUNT_32BITINODES;

	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
		/*
		 * Calculate how much should be reserved for inodes to meet
		 * the max inode percentage.
		 */
		if (mp->m_maxicount) {
			__uint64_t	icount;

			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
			do_div(icount, 100);
			icount += sbp->sb_agblocks - 1;
			do_div(icount, sbp->sb_agblocks);
			max_metadata = icount;
		} else {
			max_metadata = agcount;
		}

		for (index = 0; index < agcount; index++) {
			ino = XFS_AGINO_TO_INO(mp, index, agino);
			if (ino > XFS_MAXINUMBER_32) {
				index++;
				break;
			}

			pag = xfs_perag_get(mp, index);
			pag->pagi_inodeok = 1;
			if (index < max_metadata)
				pag->pagf_metadata = 1;
			xfs_perag_put(pag);
		}
	} else {
		for (index = 0; index < agcount; index++) {
			pag = xfs_perag_get(mp, index);
			pag->pagi_inodeok = 1;
			xfs_perag_put(pag);
		}
	}
	if (mp->m_flags & XFS_MOUNT_32BITINODES)
		index = xfs_set_inode32(mp);
	else
		index = xfs_set_inode64(mp);

	if (maxagi)
		*maxagi = index;
+0 −5
Original line number Diff line number Diff line
@@ -54,12 +54,7 @@ typedef struct xfs_trans_reservations {
#include "xfs_sync.h"

struct xlog;
struct xfs_mount_args;
struct xfs_inode;
struct xfs_bmbt_irec;
struct xfs_bmap_free;
struct xfs_extdelta;
struct xfs_swapext;
struct xfs_mru_cache;
struct xfs_nameops;
struct xfs_ail;
+94 −1
Original line number Diff line number Diff line
@@ -88,6 +88,8 @@ mempool_t *xfs_ioend_pool;
					 * unwritten extent conversion */
#define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
#define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
#define MNTOPT_32BITINODE   "inode32"	/* inode allocation limited to
					 * XFS_MAXINUMBER_32 */
#define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
#define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
#define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
@@ -120,12 +122,18 @@ mempool_t *xfs_ioend_pool;
 * in the future, too.
 */
enum {
	Opt_barrier, Opt_nobarrier, Opt_err
	Opt_barrier,
	Opt_nobarrier,
	Opt_inode64,
	Opt_inode32,
	Opt_err
};

static const match_table_t tokens = {
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_inode64, "inode64"},
	{Opt_inode32, "inode32"},
	{Opt_err, NULL}
};

@@ -197,7 +205,9 @@ xfs_parseargs(
	 */
	mp->m_flags |= XFS_MOUNT_BARRIER;
	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
#if !XFS_BIG_INUMS
	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
#endif

	/*
	 * These can be overridden by the mount option parsing.
@@ -294,6 +304,8 @@ xfs_parseargs(
				return EINVAL;
			}
			dswidth = simple_strtoul(value, &eov, 10);
		} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
#if !XFS_BIG_INUMS
@@ -492,6 +504,7 @@ xfs_showargs(
		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_32BITINODE },
		{ 0, NULL }
	};
	static struct proc_xfs_info xfs_info_unset[] = {
@@ -591,6 +604,80 @@ xfs_max_file_offset(
	return (((__uint64_t)pagefactor) << bitshift) - 1;
}

xfs_agnumber_t
xfs_set_inode32(struct xfs_mount *mp)
{
	xfs_agnumber_t	index = 0;
	xfs_agnumber_t	maxagi = 0;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_agnumber_t	max_metadata;
	xfs_agino_t	agino =	XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
	xfs_ino_t	ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
	xfs_perag_t	*pag;

	/* Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.
	 */
	if (mp->m_maxicount) {
		__uint64_t	icount;

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
		max_metadata = sbp->sb_agcount;
	}

	for (index = 0; index < sbp->sb_agcount; index++) {
		ino = XFS_AGINO_TO_INO(mp, index, agino);

		if (ino > XFS_MAXINUMBER_32) {
			pag = xfs_perag_get(mp, index);
			pag->pagi_inodeok = 0;
			pag->pagf_metadata = 0;
			xfs_perag_put(pag);
			continue;
		}

		pag = xfs_perag_get(mp, index);
		pag->pagi_inodeok = 1;
		maxagi++;
		if (index < max_metadata)
			pag->pagf_metadata = 1;
		xfs_perag_put(pag);
	}
	mp->m_flags |= (XFS_MOUNT_32BITINODES |
			XFS_MOUNT_SMALL_INUMS);

	return maxagi;
}

xfs_agnumber_t
xfs_set_inode64(struct xfs_mount *mp)
{
	xfs_agnumber_t index = 0;

	for (index = 0; index < mp->m_sb.sb_agcount; index++) {
		struct xfs_perag	*pag;

		pag = xfs_perag_get(mp, index);
		pag->pagi_inodeok = 1;
		pag->pagf_metadata = 0;
		xfs_perag_put(pag);
	}

	/* There is no need for lock protection on m_flags,
	 * the rw_semaphore of the VFS superblock is locked
	 * during mount/umount/remount operations, so this is
	 * enough to avoid concurency on the m_flags field
	 */
	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
			 XFS_MOUNT_SMALL_INUMS);
	return index;
}

STATIC int
xfs_blkdev_get(
	xfs_mount_t		*mp,
@@ -1056,6 +1143,12 @@ xfs_fs_remount(
		case Opt_nobarrier:
			mp->m_flags &= ~XFS_MOUNT_BARRIER;
			break;
		case Opt_inode64:
			mp->m_maxagi = xfs_set_inode64(mp);
			break;
		case Opt_inode32:
			mp->m_maxagi = xfs_set_inode32(mp);
			break;
		default:
			/*
			 * Logically we would return an error here to prevent
Loading