Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7d4fb40a authored by Nathan Scott
Browse files

[XFS] Start writeout earlier (on last close) in the case where we have a
truncate down followed by delayed allocation (buffered writes) - worst
case scenario for the notorious NULL files problem.  This reduces the
window where we are exposed to that problem significantly.

SGI-PV: 917976
SGI-Modid: xfs-linux-melb:xfs-kern:26100a

Signed-off-by: Nathan Scott <nathans@sgi.com>
parent 59c1b082
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -1157,6 +1157,18 @@ out_unlock:
	return error;
}

/*
 * Write back dirty pages of a mapping.
 *
 * If the vnode behind the mapping is still marked as truncated, the
 * mark is cleared first; the actual writeback is then delegated to
 * generic_writepages(), whose result is returned unchanged.
 */
STATIC int
xfs_vm_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	struct inode		*inode = mapping->host;
	struct vnode		*vnode = vn_from_inode(inode);

	/* Only clear the flag (under the vnode lock) when it is set. */
	if (VN_TRUNC(vnode)) {
		VUNTRUNCATE(vnode);
	}

	return generic_writepages(mapping, wbc);
}

/*
 * Called to move a page into cleanable state - and from there
 * to be released. Possibly the page is already clean. We always
@@ -1451,6 +1463,7 @@ struct address_space_operations xfs_address_space_operations = {
	.readpage		= xfs_vm_readpage,
	.readpages		= xfs_vm_readpages,
	.writepage		= xfs_vm_writepage,
	.writepages		= xfs_vm_writepages,
	.sync_page		= block_sync_page,
	.releasepage		= xfs_vm_releasepage,
	.invalidatepage		= xfs_vm_invalidatepage,
+15 −0
Original line number Diff line number Diff line
@@ -323,6 +323,17 @@ xfs_file_open(
	return -error;
}

STATIC int
xfs_file_close(
	struct file	*filp)
{
	vnode_t		*vp = vn_from_inode(filp->f_dentry->d_inode);
	int		error;

	VOP_CLOSE(vp, 0, file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL, error);
	return -error;
}

STATIC int
xfs_file_release(
	struct inode	*inode,
@@ -349,6 +360,8 @@ xfs_file_fsync(

	if (datasync)
		flags |= FSYNC_DATA;
	if (VN_TRUNC(vp))
		VUNTRUNCATE(vp);
	VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
	return -error;
}
@@ -578,6 +591,7 @@ const struct file_operations xfs_file_operations = {
#endif
	.mmap		= xfs_file_mmap,
	.open		= xfs_file_open,
	.flush		= xfs_file_close,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
#ifdef HAVE_FOP_OPEN_EXEC
@@ -602,6 +616,7 @@ const struct file_operations xfs_invis_file_operations = {
#endif
	.mmap		= xfs_file_mmap,
	.open		= xfs_file_open,
	.flush		= xfs_file_close,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
};
+9 −44
Original line number Diff line number Diff line
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
@@ -15,40 +15,12 @@
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "xfs.h"

/*
 * Stub for no-op vnode operations that return error status.
 */
int
fs_noerr(void)
{
	return 0;
}
int  fs_noerr(void) { return 0; }
int  fs_nosys(void) { return ENOSYS; }
void fs_noval(void) { return; }

/*
 * Operation unsupported under this file system.
 */
int
fs_nosys(void)
{
	return ENOSYS;
}

/*
 * Stub for inactive, strategy, and read/write lock/unlock.  Does nothing.
 */
/* ARGSUSED */
void
fs_noval(void)
{
}

/*
 * vnode pcache layer for vnode_tosspages.
 * 'last' parameter unused but left in for IRIX compatibility
 */
void
fs_tosspages(
	bhv_desc_t	*bdp,
@@ -63,11 +35,6 @@ fs_tosspages(
		truncate_inode_pages(ip->i_mapping, first);
}


/*
 * vnode pcache layer for vnode_flushinval_pages.
 * 'last' parameter unused but left in for IRIX compatibility
 */
void
fs_flushinval_pages(
	bhv_desc_t	*bdp,
@@ -79,16 +46,13 @@ fs_flushinval_pages(
	struct inode	*ip = vn_to_inode(vp);

	if (VN_CACHED(vp)) {
		if (VN_TRUNC(vp))
			VUNTRUNCATE(vp);
		filemap_write_and_wait(ip->i_mapping);

		truncate_inode_pages(ip->i_mapping, first);
	}
}

/*
 * vnode pcache layer for vnode_flush_pages.
 * 'last' parameter unused but left in for IRIX compatibility
 */
int
fs_flush_pages(
	bhv_desc_t	*bdp,
@@ -100,12 +64,13 @@ fs_flush_pages(
	vnode_t		*vp = BHV_TO_VNODE(bdp);
	struct inode	*ip = vn_to_inode(vp);

	if (VN_CACHED(vp)) {
	if (VN_DIRTY(vp)) {
		if (VN_TRUNC(vp))
			VUNTRUNCATE(vp);
		filemap_fdatawrite(ip->i_mapping);
		if (flags & XFS_B_ASYNC)
			return 0;
		filemap_fdatawait(ip->i_mapping);
	}

	return 0;
}
+25 −14
Original line number Diff line number Diff line
@@ -56,12 +56,18 @@ typedef xfs_ino_t vnumber_t;
typedef struct dentry vname_t;
typedef bhv_head_t vn_bhv_head_t;

/*
 * Vnode state flags, kept in the vnode's v_flag field.
 */
typedef enum vflags {
	/* XFS inode state possibly differs to the Linux inode state. */
	VMODIFIED	= (1 << 3),
	/* Truncated down, so flush-on-close. */
	VTRUNCATED	= (1 << 6),
} vflags_t;

/*
 * MP locking protocols:
 *	v_flag, v_vfsp				VN_LOCK/VN_UNLOCK
 */
typedef struct vnode {
	__u32		v_flag;			/* vnode flags (see below) */
	vflags_t	v_flag;			/* vnode flags (see above) */
	struct vfs	*v_vfsp;		/* ptr to containing VFS */
	vnumber_t	v_number;		/* in-core vnode number */
	vn_bhv_head_t	v_bh;			/* behavior head */
@@ -125,12 +131,6 @@ static inline struct inode *vn_to_inode(struct vnode *vnode)
	return &vnode->v_inode;
}

/*
 * Vnode flags.
 */
#define VMODIFIED	       0x8	/* XFS inode state possibly differs */
					/* to the Linux inode state.	*/

/*
 * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
 */
@@ -162,8 +162,10 @@ typedef enum vchange {
	VCHANGE_FLAGS_IOEXCL_COUNT	= 4
} vchange_t;

/* Tells a close operation whether it is the last close (L_TRUE) or not. */
typedef enum {
	L_FALSE = 0,
	L_TRUE = 1
} lastclose_t;

typedef int	(*vop_open_t)(bhv_desc_t *, struct cred *);
typedef int	(*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
				const struct iovec *, unsigned int,
				loff_t *, int, struct cred *);
@@ -234,6 +236,7 @@ typedef int (*vop_iflush_t)(bhv_desc_t *, int);
typedef struct vnodeops {
	bhv_position_t  vn_position;    /* position within behavior chain */
	vop_open_t		vop_open;
	vop_close_t		vop_close;
	vop_read_t		vop_read;
	vop_write_t		vop_write;
	vop_sendfile_t		vop_sendfile;
@@ -278,6 +281,10 @@ typedef struct vnodeops {
 */
#define _VOP_(op, vp)	(*((vnodeops_t *)(vp)->v_fops)->op)

#define VOP_OPEN(vp, cr, rv)						\
	rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
#define VOP_CLOSE(vp, f, last, cr, rv)					\
	rv = _VOP_(vop_close, vp)((vp)->v_fbhv, f, last, cr)
#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv)			\
	rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv)		\
@@ -290,8 +297,6 @@ typedef struct vnodeops {
	rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
#define VOP_BMAP(vp,of,sz,rw,b,n,rv)					\
	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
#define VOP_OPEN(vp, cr, rv)						\
	rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
#define VOP_GETATTR(vp, vap, f, cr, rv)					\
	rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
#define	VOP_SETATTR(vp, vap, f, cr, rv)					\
@@ -556,8 +561,6 @@ static inline struct vnode *vn_grab(struct vnode *vp)
 */
#define VN_LOCK(vp)		mutex_spinlock(&(vp)->v_lock)
#define VN_UNLOCK(vp, s)	mutex_spinunlock(&(vp)->v_lock, s)
#define VN_FLAGSET(vp,b)	vn_flagset(vp,b)
#define VN_FLAGCLR(vp,b)	vn_flagclr(vp,b)

static __inline__ void vn_flagset(struct vnode *vp, uint flag)
{
@@ -566,13 +569,22 @@ static __inline__ void vn_flagset(struct vnode *vp, uint flag)
	spin_unlock(&vp->v_lock);
}

static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
static __inline__ uint vn_flagclr(struct vnode *vp, uint flag)
{
	uint	cleared;

	spin_lock(&vp->v_lock);
	cleared = (vp->v_flag & flag);
	vp->v_flag &= ~flag;
	spin_unlock(&vp->v_lock);
	return cleared;
}

#define VMODIFY(vp)	vn_flagset(vp, VMODIFIED)
#define VUNMODIFY(vp)	vn_flagclr(vp, VMODIFIED)
#define VTRUNCATE(vp)	vn_flagset(vp, VTRUNCATED)
#define VUNTRUNCATE(vp)	vn_flagclr(vp, VTRUNCATED)

/*
 * Dealing with bad inodes
 */
@@ -612,8 +624,7 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
#define VN_CACHED(vp)	(vn_to_inode(vp)->i_mapping->nrpages)
#define VN_DIRTY(vp)	mapping_tagged(vn_to_inode(vp)->i_mapping, \
					PAGECACHE_TAG_DIRTY)
#define VMODIFY(vp)	VN_FLAGSET(vp, VMODIFIED)
#define VUNMODIFY(vp)	VN_FLAGCLR(vp, VMODIFIED)
#define VN_TRUNC(vp)	((vp)->v_flag & VTRUNCATED)

/*
 * Flags to VOP_SETATTR/VOP_GETATTR.
+52 −36
Original line number Diff line number Diff line
@@ -16,8 +16,6 @@
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include <linux/capability.h>

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
@@ -58,32 +56,14 @@
#include "xfs_log_priv.h"
#include "xfs_mac.h"


/*
 * The maximum pathlen is 1024 bytes. Since the minimum file system
 * blocksize is 512 bytes, we can get a max of 2 extents back from
 * bmapi.
 */
#define SYMLINK_MAPS 2

/*
 * For xfs, we check that the file isn't too big to be opened by this kernel.
 * No other open action is required for regular files.  Devices are handled
 * through the specfs file system, pipes through fifofs.  Device and
 * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
 * when a new vnode is first looked up or created.
 */
STATIC int
xfs_open(
	bhv_desc_t	*bdp,
	cred_t		*credp)
{
	int		mode;
	vnode_t		*vp;
	xfs_inode_t	*ip;

	vp = BHV_TO_VNODE(bdp);
	ip = XFS_BHVTOI(bdp);
	vnode_t		*vp = BHV_TO_VNODE(bdp);
	xfs_inode_t	*ip = XFS_BHVTOI(bdp);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return XFS_ERROR(EIO);
@@ -101,6 +81,36 @@ xfs_open(
	return 0;
}

/*
 * xfs_close
 *
 * Close handler for the XFS vnode.  Returns XFS_ERROR(EIO) if the
 * filesystem has been forcibly shut down, and 0 without doing anything
 * for closes that are not the last one or for vnodes that are not
 * regular files.  Otherwise it may start an asynchronous flush (see
 * below) and returns the status set by VOP_FLUSH_PAGES, or 0 if no
 * flush was started.  The flags and credp arguments are not used here.
 */
STATIC int
xfs_close(
	bhv_desc_t	*bdp,
	int		flags,
	lastclose_t	lastclose,
	cred_t		*credp)
{
	vnode_t		*vnode = BHV_TO_VNODE(bdp);
	xfs_inode_t	*xip = XFS_BHVTOI(bdp);
	int		error = 0;

	if (XFS_FORCED_SHUTDOWN(xip->i_mount))
		return XFS_ERROR(EIO);

	if (lastclose != L_TRUE)
		return 0;
	if (!VN_ISREG(vnode))
		return 0;

	/*
	 * If we previously truncated this file and removed old data in
	 * the process, we want to initiate "early" writeout on the last
	 * close.  This is an attempt to combat the notorious NULL files
	 * problem which is particularly noticeable from a truncate down,
	 * buffered (re-)write (delalloc), followed by a crash.  What we
	 * are effectively doing here is significantly reducing the time
	 * window where we'd otherwise be exposed to that problem.
	 *
	 * The truncated mark is cleared first, whether or not a flush
	 * ends up being started.
	 */
	if (!VUNTRUNCATE(vnode))
		return error;
	if (!VN_DIRTY(vnode))
		return error;
	if (!(xip->i_delayed_blks > 0))
		return error;

	VOP_FLUSH_PAGES(vnode, 0, -1, XFS_B_ASYNC, FI_NONE, error);
	return error;
}

/*
 * xfs_getattr
@@ -665,9 +675,17 @@ xfs_setattr(
					    ((ip->i_d.di_nlink != 0 ||
					      !(mp->m_flags & XFS_MOUNT_WSYNC))
					     ? 1 : 0));
			if (code) {
			if (code)
				goto abort_return;
			}
			/*
			 * Truncated "down", so we're removing references
			 * to old data here - if we now delay flushing for
			 * a long time, we expose ourselves unduly to the
			 * notorious NULL files problem.  So, we mark this
			 * vnode and flush it when the file is closed, and
			 * do not wait the usual (long) time for writeout.
			 */
			VTRUNCATE(vp);
		}
		/*
		 * Have to do this even if the file's size doesn't change.
@@ -936,6 +954,13 @@ xfs_access(
}


/*
 * The maximum pathlen is 1024 bytes. Since the minimum file system
 * blocksize is 512 bytes, we can get a max of 2 extents back from
 * bmapi.
 */
#define SYMLINK_MAPS 2

/*
 * xfs_readlink
 *
@@ -1470,9 +1495,6 @@ xfs_inactive_symlink_local(
	return 0;
}

/*
 *
 */
STATIC int
xfs_inactive_attrs(
	xfs_inode_t	*ip,
@@ -1531,10 +1553,10 @@ xfs_release(

	vp = BHV_TO_VNODE(bdp);
	ip = XFS_BHVTOI(bdp);
	mp = ip->i_mount;

	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) {
	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
		return 0;
	}

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
@@ -1546,8 +1568,6 @@ xfs_release(
		return 0;
#endif

	mp = ip->i_mount;

	if (ip->i_d.di_nlink != 0) {
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -3745,7 +3765,6 @@ xfs_inode_flush(
	return error;
}


int
xfs_set_dmattrs (
	bhv_desc_t	*bdp,
@@ -3786,10 +3805,6 @@ xfs_set_dmattrs (
	return error;
}


/*
 * xfs_reclaim
 */
STATIC int
xfs_reclaim(
	bhv_desc_t	*bdp)
@@ -4645,6 +4660,7 @@ xfs_change_file_space(
vnodeops_t xfs_vnodeops = {
	BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
	.vop_open		= xfs_open,
	.vop_close		= xfs_close,
	.vop_read		= xfs_read,
#ifdef HAVE_SENDFILE
	.vop_sendfile		= xfs_sendfile,