Loading fs/xfs/xfs_bmap_util.c +15 −16 Original line number Diff line number Diff line Loading @@ -1599,13 +1599,6 @@ xfs_swap_extent_flush( /* Verify O_DIRECT for ftmp */ if (VFS_I(ip)->i_mapping->nrpages) return -EINVAL; /* * Don't try to swap extents on mmap()d files because we can't lock * out races against page faults safely. */ if (mapping_mapped(VFS_I(ip)->i_mapping)) return -EBUSY; return 0; } Loading Loading @@ -1633,13 +1626,14 @@ xfs_swap_extents( } /* * Lock up the inodes against other IO and truncate to begin with. * Then we can ensure the inodes are flushed and have no page cache * safely. Once we have done this we can take the ilocks and do the rest * of the checks. * Lock the inodes against other IO, page faults and truncate to * begin with. Then we can ensure the inodes are flushed and have no * page cache safely. Once we have done this we can take the ilocks and * do the rest of the checks. */ lock_flags = XFS_IOLOCK_EXCL; lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { Loading @@ -1666,8 +1660,16 @@ xfs_swap_extents( xfs_trans_cancel(tp, 0); goto out_unlock; } /* * Lock and join the inodes to the transaction so that transaction commit * or cancel will unlock the inodes from this point onwards. */ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); lock_flags |= XFS_ILOCK_EXCL; xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ijoin(tp, tip, lock_flags); /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || Loading Loading @@ -1720,9 +1722,6 @@ xfs_swap_extents( goto out_trans_cancel; } xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ijoin(tp, tip, lock_flags); /* * Before we've swapped the forks, lets set the owners of the forks * appropriately. 
We have to do this as we are demand paging the btree Loading Loading @@ -1856,5 +1855,5 @@ xfs_swap_extents( out_trans_cancel: xfs_trans_cancel(tp, 0); goto out_unlock; goto out; } fs/xfs/xfs_file.c +54 −16 Original line number Diff line number Diff line Loading @@ -847,6 +847,9 @@ xfs_file_fallocate( if (error) goto out_unlock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); iolock |= XFS_MMAPLOCK_EXCL; if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) Loading Loading @@ -996,20 +999,6 @@ xfs_file_mmap( return 0; } /* * mmap()d file has taken write protection fault and is being made * writable. We can set the page state up correctly for a writable * page, which means we can do correct delalloc accounting (ENOSPC * checking!) and unwritten extent mapping. */ STATIC int xfs_vm_page_mkwrite( struct vm_area_struct *vma, struct vm_fault *vmf) { return block_page_mkwrite(vma, vmf, xfs_get_blocks); } /* * This type is designed to indicate the type of offset we would like * to search from page cache for xfs_seek_hole_data(). Loading Loading @@ -1385,6 +1374,55 @@ xfs_file_llseek( } } /* * Locking for serialisation of IO during page faults. This results in a lock * ordering of: * * mmap_sem (MM) * i_mmap_lock (XFS - truncate serialisation) * page_lock (MM) * i_lock (XFS - extent map serialisation) */ STATIC int xfs_filemap_fault( struct vm_area_struct *vma, struct vm_fault *vmf) { struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); int error; trace_xfs_filemap_fault(ip); xfs_ilock(ip, XFS_MMAPLOCK_SHARED); error = filemap_fault(vma, vmf); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); return error; } /* * mmap()d file has taken write protection fault and is being made writable. We * can set the page state up correctly for a writable page, which means we can * do correct delalloc accounting (ENOSPC checking!) and unwritten extent * mapping. 
*/ STATIC int xfs_filemap_page_mkwrite( struct vm_area_struct *vma, struct vm_fault *vmf) { struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); int error; trace_xfs_filemap_page_mkwrite(ip); xfs_ilock(ip, XFS_MMAPLOCK_SHARED); error = block_page_mkwrite(vma, vmf, xfs_get_blocks); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); return error; } const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, .read = new_sync_read, Loading Loading @@ -1417,7 +1455,7 @@ const struct file_operations xfs_dir_file_operations = { }; static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, .fault = xfs_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_vm_page_mkwrite, .page_mkwrite = xfs_filemap_page_mkwrite, }; fs/xfs/xfs_inode.c +97 −31 Original line number Diff line number Diff line Loading @@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared( } /* * The xfs inode contains 2 locks: a multi-reader lock called the * i_iolock and a multi-reader lock called the i_lock. This routine * allows either or both of the locks to be obtained. * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and * the i_lock. This routine allows various combinations of the locks to be * obtained. * * The 2 locks should always be ordered so that the IO lock is * obtained first in order to prevent deadlock. * The 3 locks should always be ordered so that the IO lock is obtained first, * the mmap lock second and the ilock last in order to prevent deadlock. * * ip -- the inode being locked * lock_flags -- this parameter indicates the inode's locks * to be locked. 
It can be: * XFS_IOLOCK_SHARED, * XFS_IOLOCK_EXCL, * XFS_ILOCK_SHARED, * XFS_ILOCK_EXCL, * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL * Basic locking order: * * i_iolock -> i_mmap_lock -> page_lock -> i_ilock * * mmap_sem locking order: * * i_iolock -> page lock -> mmap_sem * mmap_sem -> i_mmap_lock -> page_lock * * The difference in mmap_sem locking order means that we cannot hold the * i_mmap_lock over syscall based read(2)/write(2) IO. These IO paths can * fault in pages during copy in/out (for buffered IO) or require the mmap_sem * in get_user_pages() to map the user pages into the kernel address space for * direct IO. Similarly the i_iolock cannot be taken inside a page fault because * page faults already hold the mmap_sem. * * Hence to serialise fully against both syscall and mmap based IO, we need to * take both the i_iolock and the i_mmap_lock. These locks should *only* be both * taken in places where we need to invalidate the page cache in a race * free manner (e.g. truncate, hole punch and other extent manipulation * functions). 
*/ void xfs_ilock( Loading @@ -150,6 +160,8 @@ xfs_ilock( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); Loading @@ -159,6 +171,11 @@ xfs_ilock( else if (lock_flags & XFS_IOLOCK_SHARED) mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); if (lock_flags & XFS_MMAPLOCK_EXCL) mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); else if (lock_flags & XFS_MMAPLOCK_SHARED) mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); if (lock_flags & XFS_ILOCK_EXCL) mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); else if (lock_flags & XFS_ILOCK_SHARED) Loading Loading @@ -191,6 +208,8 @@ xfs_ilock_nowait( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); Loading @@ -202,15 +221,29 @@ xfs_ilock_nowait( if (!mrtryaccess(&ip->i_iolock)) goto out; } if (lock_flags & XFS_MMAPLOCK_EXCL) { if (!mrtryupdate(&ip->i_mmaplock)) goto out_undo_iolock; } else if (lock_flags & XFS_MMAPLOCK_SHARED) { if (!mrtryaccess(&ip->i_mmaplock)) goto out_undo_iolock; } if (lock_flags & XFS_ILOCK_EXCL) { if (!mrtryupdate(&ip->i_lock)) goto out_undo_iolock; goto out_undo_mmaplock; } else if (lock_flags & XFS_ILOCK_SHARED) { if (!mrtryaccess(&ip->i_lock)) goto out_undo_iolock; goto out_undo_mmaplock; } return 1; out_undo_mmaplock: if (lock_flags & XFS_MMAPLOCK_EXCL) mrunlock_excl(&ip->i_mmaplock); else if (lock_flags 
& XFS_MMAPLOCK_SHARED) mrunlock_shared(&ip->i_mmaplock); out_undo_iolock: if (lock_flags & XFS_IOLOCK_EXCL) mrunlock_excl(&ip->i_iolock); Loading Loading @@ -244,6 +277,8 @@ xfs_iunlock( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); Loading @@ -254,6 +289,11 @@ xfs_iunlock( else if (lock_flags & XFS_IOLOCK_SHARED) mrunlock_shared(&ip->i_iolock); if (lock_flags & XFS_MMAPLOCK_EXCL) mrunlock_excl(&ip->i_mmaplock); else if (lock_flags & XFS_MMAPLOCK_SHARED) mrunlock_shared(&ip->i_mmaplock); if (lock_flags & XFS_ILOCK_EXCL) mrunlock_excl(&ip->i_lock); else if (lock_flags & XFS_ILOCK_SHARED) Loading @@ -271,11 +311,14 @@ xfs_ilock_demote( xfs_inode_t *ip, uint lock_flags) { ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); if (lock_flags & XFS_ILOCK_EXCL) mrdemote(&ip->i_lock); if (lock_flags & XFS_MMAPLOCK_EXCL) mrdemote(&ip->i_mmaplock); if (lock_flags & XFS_IOLOCK_EXCL) mrdemote(&ip->i_iolock); Loading @@ -294,6 +337,12 @@ xfs_isilocked( return rwsem_is_locked(&ip->i_lock.mr_lock); } if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { if (!(lock_flags & XFS_MMAPLOCK_SHARED)) return !!ip->i_mmaplock.mr_writer; return rwsem_is_locked(&ip->i_mmaplock.mr_lock); } if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { if (!(lock_flags & XFS_IOLOCK_SHARED)) return !!ip->i_iolock.mr_writer; Loading @@ -314,14 +363,27 @@ int xfs_lock_delays; #endif /* * Bump the subclass so xfs_lock_inodes() acquires each lock with * a 
different value * Bump the subclass so xfs_lock_inodes() acquires each lock with a different * value. This shouldn't be called for page fault locking, but we also need to * ensure we don't overrun the number of lockdep subclasses for the iolock or * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. */ static inline int xfs_lock_inumorder(int lock_mode, int subclass) { if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { ASSERT(subclass + XFS_LOCK_INUMORDER < (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; } if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { ASSERT(subclass + XFS_LOCK_INUMORDER < (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_MMAPLOCK_SHIFT; } if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; Loading Loading @@ -440,10 +502,10 @@ xfs_lock_inodes( } /* * xfs_lock_two_inodes() can only be used to lock one type of lock * at a time - the iolock or the ilock, but not both at once. If * we lock both at once, lockdep will report false positives saying * we have violated locking orders. * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - * the iolock, the mmaplock or the ilock, but not more than one at a time. If we * lock more than one at a time, lockdep will report false positives saying we * have violated locking orders. 
*/ void xfs_lock_two_inodes( Loading @@ -455,8 +517,12 @@ xfs_lock_two_inodes( int attempts = 0; xfs_log_item_t *lp; if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { Loading fs/xfs/xfs_inode.h +22 −7 Original line number Diff line number Diff line Loading @@ -56,6 +56,7 @@ typedef struct xfs_inode { struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ mrlock_t i_mmaplock; /* inode mmap IO lock */ atomic_t i_pincount; /* inode pin count */ spinlock_t i_flags_lock; /* inode i_flags lock */ /* Miscellaneous state. 
*/ Loading Loading @@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHARED (1<<1) #define XFS_ILOCK_EXCL (1<<2) #define XFS_ILOCK_SHARED (1<<3) #define XFS_MMAPLOCK_EXCL (1<<4) #define XFS_MMAPLOCK_SHARED (1<<5) #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED) #define XFS_LOCK_FLAGS \ { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ { XFS_ILOCK_SHARED, "ILOCK_SHARED" } { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \ { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" } /* Loading Loading @@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHIFT 16 #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) #define XFS_MMAPLOCK_SHIFT 20 #define XFS_ILOCK_SHIFT 24 #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) #define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) #define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) #define XFS_IOLOCK_DEP_MASK 0x00ff0000 #define XFS_IOLOCK_DEP_MASK 0x000f0000 #define XFS_MMAPLOCK_DEP_MASK 0x00f00000 #define XFS_ILOCK_DEP_MASK 0xff000000 #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ XFS_MMAPLOCK_DEP_MASK | \ XFS_ILOCK_DEP_MASK) #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \ >> XFS_IOLOCK_SHIFT) #define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \ >> XFS_MMAPLOCK_SHIFT) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \ >> XFS_ILOCK_SHIFT) /* * For multiple groups support: if S_ISGID bit is set in the parent 
Loading fs/xfs/xfs_ioctl.c +4 −1 Original line number Diff line number Diff line Loading @@ -643,6 +643,9 @@ xfs_ioc_space( if (error) goto out_unlock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); iolock |= XFS_MMAPLOCK_EXCL; switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; Loading Loading
fs/xfs/xfs_bmap_util.c +15 −16 Original line number Diff line number Diff line Loading @@ -1599,13 +1599,6 @@ xfs_swap_extent_flush( /* Verify O_DIRECT for ftmp */ if (VFS_I(ip)->i_mapping->nrpages) return -EINVAL; /* * Don't try to swap extents on mmap()d files because we can't lock * out races against page faults safely. */ if (mapping_mapped(VFS_I(ip)->i_mapping)) return -EBUSY; return 0; } Loading Loading @@ -1633,13 +1626,14 @@ xfs_swap_extents( } /* * Lock up the inodes against other IO and truncate to begin with. * Then we can ensure the inodes are flushed and have no page cache * safely. Once we have done this we can take the ilocks and do the rest * of the checks. * Lock the inodes against other IO, page faults and truncate to * begin with. Then we can ensure the inodes are flushed and have no * page cache safely. Once we have done this we can take the ilocks and * do the rest of the checks. */ lock_flags = XFS_IOLOCK_EXCL; lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { Loading @@ -1666,8 +1660,16 @@ xfs_swap_extents( xfs_trans_cancel(tp, 0); goto out_unlock; } /* * Lock and join the inodes to the transaction so that transaction commit * or cancel will unlock the inodes from this point onwards. */ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); lock_flags |= XFS_ILOCK_EXCL; xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ijoin(tp, tip, lock_flags); /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || Loading Loading @@ -1720,9 +1722,6 @@ xfs_swap_extents( goto out_trans_cancel; } xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ijoin(tp, tip, lock_flags); /* * Before we've swapped the forks, lets set the owners of the forks * appropriately. 
We have to do this as we are demand paging the btree Loading Loading @@ -1856,5 +1855,5 @@ xfs_swap_extents( out_trans_cancel: xfs_trans_cancel(tp, 0); goto out_unlock; goto out; }
fs/xfs/xfs_file.c +54 −16 Original line number Diff line number Diff line Loading @@ -847,6 +847,9 @@ xfs_file_fallocate( if (error) goto out_unlock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); iolock |= XFS_MMAPLOCK_EXCL; if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) Loading Loading @@ -996,20 +999,6 @@ xfs_file_mmap( return 0; } /* * mmap()d file has taken write protection fault and is being made * writable. We can set the page state up correctly for a writable * page, which means we can do correct delalloc accounting (ENOSPC * checking!) and unwritten extent mapping. */ STATIC int xfs_vm_page_mkwrite( struct vm_area_struct *vma, struct vm_fault *vmf) { return block_page_mkwrite(vma, vmf, xfs_get_blocks); } /* * This type is designed to indicate the type of offset we would like * to search from page cache for xfs_seek_hole_data(). Loading Loading @@ -1385,6 +1374,55 @@ xfs_file_llseek( } } /* * Locking for serialisation of IO during page faults. This results in a lock * ordering of: * * mmap_sem (MM) * i_mmap_lock (XFS - truncate serialisation) * page_lock (MM) * i_lock (XFS - extent map serialisation) */ STATIC int xfs_filemap_fault( struct vm_area_struct *vma, struct vm_fault *vmf) { struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); int error; trace_xfs_filemap_fault(ip); xfs_ilock(ip, XFS_MMAPLOCK_SHARED); error = filemap_fault(vma, vmf); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); return error; } /* * mmap()d file has taken write protection fault and is being made writable. We * can set the page state up correctly for a writable page, which means we can * do correct delalloc accounting (ENOSPC checking!) and unwritten extent * mapping. 
*/ STATIC int xfs_filemap_page_mkwrite( struct vm_area_struct *vma, struct vm_fault *vmf) { struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); int error; trace_xfs_filemap_page_mkwrite(ip); xfs_ilock(ip, XFS_MMAPLOCK_SHARED); error = block_page_mkwrite(vma, vmf, xfs_get_blocks); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); return error; } const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, .read = new_sync_read, Loading Loading @@ -1417,7 +1455,7 @@ const struct file_operations xfs_dir_file_operations = { }; static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, .fault = xfs_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_vm_page_mkwrite, .page_mkwrite = xfs_filemap_page_mkwrite, };
fs/xfs/xfs_inode.c +97 −31 Original line number Diff line number Diff line Loading @@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared( } /* * The xfs inode contains 2 locks: a multi-reader lock called the * i_iolock and a multi-reader lock called the i_lock. This routine * allows either or both of the locks to be obtained. * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and * the i_lock. This routine allows various combinations of the locks to be * obtained. * * The 2 locks should always be ordered so that the IO lock is * obtained first in order to prevent deadlock. * The 3 locks should always be ordered so that the IO lock is obtained first, * the mmap lock second and the ilock last in order to prevent deadlock. * * ip -- the inode being locked * lock_flags -- this parameter indicates the inode's locks * to be locked. It can be: * XFS_IOLOCK_SHARED, * XFS_IOLOCK_EXCL, * XFS_ILOCK_SHARED, * XFS_ILOCK_EXCL, * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL * Basic locking order: * * i_iolock -> i_mmap_lock -> page_lock -> i_ilock * * mmap_sem locking order: * * i_iolock -> page lock -> mmap_sem * mmap_sem -> i_mmap_lock -> page_lock * * The difference in mmap_sem locking order mean that we cannot hold the * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can * fault in pages during copy in/out (for buffered IO) or require the mmap_sem * in get_user_pages() to map the user pages into the kernel address space for * direct IO. Similarly the i_iolock cannot be taken inside a page fault because * page faults already hold the mmap_sem. * * Hence to serialise fully against both syscall and mmap based IO, we need to * take both the i_iolock and the i_mmap_lock. These locks should *only* be both * taken in places where we need to invalidate the page cache in a race * free manner (e.g. 
truncate, hole punch and other extent manipulation * functions). */ void xfs_ilock( Loading @@ -150,6 +160,8 @@ xfs_ilock( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); Loading @@ -159,6 +171,11 @@ xfs_ilock( else if (lock_flags & XFS_IOLOCK_SHARED) mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); if (lock_flags & XFS_MMAPLOCK_EXCL) mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); else if (lock_flags & XFS_MMAPLOCK_SHARED) mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); if (lock_flags & XFS_ILOCK_EXCL) mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); else if (lock_flags & XFS_ILOCK_SHARED) Loading Loading @@ -191,6 +208,8 @@ xfs_ilock_nowait( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); Loading @@ -202,15 +221,29 @@ xfs_ilock_nowait( if (!mrtryaccess(&ip->i_iolock)) goto out; } if (lock_flags & XFS_MMAPLOCK_EXCL) { if (!mrtryupdate(&ip->i_mmaplock)) goto out_undo_iolock; } else if (lock_flags & XFS_MMAPLOCK_SHARED) { if (!mrtryaccess(&ip->i_mmaplock)) goto out_undo_iolock; } if (lock_flags & XFS_ILOCK_EXCL) { if (!mrtryupdate(&ip->i_lock)) goto out_undo_iolock; goto out_undo_mmaplock; } else if (lock_flags & XFS_ILOCK_SHARED) { if (!mrtryaccess(&ip->i_lock)) goto out_undo_iolock; goto out_undo_mmaplock; } return 1; out_undo_mmaplock: if (lock_flags & 
XFS_MMAPLOCK_EXCL) mrunlock_excl(&ip->i_mmaplock); else if (lock_flags & XFS_MMAPLOCK_SHARED) mrunlock_shared(&ip->i_mmaplock); out_undo_iolock: if (lock_flags & XFS_IOLOCK_EXCL) mrunlock_excl(&ip->i_iolock); Loading Loading @@ -244,6 +277,8 @@ xfs_iunlock( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); Loading @@ -254,6 +289,11 @@ xfs_iunlock( else if (lock_flags & XFS_IOLOCK_SHARED) mrunlock_shared(&ip->i_iolock); if (lock_flags & XFS_MMAPLOCK_EXCL) mrunlock_excl(&ip->i_mmaplock); else if (lock_flags & XFS_MMAPLOCK_SHARED) mrunlock_shared(&ip->i_mmaplock); if (lock_flags & XFS_ILOCK_EXCL) mrunlock_excl(&ip->i_lock); else if (lock_flags & XFS_ILOCK_SHARED) Loading @@ -271,11 +311,14 @@ xfs_ilock_demote( xfs_inode_t *ip, uint lock_flags) { ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); if (lock_flags & XFS_ILOCK_EXCL) mrdemote(&ip->i_lock); if (lock_flags & XFS_MMAPLOCK_EXCL) mrdemote(&ip->i_mmaplock); if (lock_flags & XFS_IOLOCK_EXCL) mrdemote(&ip->i_iolock); Loading @@ -294,6 +337,12 @@ xfs_isilocked( return rwsem_is_locked(&ip->i_lock.mr_lock); } if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { if (!(lock_flags & XFS_MMAPLOCK_SHARED)) return !!ip->i_mmaplock.mr_writer; return rwsem_is_locked(&ip->i_mmaplock.mr_lock); } if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { if (!(lock_flags & XFS_IOLOCK_SHARED)) return !!ip->i_iolock.mr_writer; Loading @@ -314,14 +363,27 @@ int xfs_lock_delays; #endif 
/* * Bump the subclass so xfs_lock_inodes() acquires each lock with * a different value * Bump the subclass so xfs_lock_inodes() acquires each lock with a different * value. This shouldn't be called for page fault locking, but we also need to * ensure we don't overrun the number of lockdep subclasses for the iolock or * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. */ static inline int xfs_lock_inumorder(int lock_mode, int subclass) { if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { ASSERT(subclass + XFS_LOCK_INUMORDER < (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; } if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { ASSERT(subclass + XFS_LOCK_INUMORDER < (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_MMAPLOCK_SHIFT; } if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; Loading Loading @@ -440,10 +502,10 @@ xfs_lock_inodes( } /* * xfs_lock_two_inodes() can only be used to lock one type of lock * at a time - the iolock or the ilock, but not both at once. If * we lock both at once, lockdep will report false positives saying * we have violated locking orders. * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - * the iolock, the mmaplock or the ilock, but not more than one at a time. If we * lock more than one at a time, lockdep will report false positives saying we * have violated locking orders. 
*/ void xfs_lock_two_inodes( Loading @@ -455,8 +517,12 @@ xfs_lock_two_inodes( int attempts = 0; xfs_log_item_t *lp; if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { Loading
fs/xfs/xfs_inode.h +22 −7 Original line number Diff line number Diff line Loading @@ -56,6 +56,7 @@ typedef struct xfs_inode { struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ mrlock_t i_mmaplock; /* inode mmap IO lock */ atomic_t i_pincount; /* inode pin count */ spinlock_t i_flags_lock; /* inode i_flags lock */ /* Miscellaneous state. */ Loading Loading @@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHARED (1<<1) #define XFS_ILOCK_EXCL (1<<2) #define XFS_ILOCK_SHARED (1<<3) #define XFS_MMAPLOCK_EXCL (1<<4) #define XFS_MMAPLOCK_SHARED (1<<5) #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED) #define XFS_LOCK_FLAGS \ { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ { XFS_ILOCK_SHARED, "ILOCK_SHARED" } { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \ { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" } /* Loading Loading @@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHIFT 16 #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) #define XFS_MMAPLOCK_SHIFT 20 #define XFS_ILOCK_SHIFT 24 #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) #define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) #define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) #define XFS_IOLOCK_DEP_MASK 0x00ff0000 #define XFS_IOLOCK_DEP_MASK 0x000f0000 #define XFS_MMAPLOCK_DEP_MASK 0x00f00000 #define XFS_ILOCK_DEP_MASK 0xff000000 #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) #define 
XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ XFS_MMAPLOCK_DEP_MASK | \ XFS_ILOCK_DEP_MASK) #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \ >> XFS_IOLOCK_SHIFT) #define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \ >> XFS_MMAPLOCK_SHIFT) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \ >> XFS_ILOCK_SHIFT) /* * For multiple groups support: if S_ISGID bit is set in the parent Loading
fs/xfs/xfs_ioctl.c +4 −1 Original line number Diff line number Diff line Loading @@ -643,6 +643,9 @@ xfs_ioc_space( if (error) goto out_unlock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); iolock |= XFS_MMAPLOCK_EXCL; switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; Loading