fs/xfs/Makefile (+0 −1)

@@ -105,7 +105,6 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
 	xfs_globals.o \
 	xfs_ioctl.o \
 	xfs_iops.o \
-	xfs_lrw.o \
 	xfs_super.o \
 	xfs_sync.o \
 	xfs_xattr.o)

fs/xfs/linux-2.6/xfs_aops.c (+172 −49)

@@ -39,6 +39,7 @@
 #include "xfs_iomap.h"
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
+#include "xfs_bmap.h"
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
 #include <linux/writeback.h>

@@ -163,14 +164,17 @@ xfs_ioend_new_eof(
 }

 /*
- * Update on-disk file size now that data has been written to disk.
- * The current in-memory file size is i_size.  If a write is beyond
- * eof i_new_size will be the intended file size until i_size is
- * updated.  If this write does not extend all the way to the valid
- * file size then restrict this update to the end of the write.
+ * Update on-disk file size now that data has been written to disk.  The
+ * current in-memory file size is i_size.  If a write is beyond eof,
+ * i_new_size will be the intended file size until i_size is updated.  If this
+ * write does not extend all the way to the valid file size then restrict this
+ * update to the end of the write.
+ *
+ * This function does not block as blocking on the inode lock in IO completion
+ * can lead to IO completion order dependency deadlocks. If it can't get the
+ * inode ilock it will return EAGAIN. Callers must handle this.
  */
-STATIC void
+STATIC int
 xfs_setfilesize(
 	xfs_ioend_t		*ioend)
 {

@@ -181,16 +185,40 @@ xfs_setfilesize(
 	ASSERT(ioend->io_type != IOMAP_READ);

 	if (unlikely(ioend->io_error))
-		return;
+		return 0;
+
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+		return EAGAIN;

-	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	isize = xfs_ioend_new_eof(ioend);
 	if (isize) {
 		ip->i_d.di_size = isize;
-		xfs_mark_inode_dirty_sync(ip);
+		xfs_mark_inode_dirty(ip);
 	}

 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return 0;
+}
+
+/*
+ * Schedule IO completion handling on an xfsdatad if this was
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
+ */
+STATIC void
+xfs_finish_ioend(
+	xfs_ioend_t	*ioend,
+	int		wait)
+{
+	if (atomic_dec_and_test(&ioend->io_remaining)) {
+		struct workqueue_struct *wq;
+
+		wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+			xfsconvertd_workqueue : xfsdatad_workqueue;
+		queue_work(wq, &ioend->io_work);
+		if (wait)
+			flush_workqueue(wq);
+	}
 }
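The new comment block above is the heart of this hunk: xfs_setfilesize() must never sleep on the ilock in IO completion context, so it trylocks and hands EAGAIN back to the caller, who absorbs the contention by retrying. To see that contract in isolation, here is a minimal userspace sketch using a pthreads mutex in place of the ilock; toy_inode and try_setfilesize are invented names for illustration, not XFS APIs.

/* Userspace sketch of the trylock-or-EAGAIN contract; not kernel code. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct toy_inode {
	pthread_mutex_t	ilock;
	long long	disk_size;	/* stands in for ip->i_d.di_size */
};

/*
 * Try to update the on-disk size without blocking.  Returns 0 on success,
 * EAGAIN if the lock is currently held; the caller must requeue and retry
 * rather than sleep on the lock.
 */
static int try_setfilesize(struct toy_inode *ip, long long new_size)
{
	if (pthread_mutex_trylock(&ip->ilock) != 0)
		return EAGAIN;

	if (new_size > ip->disk_size)
		ip->disk_size = new_size;

	pthread_mutex_unlock(&ip->ilock);
	return 0;
}

int main(void)
{
	struct toy_inode ino = { PTHREAD_MUTEX_INITIALIZER, 0 };

	/* retry instead of blocking; succeeds immediately when uncontended */
	while (try_setfilesize(&ino, 4096) == EAGAIN)
		;

	printf("disk size now %lld\n", ino.disk_size);
	return 0;
}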
@@ -200,9 +228,9 @@
 STATIC void
 xfs_end_io(
 	struct work_struct	*work)
 {
-	xfs_ioend_t	*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-	struct xfs_inode *ip = XFS_I(ioend->io_inode);
+	xfs_ioend_t		*ioend = container_of(work, xfs_ioend_t, io_work);
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
+	int			error = 0;

 	/*
 	 * For unwritten extents we need to issue transactions to convert a

@@ -210,7 +238,6 @@ xfs_end_io(
 	 */
 	if (ioend->io_type == IOMAP_UNWRITTEN &&
 	    likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
-		int		error;

 		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
 						  ioend->io_size);

@@ -222,30 +249,23 @@ xfs_end_io(
 	 * We might have to update the on-disk file size after extending
 	 * writes.
 	 */
-	if (ioend->io_type != IOMAP_READ)
-		xfs_setfilesize(ioend);
-	xfs_destroy_ioend(ioend);
-}
-
-/*
- * Schedule IO completion handling on an xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
- */
-STATIC void
-xfs_finish_ioend(
-	xfs_ioend_t	*ioend,
-	int		wait)
-{
-	if (atomic_dec_and_test(&ioend->io_remaining)) {
-		struct workqueue_struct *wq;
-
-		wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
-			xfsconvertd_workqueue : xfsdatad_workqueue;
-		queue_work(wq, &ioend->io_work);
-		if (wait)
-			flush_workqueue(wq);
-	}
+	if (ioend->io_type != IOMAP_READ) {
+		error = xfs_setfilesize(ioend);
+		ASSERT(!error || error == EAGAIN);
+	}
+
+	/*
+	 * If we didn't complete processing of the ioend, requeue it to the
+	 * tail of the workqueue for another attempt later. Otherwise destroy
+	 * it.
+	 */
+	if (error == EAGAIN) {
+		atomic_inc(&ioend->io_remaining);
+		xfs_finish_ioend(ioend, 0);
+		/* ensure we don't spin on blocked ioends */
+		delay(1);
+	} else
+		xfs_destroy_ioend(ioend);
 }

@@ -341,7 +361,7 @@ xfs_submit_ioend_bio(
 	 * but don't update the inode size until I/O completion.
 	 */
 	if (xfs_ioend_new_eof(ioend))
-		xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
+		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));

 	submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
 		   WRITE_SYNC_PLUG : WRITE, bio);
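The requeue path above is the caller-side half of the EAGAIN contract: take another reference on io_remaining so the ioend stays alive, push it back onto the workqueue via xfs_finish_ioend(), and delay briefly so a blocked ioend doesn't spin. The following userspace sketch shows the same shape, with C11 atomics standing in for io_remaining and a direct call standing in for queue_work(); toy_ioend and the forced-contention setfilesize are invented for illustration, not XFS APIs.

/* Userspace sketch of the refcounted requeue-on-EAGAIN pattern. */
#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct toy_ioend {
	atomic_int	remaining;	/* like ioend->io_remaining */
	int		tries;
};

static void finish_ioend(struct toy_ioend *io);

/* Pretend the ilock is contended for the first two attempts. */
static int setfilesize(struct toy_ioend *io)
{
	return io->tries++ < 2 ? EAGAIN : 0;
}

static void end_io(struct toy_ioend *io)
{
	int error = setfilesize(io);

	if (error == EAGAIN) {
		/* take a new reference and requeue for a later retry */
		atomic_fetch_add(&io->remaining, 1);
		finish_ioend(io);
		usleep(1000);	/* don't spin on a blocked ioend */
		return;
	}
	printf("ioend done after %d tries\n", io->tries);
}

/* Run the completion work when the last reference is dropped. */
static void finish_ioend(struct toy_ioend *io)
{
	if (atomic_fetch_sub(&io->remaining, 1) == 1)
		end_io(io);	/* stands in for queue_work() */
}

int main(void)
{
	struct toy_ioend io;

	atomic_init(&io.remaining, 1);
	io.tries = 0;

	finish_ioend(&io);
	return 0;
}

The delay between retries matters: without it, a completion thread could spin resubmitting an ioend whose lock holder needs CPU time to make progress, which is exactly what the "/* ensure we don't spin on blocked ioends */" comment guards against.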
@@ -874,6 +894,118 @@ xfs_cluster_write(
 	}
 }

+STATIC void
+xfs_vm_invalidatepage(
+	struct page		*page,
+	unsigned long		offset)
+{
+	trace_xfs_invalidatepage(page->mapping->host, page, offset);
+	block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see an ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching
+ * one buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+	struct page		*page)
+{
+	struct inode		*inode = page->mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct buffer_head	*bh, *head;
+	loff_t			offset = page_offset(page);
+	ssize_t			len = 1 << inode->i_blkbits;
+
+	if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+		goto out_invalidate;
+
+	xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+		"page discard on page %p, inode 0x%llx, offset %llu.",
+			page, ip->i_ino, offset);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	bh = head = page_buffers(page);
+	do {
+		int		done;
+		xfs_fileoff_t	offset_fsb;
+		xfs_bmbt_irec_t	imap;
+		int		nimaps = 1;
+		int		error;
+		xfs_fsblock_t	firstblock;
+		xfs_bmap_free_t flist;
+
+		if (!buffer_delay(bh))
+			goto next_buffer;
+
+		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+
+		/*
+		 * Map the range first and check that it is a delalloc extent
+		 * before trying to unmap the range. Otherwise we will be
+		 * trying to remove a real extent (which requires a
+		 * transaction) or a hole, which is probably a bad idea...
+		 */
+		error = xfs_bmapi(NULL, ip, offset_fsb, 1,
+				XFS_BMAPI_ENTIRE, NULL, 0, &imap,
+				&nimaps, NULL, NULL);
+
+		if (error) {
+			/* something screwed, just bail */
+			xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"page discard failed delalloc mapping lookup.");
+			break;
+		}
+		if (!nimaps) {
+			/* nothing there */
+			goto next_buffer;
+		}
+		if (imap.br_startblock != DELAYSTARTBLOCK) {
+			/* been converted, ignore */
+			goto next_buffer;
+		}
+		WARN_ON(imap.br_blockcount == 0);
+
+		/*
+		 * Note: while we initialise the firstblock/flist pair, they
+		 * should never be used because blocks should never be
+		 * allocated or freed for a delalloc extent and hence we
+		 * don't need to cancel or finish them after the
+		 * xfs_bunmapi() call.
+		 */
+		xfs_bmap_init(&flist, &firstblock);
+		error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
+					&flist, NULL, &done);
+
+		ASSERT(!flist.xbf_count && !flist.xbf_first);
+		if (error) {
+			/* something screwed, just bail */
+			xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"page discard unable to remove delalloc mapping.");
+			break;
+		}
+next_buffer:
+		offset += len;
+
+	} while ((bh = bh->b_this_page) != head);
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+	xfs_vm_invalidatepage(page, 0);
+	return;
+}
+
 /*
  * Calling this without startio set means we are being asked to make a dirty
  * page ready for freeing its buffers. When called with startio set then

@@ -1125,7 +1257,7 @@ xfs_page_state_convert(
 	 */
 	if (err != -EAGAIN) {
 		if (!unmapped)
-			block_invalidatepage(page, 0);
+			xfs_aops_discard_page(page);
 		ClearPageUptodate(page);
 	}
 	return err;

@@ -1535,15 +1667,6 @@ xfs_vm_readpages(
 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
 }

-STATIC void
-xfs_vm_invalidatepage(
-	struct page		*page,
-	unsigned long		offset)
-{
-	trace_xfs_invalidatepage(page->mapping->host, page, offset);
-	block_invalidatepage(page, offset);
-}
-
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
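xfs_aops_discard_page() visits every buffer on the page through the circular b_this_page list, punching out only the buffers still marked delalloc. For readers unfamiliar with that idiom, here is a self-contained sketch of the walk; toy_bh is an invented stand-in for struct buffer_head, and only the loop shape is the point.

/* Sketch of the circular b_this_page walk; not kernel code. */
#include <stdio.h>

struct toy_bh {
	struct toy_bh	*b_this_page;	/* circular list, like buffer_head */
	int		delay;		/* 1 if this buffer is delalloc */
};

/* Visit each buffer exactly once, punching out the delalloc ones. */
static void discard_page_buffers(struct toy_bh *head)
{
	struct toy_bh *bh = head;

	do {
		if (bh->delay)
			printf("punching delalloc buffer %p\n", (void *)bh);
	} while ((bh = bh->b_this_page) != head);
}

int main(void)
{
	struct toy_bh b[3];

	b[0] = (struct toy_bh){ &b[1], 1 };
	b[1] = (struct toy_bh){ &b[2], 0 };
	b[2] = (struct toy_bh){ &b[0], 1 };	/* close the ring */

	discard_page_buffers(&b[0]);
	return 0;
}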