fs/xfs/libxfs/xfs_inode_fork.c  +19 −8

@@ -1518,6 +1518,24 @@ xfs_iext_indirect_to_direct(
 	}
 }
 
+/*
+ * Remove all records from the indirection array.
+ */
+STATIC void
+xfs_iext_irec_remove_all(
+	struct xfs_ifork	*ifp)
+{
+	int			nlists;
+	int			i;
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	for (i = 0; i < nlists; i++)
+		kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
+	kmem_free(ifp->if_u1.if_ext_irec);
+	ifp->if_flags &= ~XFS_IFEXTIREC;
+}
+
 /*
  * Free incore file extents.
  */
@@ -1526,14 +1544,7 @@ xfs_iext_destroy(
 	xfs_ifork_t	*ifp)		/* inode fork pointer */
 {
 	if (ifp->if_flags & XFS_IFEXTIREC) {
-		int	erp_idx;
-		int	nlists;
-
-		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
-			xfs_iext_irec_remove(ifp, erp_idx);
-		}
-		ifp->if_flags &= ~XFS_IFEXTIREC;
+		xfs_iext_irec_remove_all(ifp);
 	} else if (ifp->if_real_bytes) {
 		kmem_free(ifp->if_u1.if_extents);
 	} else if (ifp->if_bytes) {

fs/xfs/xfs_icache.c  +156 −134

@@ -37,9 +37,6 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
-STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
-				struct xfs_perag *pag, struct xfs_inode *ip);
-
 /*
  * Allocate and initialise an xfs_inode.
  */
@@ -94,13 +91,6 @@ xfs_inode_free_callback(
 	struct inode		*inode = container_of(head, struct inode, i_rcu);
 	struct xfs_inode	*ip = XFS_I(inode);
 
-	kmem_zone_free(xfs_inode_zone, ip);
-}
-
-void
-xfs_inode_free(
-	struct xfs_inode	*ip)
-{
 	switch (VFS_I(ip)->i_mode & S_IFMT) {
 	case S_IFREG:
 	case S_IFDIR:
@@ -118,6 +108,25 @@ xfs_inode_free(
 		ip->i_itemp = NULL;
 	}
 
+	kmem_zone_free(xfs_inode_zone, ip);
+}
+
+static void
+__xfs_inode_free(
+	struct xfs_inode	*ip)
+{
+	/* asserts to verify all state is correct here */
+	ASSERT(atomic_read(&ip->i_pincount) == 0);
+	ASSERT(!xfs_isiflocked(ip));
+	XFS_STATS_DEC(ip->i_mount, vn_active);
+
+	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+}
+
+void
+xfs_inode_free(
+	struct xfs_inode	*ip)
+{
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always
 	 * appears to be reclaimed with an invalid inode number when in the
@@ -129,12 +138,123 @@ xfs_inode_free(
 	ip->i_ino = 0;
 	spin_unlock(&ip->i_flags_lock);
 
-	/* asserts to verify all state is correct here */
-	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!xfs_isiflocked(ip));
-	XFS_STATS_DEC(ip->i_mount, vn_active);
+	__xfs_inode_free(ip);
+}
 
-	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs periodic sync default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_reclaim_work_queue(
+	struct xfs_mount	*mp)
+{
+	rcu_read_lock();
+	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
+			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+void
+xfs_reclaim_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_reclaim_work);
+
+	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+	xfs_reclaim_work_queue(mp);
+}
+
+static void
+xfs_perag_set_reclaim_tag(
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+
+	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	if (pag->pag_ici_reclaimable++)
+		return;
+
+	/* propagate the reclaim tag up into the perag radix tree */
+	spin_lock(&mp->m_perag_lock);
+	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno,
+			   XFS_ICI_RECLAIM_TAG);
+	spin_unlock(&mp->m_perag_lock);
+
+	/* schedule periodic background inode reclaim */
+	xfs_reclaim_work_queue(mp);
+
+	trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+static void
+xfs_perag_clear_reclaim_tag(
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+
+	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	if (--pag->pag_ici_reclaimable)
+		return;
+
+	/* clear the reclaim tag from the perag radix tree */
+	spin_lock(&mp->m_perag_lock);
+	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
+			     XFS_ICI_RECLAIM_TAG);
+	spin_unlock(&mp->m_perag_lock);
+	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_perag	*pag;
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+	spin_lock(&pag->pag_ici_lock);
+	spin_lock(&ip->i_flags_lock);
+
+	radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino),
+			   XFS_ICI_RECLAIM_TAG);
+	xfs_perag_set_reclaim_tag(pag);
+	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+
+	spin_unlock(&ip->i_flags_lock);
+	spin_unlock(&pag->pag_ici_lock);
+	xfs_perag_put(pag);
+}
+
+STATIC void
+xfs_inode_clear_reclaim_tag(
+	struct xfs_perag	*pag,
+	xfs_ino_t		ino)
+{
+	radix_tree_tag_clear(&pag->pag_ici_root,
+			     XFS_INO_TO_AGINO(pag->pag_mount, ino),
+			     XFS_ICI_RECLAIM_TAG);
+	xfs_perag_clear_reclaim_tag(pag);
 }
 
 /*
@@ -264,7 +384,7 @@ xfs_iget_cache_hit(
 		 */
 		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
 		ip->i_flags |= XFS_INEW;
-		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
+		xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
 		inode->i_state = I_NEW;
 
 		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
@@ -722,121 +842,6 @@ xfs_inode_ag_iterator_tag(
 	return last_error;
 }
 
-/*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs periodic sync default of 30s. Perhaps this should have it's own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_reclaim_work_queue(
-	struct xfs_mount	*mp)
-{
-	rcu_read_lock();
-	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
-			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-	}
-	rcu_read_unlock();
-}
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-void
-xfs_reclaim_worker(
-	struct work_struct *work)
-{
-	struct xfs_mount *mp = container_of(to_delayed_work(work),
-					struct xfs_mount, m_reclaim_work);
-
-	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-	xfs_reclaim_work_queue(mp);
-}
-
-static void
-__xfs_inode_set_reclaim_tag(
-	struct xfs_perag	*pag,
-	struct xfs_inode	*ip)
-{
-	radix_tree_tag_set(&pag->pag_ici_root,
-			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-			   XFS_ICI_RECLAIM_TAG);
-
-	if (!pag->pag_ici_reclaimable) {
-		/* propagate the reclaim tag up into the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-
-		/* schedule periodic background inode reclaim */
-		xfs_reclaim_work_queue(ip->i_mount);
-
-		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-	pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-	xfs_inode_t	*ip)
-{
-	struct xfs_mount *mp = ip->i_mount;
-	struct xfs_perag *pag;
-
-	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-	spin_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	__xfs_inode_set_reclaim_tag(pag, ip);
-	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-	spin_unlock(&ip->i_flags_lock);
-	spin_unlock(&pag->pag_ici_lock);
-	xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-	xfs_perag_t	*pag,
-	xfs_inode_t	*ip)
-{
-	pag->pag_ici_reclaimable--;
-	if (!pag->pag_ici_reclaimable) {
-		/* clear the reclaim tag from the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-}
-
-STATIC void
-__xfs_inode_clear_reclaim_tag(
-	xfs_mount_t	*mp,
-	xfs_perag_t	*pag,
-	xfs_inode_t	*ip)
-{
-	radix_tree_tag_clear(&pag->pag_ici_root,
-			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-	__xfs_inode_clear_reclaim(pag, ip);
-}
-
 /*
  * Grab the inode for reclaim exclusively.
  * Return 0 if we grabbed it, non-zero otherwise.
@@ -929,6 +934,7 @@ xfs_reclaim_inode(
 	int			sync_mode)
 {
 	struct xfs_buf		*bp = NULL;
+	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */
 	int			error;
 
 restart:
@@ -993,6 +999,22 @@ xfs_reclaim_inode(
 		xfs_iflock(ip);
 
 reclaim:
+	/*
+	 * Because we use RCU freeing we need to ensure the inode always appears
+	 * to be reclaimed with an invalid inode number when in the free state.
+	 * We do this as early as possible under the ILOCK and flush lock so
+	 * that xfs_iflush_cluster() can be guaranteed to detect races with us
+	 * here. By doing this, we guarantee that once xfs_iflush_cluster has
+	 * locked both the XFS_ILOCK and the flush lock that it will see either
+	 * a valid, flushable inode that will serialise correctly against the
+	 * locks below, or it will see a clean (and invalid) inode that it can
+	 * skip.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags = XFS_IRECLAIM;
+	ip->i_ino = 0;
+	spin_unlock(&ip->i_flags_lock);
+
 	xfs_ifunlock(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1006,9 +1028,9 @@ xfs_reclaim_inode(
 	 */
 	spin_lock(&pag->pag_ici_lock);
 	if (!radix_tree_delete(&pag->pag_ici_root,
-				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+				XFS_INO_TO_AGINO(ip->i_mount, ino)))
 		ASSERT(0);
-	__xfs_inode_clear_reclaim(pag, ip);
+	xfs_perag_clear_reclaim_tag(pag);
 	spin_unlock(&pag->pag_ici_lock);
 
 	/*
@@ -1023,7 +1045,7 @@ xfs_reclaim_inode(
 	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
-	xfs_inode_free(ip);
+	__xfs_inode_free(ip);
 	return error;
 
 out_ifunlock:

fs/xfs/xfs_inode.c  +68 −36

@@ -3149,16 +3149,16 @@ xfs_rename(
 STATIC int
 xfs_iflush_cluster(
-	xfs_inode_t	*ip,
-	xfs_buf_t	*bp)
+	struct xfs_inode *ip,
+	struct xfs_buf	*bp)
 {
-	xfs_mount_t		*mp = ip->i_mount;
+	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_perag	*pag;
 	unsigned long		first_index, mask;
 	unsigned long		inodes_per_cluster;
-	int			ilist_size;
-	xfs_inode_t		**ilist;
-	xfs_inode_t		*iq;
+	int			cilist_size;
+	struct xfs_inode	**cilist;
+	struct xfs_inode	*cip;
 	int			nr_found;
 	int			clcount = 0;
 	int			bufwasdelwri;
@@ -3167,23 +3167,23 @@ xfs_iflush_cluster(
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
 
 	inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
-	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
-	if (!ilist)
+	cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+	cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
+	if (!cilist)
 		goto out_put;
 
 	mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
 	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
 	rcu_read_lock();
 	/* really need a gang lookup range call here */
-	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
 					first_index, inodes_per_cluster);
 	if (nr_found == 0)
 		goto out_free;
 
 	for (i = 0; i < nr_found; i++) {
-		iq = ilist[i];
-		if (iq == ip)
+		cip = cilist[i];
+		if (cip == ip)
 			continue;
@@ -3192,20 +3192,30 @@ xfs_iflush_cluster(
 		 * We need to check under the i_flags_lock for a valid inode
 		 * here. Skip it if it is not valid or the wrong inode.
 		 */
-		spin_lock(&ip->i_flags_lock);
-		if (!ip->i_ino ||
-		    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
-			spin_unlock(&ip->i_flags_lock);
+		spin_lock(&cip->i_flags_lock);
+		if (!cip->i_ino ||
+		    __xfs_iflags_test(cip, XFS_ISTALE)) {
+			spin_unlock(&cip->i_flags_lock);
 			continue;
 		}
-		spin_unlock(&ip->i_flags_lock);
+
+		/*
+		 * Once we fall off the end of the cluster, no point checking
+		 * any more inodes in the list because they will also all be
+		 * outside the cluster.
+		 */
+		if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
+			spin_unlock(&cip->i_flags_lock);
+			break;
+		}
+		spin_unlock(&cip->i_flags_lock);
 
 		/*
 		 * Do an un-protected check to see if the inode is dirty and
 		 * is a candidate for flushing. These checks will be repeated
 		 * later after the appropriate locks are acquired.
 		 */
-		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+		if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
 			continue;
@@ -3213,15 +3223,28 @@ xfs_iflush_cluster(
 		 * then this inode cannot be flushed and is skipped.
 		 */
-		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+		if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
 			continue;
-		if (!xfs_iflock_nowait(iq)) {
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		if (!xfs_iflock_nowait(cip)) {
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
 			continue;
 		}
-		if (xfs_ipincount(iq)) {
-			xfs_ifunlock(iq);
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		if (xfs_ipincount(cip)) {
+			xfs_ifunlock(cip);
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
+			continue;
+		}
+
+		/*
+		 * Check the inode number again, just to be certain we are not
+		 * racing with freeing in xfs_reclaim_inode(). See the comments
+		 * in that function for more information as to why the initial
+		 * check is not sufficient.
+		 */
+		if (!cip->i_ino) {
+			xfs_ifunlock(cip);
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
 			continue;
 		}
@@ -3229,18 +3252,18 @@ xfs_iflush_cluster(
 		 * arriving here means that this inode can be flushed. First
 		 * re-check that it's dirty before flushing.
 		 */
-		if (!xfs_inode_clean(iq)) {
+		if (!xfs_inode_clean(cip)) {
 			int	error;
 
-			error = xfs_iflush_int(iq, bp);
+			error = xfs_iflush_int(cip, bp);
 			if (error) {
-				xfs_iunlock(iq, XFS_ILOCK_SHARED);
+				xfs_iunlock(cip, XFS_ILOCK_SHARED);
 				goto cluster_corrupt_out;
 			}
 			clcount++;
 		} else {
-			xfs_ifunlock(iq);
+			xfs_ifunlock(cip);
 		}
-		xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		xfs_iunlock(cip, XFS_ILOCK_SHARED);
 	}
 
 	if (clcount) {
@@ -3250,7 +3273,7 @@ xfs_iflush_cluster(
 out_free:
 	rcu_read_unlock();
-	kmem_free(ilist);
+	kmem_free(cilist);
 out_put:
 	xfs_perag_put(pag);
 	return 0;
@@ -3293,8 +3316,8 @@ xfs_iflush_cluster(
 	/*
 	 * Unlocks the flush lock
 	 */
-	xfs_iflush_abort(iq, false);
-	kmem_free(ilist);
+	xfs_iflush_abort(cip, false);
+	kmem_free(cilist);
 	xfs_perag_put(pag);
 	return -EFSCORRUPTED;
 }
@@ -3314,7 +3337,7 @@ xfs_iflush(
 	struct xfs_buf		**bpp)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_buf		*bp;
+	struct xfs_buf		*bp = NULL;
 	struct xfs_dinode	*dip;
 	int			error;
@@ -3356,14 +3379,22 @@ xfs_iflush(
 	}
 
 	/*
-	 * Get the buffer containing the on-disk inode.
+	 * Get the buffer containing the on-disk inode. We are doing a try-lock
+	 * operation here, so we may get an EAGAIN error. In that case, we
+	 * simply want to return with the inode still dirty.
+	 *
+	 * If we get any other error, we effectively have a corruption situation
+	 * and we cannot flush the inode, so we treat it the same as failing
+	 * xfs_iflush_int().
 	 */
 	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
 			       0);
-	if (error || !bp) {
+	if (error == -EAGAIN) {
 		xfs_ifunlock(ip);
 		return error;
 	}
+	if (error)
+		goto corrupt_out;
 
 	/*
 	 * First flush out the inode that xfs_iflush was called with.
@@ -3391,6 +3422,7 @@ xfs_iflush(
 	return 0;
 
 corrupt_out:
+	if (bp)
 		xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 cluster_corrupt_out:

fs/xfs/xfs_super.c  +7 −21

@@ -928,7 +928,7 @@ xfs_fs_alloc_inode(
 /*
  * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
+ * the linux inode, we can inactivate and reclaim the inode.
  */
 STATIC void
 xfs_fs_destroy_inode(
@@ -938,9 +938,14 @@ xfs_fs_destroy_inode(
 	trace_xfs_destroy_inode(ip);
 
-	XFS_STATS_INC(ip->i_mount, vn_reclaim);
+	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+	XFS_STATS_INC(ip->i_mount, vn_rele);
+	XFS_STATS_INC(ip->i_mount, vn_remove);
+	xfs_inactive(ip);
+
+	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+	XFS_STATS_INC(ip->i_mount, vn_reclaim);
 
 	/*
 	 * We should never get here with one of the reclaim flags already set.
@@ -987,24 +992,6 @@ xfs_fs_inode_init_once(
 		     "xfsino", ip->i_ino);
 }
 
-STATIC void
-xfs_fs_evict_inode(
-	struct inode		*inode)
-{
-	xfs_inode_t		*ip = XFS_I(inode);
-
-	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-
-	trace_xfs_evict_inode(ip);
-
-	truncate_inode_pages_final(&inode->i_data);
-	clear_inode(inode);
-	XFS_STATS_INC(ip->i_mount, vn_rele);
-	XFS_STATS_INC(ip->i_mount, vn_remove);
-
-	xfs_inactive(ip);
-}
-
 /*
  * We do an unlocked check for XFS_IDONTCACHE here because we are already
  * serialised against cache hits here via the inode->i_lock and igrab() in
@@ -1673,7 +1660,6 @@ xfs_fs_free_cached_objects(
 static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
-	.evict_inode		= xfs_fs_evict_inode,
 	.drop_inode		= xfs_fs_drop_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,