summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-05-04 12:58:20 +1000
committerGreg Kroah-Hartman <gregkh@suse.de>2010-05-12 14:57:13 -0700
commita899608c6a4c0ffd5219e7bf904b42aba0e6b9c1 (patch)
tree306feca16e0abe62ff1e0a50f41862a8e2e0fbc1
parent47d6fa8014314cf139d407840a85a1bd14588561 (diff)
downloadlwn-a899608c6a4c0ffd5219e7bf904b42aba0e6b9c1.tar.gz
lwn-a899608c6a4c0ffd5219e7bf904b42aba0e6b9c1.zip
xfs: add a shrinker to background inode reclaim
commit 9bf729c0af67897ea8498ce17c29b0683f7f2028 upstream On low memory boxes or those with highmem, kernel can OOM before the background reclaims inodes via xfssyncd. Add a shrinker to run inode reclaim so that it inode reclaim is expedited when memory is low. This is more complex than it needs to be because the VM folk don't want a context added to the shrinker infrastructure. Hence we need to add a global list of XFS mount structures so the shrinker can traverse them. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Acked-by: Alex Elder <aelder@sgi.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c107
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h7
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c3
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_mount.h1
6 files changed, 115 insertions, 9 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index aae1249f8059..d95bfa27c62a 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1164,6 +1164,7 @@ xfs_fs_put_super(
xfs_unmountfs(mp);
xfs_freesb(mp);
+ xfs_inode_shrinker_unregister(mp);
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
xfs_dmops_put(mp);
@@ -1555,6 +1556,8 @@ xfs_fs_fill_super(
if (error)
goto fail_vnrele;
+ xfs_inode_shrinker_register(mp);
+
kfree(mtpt);
xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
@@ -1894,6 +1897,7 @@ init_xfs_fs(void)
goto out_cleanup_procfs;
vfs_initquota();
+ xfs_inode_shrinker_init();
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1923,6 +1927,7 @@ exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
+ xfs_inode_shrinker_destroy();
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index c1b715474917..c82683ad1484 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -94,7 +94,8 @@ xfs_inode_ag_walk(
struct xfs_perag *pag, int flags),
int flags,
int tag,
- int exclusive)
+ int exclusive,
+ int *nr_to_scan)
{
struct xfs_perag *pag = &mp->m_perag[ag];
uint32_t first_index;
@@ -134,7 +135,7 @@ restart:
if (error == EFSCORRUPTED)
break;
- } while (1);
+ } while ((*nr_to_scan)--);
if (skipped) {
delay(1);
@@ -152,23 +153,30 @@ xfs_inode_ag_iterator(
struct xfs_perag *pag, int flags),
int flags,
int tag,
- int exclusive)
+ int exclusive,
+ int *nr_to_scan)
{
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
+ int nr;
+ nr = nr_to_scan ? *nr_to_scan : INT_MAX;
for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
if (!mp->m_perag[ag].pag_ici_init)
continue;
error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
- exclusive);
+ exclusive, &nr);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
break;
}
+ if (nr <= 0)
+ break;
}
+ if (nr_to_scan)
+ *nr_to_scan = nr;
return XFS_ERROR(last_error);
}
@@ -288,7 +296,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
- XFS_ICI_NO_TAG, 0);
+ XFS_ICI_NO_TAG, 0, NULL);
if (error)
return XFS_ERROR(error);
@@ -310,7 +318,7 @@ xfs_sync_attr(
ASSERT((flags & ~SYNC_WAIT) == 0);
return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
- XFS_ICI_NO_TAG, 0);
+ XFS_ICI_NO_TAG, 0, NULL);
}
STATIC int
@@ -678,6 +686,7 @@ __xfs_inode_set_reclaim_tag(
radix_tree_tag_set(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
+ pag->pag_ici_reclaimable++;
}
/*
@@ -709,6 +718,7 @@ __xfs_inode_clear_reclaim_tag(
{
radix_tree_tag_clear(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+ pag->pag_ici_reclaimable--;
}
STATIC int
@@ -769,5 +779,88 @@ xfs_reclaim_inodes(
int mode)
{
return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
- XFS_ICI_RECLAIM_TAG, 1);
+ XFS_ICI_RECLAIM_TAG, 1, NULL);
+}
+
+/*
+ * Shrinker infrastructure.
+ *
+ * This is all far more complex than it needs to be. It adds a global list of
+ * mounts because the shrinkers can only call a global context. We need to make
+ * the shrinkers pass a context to avoid the need for global state.
+ */
+static LIST_HEAD(xfs_mount_list);
+static struct rw_semaphore xfs_mount_list_lock;
+
+static int
+xfs_reclaim_inode_shrink(
+ int nr_to_scan,
+ gfp_t gfp_mask)
+{
+ struct xfs_mount *mp;
+ xfs_agnumber_t ag;
+ int reclaimable = 0;
+
+ if (nr_to_scan) {
+ if (!(gfp_mask & __GFP_FS))
+ return -1;
+
+ down_read(&xfs_mount_list_lock);
+ list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+ xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+ XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+ if (nr_to_scan <= 0)
+ break;
+ }
+ up_read(&xfs_mount_list_lock);
+ }
+
+ down_read(&xfs_mount_list_lock);
+ list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+ for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+
+ if (!mp->m_perag[ag].pag_ici_init)
+ continue;
+ reclaimable += mp->m_perag[ag].pag_ici_reclaimable;
+ }
+ }
+ up_read(&xfs_mount_list_lock);
+ return reclaimable;
+}
+
+static struct shrinker xfs_inode_shrinker = {
+ .shrink = xfs_reclaim_inode_shrink,
+ .seeks = DEFAULT_SEEKS,
+};
+
+void __init
+xfs_inode_shrinker_init(void)
+{
+ init_rwsem(&xfs_mount_list_lock);
+ register_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_destroy(void)
+{
+ ASSERT(list_empty(&xfs_mount_list));
+ unregister_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_register(
+ struct xfs_mount *mp)
+{
+ down_write(&xfs_mount_list_lock);
+ list_add_tail(&mp->m_mplist, &xfs_mount_list);
+ up_write(&xfs_mount_list_lock);
+}
+
+void
+xfs_inode_shrinker_unregister(
+ struct xfs_mount *mp)
+{
+ down_write(&xfs_mount_list_lock);
+ list_del(&mp->m_mplist);
+ up_write(&xfs_mount_list_lock);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index ea932b43335d..0b28c13bdf94 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags, int tag, int write_lock);
+ int flags, int tag, int write_lock, int *nr_to_scan);
+
+void xfs_inode_shrinker_init(void);
+void xfs_inode_shrinker_destroy(void);
+void xfs_inode_shrinker_register(struct xfs_mount *mp);
+void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
#endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index f99cfa49eee8..60fe35821cff 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -893,7 +893,8 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
+ XFS_ICI_NO_TAG, 0, NULL);
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index a5d54bf4931b..381fba77b28d 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -215,6 +215,7 @@ typedef struct xfs_perag
int pag_ici_init; /* incore inode cache initialised */
rwlock_t pag_ici_lock; /* incore inode lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
+ int pag_ici_reclaimable; /* reclaimable inodes */
#endif
} xfs_perag_t;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1e6094f55df4..08fdb6d43efd 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -243,6 +243,7 @@ typedef struct xfs_mount {
wait_queue_head_t m_wait_single_sync_task;
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
+ struct list_head m_mplist; /* inode shrinker mount list */
} xfs_mount_t;
/*