diff options
author | Jan Kara <jack@suse.cz> | 2017-03-14 12:31:02 +0100 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2017-04-10 17:37:34 +0200 |
commit | 9dd813c15b2c101168808d4f5941a29985758973 (patch) | |
tree | 62646b8b8e4335b9d311bdcd3224d12308961c20 /fs/notify/fsnotify.c | |
parent | c1f33073ac1b33510e956de7181438515e438db0 (diff) | |
download | lwn-9dd813c15b2c101168808d4f5941a29985758973.tar.gz lwn-9dd813c15b2c101168808d4f5941a29985758973.zip |
fsnotify: Move mark list head from object into dedicated structure
Currently notification marks are attached to object (inode or vfsmnt) by
a hlist_head in the object. The list is also protected by a spinlock in
the object. So while there is any mark attached to the list of marks,
the object must be pinned in memory (and thus e.g. last iput() deleting
inode cannot happen). Also for list iteration in fsnotify() to work, we
must hold fsnotify_mark_srcu lock so that mark itself and
mark->obj_list.next cannot get freed. Thus we are required to wait for
response to fanotify events from userspace process with
fsnotify_mark_srcu lock held. That causes issues when userspace process
is buggy and does not reply to some event - basically the whole
notification subsystem gets eventually stuck.
So to be able to drop fsnotify_mark_srcu lock while waiting for
response, we have to pin the mark in memory and make sure it stays in
the object list (as removing the mark waiting for response could lead to
lost notification events for groups later in the list). However we don't
want inode reclaim to block on such mark as that would lead to system
just locking up elsewhere.
This commit is the first in the series that paves way towards solving
these conflicting lifetime needs. Instead of anchoring the list of marks
directly in the object, we anchor it in a dedicated structure
(fsnotify_mark_connector) and just point to that structure from the
object. The following commits will also add spinlock protecting the list
and object pointer to the structure.
Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/notify/fsnotify.c')
-rw-r--r-- | fs/notify/fsnotify.c | 32 |
1 files changed, 23 insertions, 9 deletions
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index b41515d3f081..eae621a18ac9 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -193,6 +193,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; struct fsnotify_group *inode_group, *vfsmount_group; + struct fsnotify_mark_connector *inode_conn, *vfsmount_conn; struct mount *mnt; int idx, ret = 0; /* global tests shouldn't care about events on child only the specific event */ @@ -210,8 +211,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (hlist_empty(&to_tell->i_fsnotify_marks) && - (!mnt || hlist_empty(&mnt->mnt_fsnotify_marks))) + if (!to_tell->i_fsnotify_marks && + (!mnt || !mnt->mnt_fsnotify_marks)) return 0; /* * if this is a modify event we may need to clear the ignored masks @@ -226,16 +227,24 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, idx = srcu_read_lock(&fsnotify_mark_srcu); if ((mask & FS_MODIFY) || - (test_mask & to_tell->i_fsnotify_mask)) - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, - &fsnotify_mark_srcu); + (test_mask & to_tell->i_fsnotify_mask)) { + inode_conn = lockless_dereference(to_tell->i_fsnotify_marks); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, - &fsnotify_mark_srcu); - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, - &fsnotify_mark_srcu); + inode_conn = lockless_dereference(to_tell->i_fsnotify_marks); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + vfsmount_conn = lockless_dereference(mnt->mnt_fsnotify_marks); + if (vfsmount_conn) + vfsmount_node = srcu_dereference( + vfsmount_conn->list.first, + &fsnotify_mark_srcu); } /* @@ -293,6 +302,8 @@ out: } EXPORT_SYMBOL_GPL(fsnotify); +extern struct kmem_cache *fsnotify_mark_connector_cachep; + static __init int fsnotify_init(void) { int ret; @@ -303,6 +314,9 @@ static __init int fsnotify_init(void) if (ret) panic("initializing fsnotify_mark_srcu"); + fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, + SLAB_PANIC); + return 0; } core_initcall(fsnotify_init); |