diff options
Diffstat (limited to 'fs/notify')
| -rw-r--r-- | fs/notify/dnotify/dnotify.c | 8 | ||||
| -rw-r--r-- | fs/notify/fanotify/fanotify.c | 11 | ||||
| -rw-r--r-- | fs/notify/fanotify/fanotify.h | 16 | ||||
| -rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 264 | ||||
| -rw-r--r-- | fs/notify/fdinfo.c | 10 | ||||
| -rw-r--r-- | fs/notify/fsnotify.c | 169 | ||||
| -rw-r--r-- | fs/notify/fsnotify.h | 5 | ||||
| -rw-r--r-- | fs/notify/group.c | 2 | ||||
| -rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 2 | ||||
| -rw-r--r-- | fs/notify/inotify/inotify_user.c | 5 | ||||
| -rw-r--r-- | fs/notify/mark.c | 200 |
11 files changed, 462 insertions, 230 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index c4cdaf5fa7ed..9fb73bafd41d 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -308,6 +308,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } + error = file_f_owner_allocate(filp); + if (error) + goto out_err; + /* new fsnotify mark, we expect most fcntl calls to add a new mark */ new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL); if (!new_dn_mark) { @@ -315,10 +319,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } - error = file_f_owner_allocate(filp); - if (error) - goto out_err; - /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; fsnotify_init_mark(new_fsn_mark, dnotify_group); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6d386080faf2..38290b9c07f7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -415,7 +415,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, { int dwords, type = 0; char *ext_buf = NULL; - void *buf = fh->buf; + void *buf = fh + 1; int err; fh->type = FILEID_ROOT; @@ -454,7 +454,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, dwords = fh_len >> 2; type = exportfs_encode_fid(inode, buf, &dwords); err = -EINVAL; - if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2) + /* + * Unlike file_handle, type and len of struct fanotify_fh are u8. + * Traditionally, filesystem return handle_type < 0xff, but there + * is no enforcement for that in vfs. + */ + BUILD_BUG_ON(MAX_HANDLE_SZ > 0xff || FILEID_INVALID > 0xff); + if (type <= 0 || type >= FILEID_INVALID || fh_len != dwords << 2) goto out_err; fh->type = type; @@ -1009,6 +1015,7 @@ finish: static void fanotify_free_group_priv(struct fsnotify_group *group) { + put_user_ns(group->user_ns); kfree(group->fanotify_data.merge_hash); if (group->fanotify_data.ucounts) dec_ucount(group->fanotify_data.ucounts, diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index b44e70e44be6..a0619e7694d5 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -2,6 +2,7 @@ #include <linux/fsnotify_backend.h> #include <linux/path.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/exportfs.h> #include <linux/hashtable.h> @@ -25,7 +26,7 @@ enum { * stored in either the first or last 2 dwords. */ #define FANOTIFY_INLINE_FH_LEN (3 << 2) -#define FANOTIFY_FH_HDR_LEN offsetof(struct fanotify_fh, buf) +#define FANOTIFY_FH_HDR_LEN sizeof(struct fanotify_fh) /* Fixed size struct for file handle */ struct fanotify_fh { @@ -34,7 +35,6 @@ struct fanotify_fh { #define FANOTIFY_FH_FLAG_EXT_BUF 1 u8 flags; u8 pad; - unsigned char buf[]; } __aligned(4); /* Variable size struct for dir file handle + child file handle + name */ @@ -92,7 +92,7 @@ static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh) BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4); BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) > FANOTIFY_INLINE_FH_LEN); - return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *)); + return (char **)ALIGN((unsigned long)(fh + 1), __alignof__(char *)); } static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) @@ -102,7 +102,7 @@ static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) static inline void *fanotify_fh_buf(struct fanotify_fh *fh) { - return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf; + return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh + 1; } static inline int fanotify_info_dir_fh_len(struct fanotify_info *info) @@ -219,7 +219,7 @@ static inline void fanotify_info_copy_name(struct fanotify_info *info, return; info->name_len = name->len; - strcpy(fanotify_info_name(info), name->name); + strscpy(fanotify_info_name(info), name->name, name->len + 1); } static inline void fanotify_info_copy_name2(struct fanotify_info *info, @@ -229,7 +229,7 @@ static inline void fanotify_info_copy_name2(struct fanotify_info *info, return; info->name2_len = name->len; - strcpy(fanotify_info_name2(info), name->name); + strscpy(fanotify_info_name2(info), name->name, name->len + 1); } /* @@ -278,7 +278,7 @@ static inline void fanotify_init_event(struct fanotify_event *event, #define FANOTIFY_INLINE_FH(name, size) \ struct { \ struct fanotify_fh name; \ - /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \ + /* Space for filehandle - access with fanotify_fh_buf() */ \ unsigned char _inline_fh_buf[size]; \ } @@ -442,7 +442,9 @@ struct fanotify_perm_event { size_t count; u32 response; /* userspace answer to the event */ unsigned short state; /* state of the event */ + unsigned short watchdog_cnt; /* already scanned by watchdog? */ int fd; /* fd we passed to userspace for this event */ + pid_t recv_pid; /* pid of task receiving the event */ union { struct fanotify_response_info_header hdr; struct fanotify_response_info_audit_rule audit_rule; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f2d840ae4ded..ae904451dfc0 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -50,6 +50,7 @@ /* configurable via /proc/sys/fs/fanotify/ */ static int fanotify_max_queued_events __read_mostly; +static int perm_group_timeout __read_mostly; #ifdef CONFIG_SYSCTL @@ -85,6 +86,14 @@ static const struct ctl_table fanotify_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO }, + { + .procname = "watchdog_timeout", + .data = &perm_group_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, }; static void __init fanotify_sysctls_init(void) @@ -95,6 +104,91 @@ static void __init fanotify_sysctls_init(void) #define fanotify_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ +static LIST_HEAD(perm_group_list); +static DEFINE_SPINLOCK(perm_group_lock); +static void perm_group_watchdog(struct work_struct *work); +static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog); + +static void perm_group_watchdog_schedule(void) +{ + schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout)); +} + +static void perm_group_watchdog(struct work_struct *work) +{ + struct fsnotify_group *group; + struct fanotify_perm_event *event; + struct task_struct *task; + pid_t failed_pid = 0; + + guard(spinlock)(&perm_group_lock); + if (list_empty(&perm_group_list)) + return; + + list_for_each_entry(group, &perm_group_list, + fanotify_data.perm_grp_list) { + /* + * Ok to test without lock, racing with an addition is + * fine, will deal with it next round + */ + if (list_empty(&group->fanotify_data.access_list)) + continue; + + spin_lock(&group->notification_lock); + list_for_each_entry(event, &group->fanotify_data.access_list, + fae.fse.list) { + if (likely(event->watchdog_cnt == 0)) { + event->watchdog_cnt = 1; + } else if (event->watchdog_cnt == 1) { + /* Report on event only once */ + event->watchdog_cnt = 2; + + /* Do not report same pid repeatedly */ + if (event->recv_pid == failed_pid) + continue; + + failed_pid = event->recv_pid; + rcu_read_lock(); + task = find_task_by_pid_ns(event->recv_pid, + &init_pid_ns); + pr_warn_ratelimited( + "PID %u (%s) failed to respond to fanotify queue for more than %d seconds\n", + event->recv_pid, + task ? task->comm : NULL, + perm_group_timeout); + rcu_read_unlock(); + } + } + spin_unlock(&group->notification_lock); + } + perm_group_watchdog_schedule(); +} + +static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group) +{ + if (!list_empty(&group->fanotify_data.perm_grp_list)) { + /* Perm event watchdog can no longer scan this group. */ + spin_lock(&perm_group_lock); + list_del_init(&group->fanotify_data.perm_grp_list); + spin_unlock(&perm_group_lock); + } +} + +static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group) +{ + if (!perm_group_timeout) + return; + + spin_lock(&perm_group_lock); + if (list_empty(&group->fanotify_data.perm_grp_list)) { + /* Add to perm_group_list for monitoring by watchdog. */ + if (list_empty(&perm_group_list)) + perm_group_watchdog_schedule(); + list_add_tail(&group->fanotify_data.perm_grp_list, &perm_group_list); + } + spin_unlock(&perm_group_lock); +} + /* * All flags that may be specified in parameter event_f_flags of fanotify_init. * @@ -953,6 +1047,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, spin_lock(&group->notification_lock); list_add_tail(&event->fse.list, &group->fanotify_data.access_list); + FANOTIFY_PERM(event)->recv_pid = current->pid; spin_unlock(&group->notification_lock); } } @@ -1012,6 +1107,8 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ fsnotify_group_stop_queueing(group); + fanotify_perm_watchdog_group_remove(group); + /* * Process all permission events on access_list and notification queue * and simulate reply from userspace. @@ -1113,6 +1210,7 @@ static int fanotify_find_path(int dfd, const char __user *filename, *path = fd_file(f)->f_path; path_get(path); + ret = 0; } else { unsigned int lookup_flags = 0; @@ -1122,22 +1220,7 @@ static int fanotify_find_path(int dfd, const char __user *filename, lookup_flags |= LOOKUP_DIRECTORY; ret = user_path_at(dfd, filename, lookup_flags, path); - if (ret) - goto out; } - - /* you can only watch an inode if you have read permissions on it */ - ret = path_permission(path, MAY_READ); - if (ret) { - path_put(path); - goto out; - } - - ret = security_path_notify(path, mask, obj_type); - if (ret) - path_put(path); - -out: return ret; } @@ -1334,6 +1417,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, * A group with FAN_UNLIMITED_MARKS does not contribute to mark count * in the limited groups account. */ + BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS)); if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) return ERR_PTR(-ENOSPC); @@ -1464,6 +1548,10 @@ out: fsnotify_group_unlock(group); fsnotify_put_mark(fsn_mark); + + if (!ret && (mask & FANOTIFY_PERM_EVENTS)) + fanotify_perm_watchdog_group_add(group); + return ret; } @@ -1471,7 +1559,7 @@ static struct fsnotify_event *fanotify_alloc_overflow_event(void) { struct fanotify_event *oevent; - oevent = kmalloc(sizeof(*oevent), GFP_KERNEL_ACCOUNT); + oevent = kmalloc_obj(*oevent, GFP_KERNEL_ACCOUNT); if (!oevent) return NULL; @@ -1495,29 +1583,36 @@ static struct hlist_head *fanotify_alloc_merge_hash(void) return hash; } +DEFINE_CLASS(fsnotify_group, + struct fsnotify_group *, + if (!IS_ERR_OR_NULL(_T)) fsnotify_destroy_group(_T), + fsnotify_alloc_group(ops, flags), + const struct fsnotify_ops *ops, int flags) + /* fanotify syscalls */ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { - struct fsnotify_group *group; + struct user_namespace *user_ns = current_user_ns(); int f_flags, fd; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int internal_flags = 0; - struct file *file; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); - if (!capable(CAP_SYS_ADMIN)) { - /* - * An unprivileged user can setup an fanotify group with - * limited functionality - an unprivileged group is limited to - * notification events with file handles and it cannot use - * unlimited queue/marks. - */ - if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) - return -EPERM; + /* + * An unprivileged user can setup an fanotify group with limited + * functionality - an unprivileged group is limited to notification + * events with file handles or mount ids and it cannot use unlimited + * queue/marks. + */ + if (((flags & FANOTIFY_ADMIN_INIT_FLAGS) || + !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT))) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) { /* * Setting the internal flag FANOTIFY_UNPRIV on the group * prevents setting mount/filesystem marks on this group and @@ -1586,42 +1681,36 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & FAN_NONBLOCK) f_flags |= O_NONBLOCK; - /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ - group = fsnotify_alloc_group(&fanotify_fsnotify_ops, + CLASS(fsnotify_group, group)(&fanotify_fsnotify_ops, FSNOTIFY_GROUP_USER); - if (IS_ERR(group)) { + /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ + if (IS_ERR(group)) return PTR_ERR(group); - } /* Enforce groups limits per user in all containing user ns */ - group->fanotify_data.ucounts = inc_ucount(current_user_ns(), - current_euid(), + group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(), UCOUNT_FANOTIFY_GROUPS); - if (!group->fanotify_data.ucounts) { - fd = -EMFILE; - goto out_destroy_group; - } + if (!group->fanotify_data.ucounts) + return -EMFILE; group->fanotify_data.flags = flags | internal_flags; group->memcg = get_mem_cgroup_from_mm(current->mm); + group->user_ns = get_user_ns(user_ns); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); - if (!group->fanotify_data.merge_hash) { - fd = -ENOMEM; - goto out_destroy_group; - } + if (!group->fanotify_data.merge_hash) + return -ENOMEM; group->overflow_event = fanotify_alloc_overflow_event(); - if (unlikely(!group->overflow_event)) { - fd = -ENOMEM; - goto out_destroy_group; - } + if (unlikely(!group->overflow_event)) + return -ENOMEM; if (force_o_largefile()) event_f_flags |= O_LARGEFILE; group->fanotify_data.f_flags = event_f_flags; init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); + INIT_LIST_HEAD(&group->fanotify_data.perm_grp_list); switch (class) { case FAN_CLASS_NOTIF: group->priority = FSNOTIFY_PRIO_NORMAL; @@ -1633,47 +1722,26 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->priority = FSNOTIFY_PRIO_PRE_CONTENT; break; default: - fd = -EINVAL; - goto out_destroy_group; + return -EINVAL; } + BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE)); if (flags & FAN_UNLIMITED_QUEUE) { - fd = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out_destroy_group; group->max_events = UINT_MAX; } else { group->max_events = fanotify_max_queued_events; } - if (flags & FAN_UNLIMITED_MARKS) { - fd = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out_destroy_group; - } - if (flags & FAN_ENABLE_AUDIT) { - fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) - goto out_destroy_group; + return -EPERM; } - fd = get_unused_fd_flags(f_flags); - if (fd < 0) - goto out_destroy_group; - - file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, - f_flags, FMODE_NONOTIFY); - if (IS_ERR(file)) { - put_unused_fd(fd); - fd = PTR_ERR(file); - goto out_destroy_group; - } - fd_install(fd, file); - return fd; - -out_destroy_group: - fsnotify_destroy_group(group); + fd = FD_ADD(f_flags, + anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, + group, f_flags, FMODE_NONOTIFY)); + if (fd >= 0) + retain_and_null_ptr(group); return fd; } @@ -1811,6 +1879,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct path path; struct fan_fsid __fsid, *fsid = NULL; + struct user_namespace *user_ns = NULL; + struct mnt_namespace *mntns; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; @@ -1904,13 +1974,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* - * An unprivileged user is not allowed to setup mount nor filesystem - * marks. This also includes setting up such marks by a group that - * was initialized by an unprivileged user. + * A user is allowed to setup sb/mount/mntns marks only if it is + * capable in the user ns where the group was created. */ - if ((!capable(CAP_SYS_ADMIN) || - FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && - mark_type != FAN_MARK_INODE) + if (mark_type != FAN_MARK_INODE && + !ns_capable(group->user_ns, CAP_SYS_ADMIN)) return -EPERM; /* @@ -1961,12 +2029,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { - if (mark_type == FAN_MARK_MOUNT) - fsnotify_clear_vfsmount_marks_by_group(group); - else if (mark_type == FAN_MARK_FILESYSTEM) - fsnotify_clear_sb_marks_by_group(group); - else - fsnotify_clear_inode_marks_by_group(group); + fsnotify_clear_marks_by_group(group, obj_type); return 0; } @@ -1981,6 +2044,15 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto path_put_and_out; } + /* you can only watch an inode if you have read permissions on it */ + ret = path_permission(&path, MAY_READ); + if (ret) + goto path_put_and_out; + + ret = security_path_notify(&path, mask, obj_type); + if (ret) + goto path_put_and_out; + if (fid_mode) { ret = fanotify_test_fsid(path.dentry, flags, &__fsid); if (ret) @@ -1993,18 +2065,34 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, fsid = &__fsid; } - /* inode held in place by reference to path; group by fget on fd */ + /* + * In addition to being capable in the user ns where group was created, + * the user also needs to be capable in the user ns associated with + * the filesystem or in the user ns associated with the mntns + * (when marking mntns). + */ if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { inode = path.dentry->d_inode; obj = inode; } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt; } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { + user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt->mnt_sb; } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { - obj = mnt_ns_from_dentry(path.dentry); + ret = -EINVAL; + mntns = mnt_ns_from_dentry(path.dentry); + if (!mntns) + goto path_put_and_out; + user_ns = mntns->user_ns; + obj = mntns; } + ret = -EPERM; + if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN)) + goto path_put_and_out; + ret = -EINVAL; if (!obj) goto path_put_and_out; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 1161eabf11ee..0f731eddeb8b 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -17,6 +17,7 @@ #include "fanotify/fanotify.h" #include "fdinfo.h" #include "fsnotify.h" +#include "../internal.h" #if defined(CONFIG_PROC_FS) @@ -46,7 +47,12 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f->handle_bytes >> 2; + if (!super_trylock_shared(inode->i_sb)) + return; + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); + up_read(&inode->i_sb->s_umount); + if ((ret == FILEID_INVALID) || (ret < 0)) return; @@ -78,7 +84,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); inode = igrab(fsnotify_conn_inode(mark->connector)); if (inode) { - seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ", + seq_printf(m, "inotify wd:%x ino:%llx sdev:%x mask:%x ignored_mask:0 ", inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, inotify_mark_user_mask(mark)); show_mark_fhandle(m, inode); @@ -105,7 +111,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) inode = igrab(fsnotify_conn_inode(mark->connector)); if (!inode) return; - seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", + seq_printf(m, "fanotify ino:%llx sdev:%x mflags:%x mask:%x ignored_mask:%x ", inode->i_ino, inode->i_sb->s_dev, mflags, mark->mask, mark->ignore_mask); show_mark_fhandle(m, inode); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index e2b4f17a48bb..2dac70b99b0d 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -33,65 +33,6 @@ void __fsnotify_mntns_delete(struct mnt_namespace *mntns) fsnotify_clear_marks_by_mntns(mntns); } -/** - * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. - * @sb: superblock being unmounted. - * - * Called during unmount with no locks held, so needs to be safe against - * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. - */ -static void fsnotify_unmount_inodes(struct super_block *sb) -{ - struct inode *inode, *iput_inode = NULL; - - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - /* - * We cannot __iget() an inode in state I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. - */ - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with SB_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - * However, we should have been called /after/ evict_inodes - * removed all zero refcount inodes, in any case. Test to - * be sure. - */ - if (!atomic_read(&inode->i_count)) { - spin_unlock(&inode->i_lock); - continue; - } - - __iget(inode); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_list_lock); - - iput(iput_inode); - - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify_inode(inode, FS_UNMOUNT); - - fsnotify_inode_delete(inode); - - iput_inode = inode; - - cond_resched(); - spin_lock(&sb->s_inode_list_lock); - } - spin_unlock(&sb->s_inode_list_lock); - - iput(iput_inode); -} - void fsnotify_sb_delete(struct super_block *sb) { struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); @@ -100,7 +41,7 @@ void fsnotify_sb_delete(struct super_block *sb) if (!sbinfo) return; - fsnotify_unmount_inodes(sb); + fsnotify_unmount_inodes(sbinfo); fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ wait_var_event(fsnotify_sb_watched_objects(sb), @@ -112,7 +53,10 @@ void fsnotify_sb_delete(struct super_block *sb) void fsnotify_sb_free(struct super_block *sb) { - kfree(sb->s_fsnotify_info); + if (sb->s_fsnotify_info) { + WARN_ON_ONCE(!list_empty(&sb->s_fsnotify_info->inode_conn_list)); + kfree(sb->s_fsnotify_info); + } } /* @@ -132,7 +76,7 @@ void fsnotify_set_children_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + for_each_alias(alias, inode) { struct dentry *child; /* run all of the children of the original inode and fix their @@ -199,8 +143,8 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, } /* Are there any inode/mount/sb objects that watch for these events? */ -static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, - __u32 mask) +static inline __u32 fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, + __u32 mask) { __u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask | READ_ONCE(inode->i_sb->s_fsnotify_mask); @@ -270,8 +214,15 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, /* * Include parent/name in notification either if some notification * groups require parent info or the parent is interested in this event. + * The parent interest in ACCESS/MODIFY events does not apply to special + * files, where read/write are not on the filesystem of the parent and + * events can provide an undesirable side-channel for information + * exfiltration. */ - parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; + parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS && + !(data_type == FSNOTIFY_EVENT_PATH && + d_is_special(dentry) && + (mask & (FS_ACCESS | FS_MODIFY))); if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); @@ -437,7 +388,7 @@ static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } -static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) +struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) { struct hlist_node *node = NULL; @@ -656,20 +607,20 @@ EXPORT_SYMBOL_GPL(fsnotify); #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* - * At open time we check fsnotify_sb_has_priority_watchers() and set the - * FMODE_NONOTIFY_ mode bits accordignly. + * At open time we check fsnotify_sb_has_priority_watchers(), call the open perm + * hook and set the FMODE_NONOTIFY_ mode bits accordignly. * Later, fsnotify permission hooks do not check if there are permission event * watches, but that there were permission event watches at open time. */ -void file_set_fsnotify_mode_from_watchers(struct file *file) +int fsnotify_open_perm_and_set_mode(struct file *file) { struct dentry *dentry = file->f_path.dentry, *parent; struct super_block *sb = dentry->d_sb; - __u32 mnt_mask, p_mask; + __u32 mnt_mask, p_mask = 0; /* Is it a file opened by fanotify? */ if (FMODE_FSNOTIFY_NONE(file->f_mode)) - return; + return 0; /* * Permission events is a super set of pre-content events, so if there @@ -679,45 +630,64 @@ void file_set_fsnotify_mode_from_watchers(struct file *file) if (likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT))) { file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); - return; + return 0; } /* - * If there are permission event watchers but no pre-content event - * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + * OK, there are some permission event watchers. Check if anybody is + * watching for permission events on *this* file. */ - if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || - likely(!fsnotify_sb_has_priority_watchers(sb, - FSNOTIFY_PRIO_PRE_CONTENT))) { - file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); - return; + mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); + p_mask = fsnotify_object_watched(d_inode(dentry), mnt_mask, + ALL_FSNOTIFY_PERM_EVENTS); + if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { + parent = dget_parent(dentry); + p_mask |= fsnotify_inode_watches_children(d_inode(parent)); + dput(parent); } /* - * OK, there are some pre-content watchers. Check if anybody is - * watching for pre-content events on *this* file. + * Legacy FAN_ACCESS_PERM events have very high performance overhead, + * so unlikely to be used in the wild. If they are used there will be + * no optimizations at all. */ - mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); - if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, - FSNOTIFY_PRE_CONTENT_EVENTS))) { - /* Enable pre-content events */ + if (unlikely(p_mask & FS_ACCESS_PERM)) { + /* Enable all permission and pre-content events */ file_set_fsnotify_mode(file, 0); - return; + goto open_perm; } - /* Is parent watching for pre-content events on this file? */ - if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { - parent = dget_parent(dentry); - p_mask = fsnotify_inode_watches_children(d_inode(parent)); - dput(parent); - if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) { - /* Enable pre-content events */ - file_set_fsnotify_mode(file, 0); - return; - } + /* + * Pre-content events are only supported on regular files. + * If there are pre-content event watchers and no permission access + * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + * That is the common case with HSM service. + */ + if (d_is_reg(dentry) && (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)) { + file_set_fsnotify_mode(file, FMODE_NONOTIFY | + FMODE_NONOTIFY_PERM); + goto open_perm; } - /* Nobody watching for pre-content events from this file */ - file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); + + /* Nobody watching permission and pre-content events on this file */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); + +open_perm: + /* + * Send open perm events depending on object masks and regardless of + * FMODE_NONOTIFY_PERM. + */ + if (file->f_flags & __FMODE_EXEC && p_mask & FS_OPEN_EXEC_PERM) { + int ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); + + if (ret) + return ret; + } + + if (p_mask & FS_OPEN_PERM) + return fsnotify_path(&file->f_path, FS_OPEN_PERM); + + return 0; } #endif @@ -751,8 +721,7 @@ static __init int fsnotify_init(void) if (ret) panic("initializing fsnotify_mark_srcu"); - fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, - SLAB_PANIC); + fsnotify_init_connector_caches(); return 0; } diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 5950c7a67f41..58c7bb25e571 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -77,6 +77,9 @@ extern struct srcu_struct fsnotify_mark_srcu; extern int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b); +/* Destroy all inode marks for given superblock */ +void fsnotify_unmount_inodes(struct fsnotify_sb_info *sbinfo); + /* Destroy all marks attached to an object via connector */ extern void fsnotify_destroy_marks(fsnotify_connp_t *connp); /* run the list of all marks associated with inode and destroy them */ @@ -106,6 +109,6 @@ static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns) */ extern void fsnotify_set_children_dentry_flags(struct inode *inode); -extern struct kmem_cache *fsnotify_mark_connector_cachep; +void fsnotify_init_connector_caches(void); #endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/fs/notify/group.c b/fs/notify/group.c index 18446b7b0d49..b56d1c1d9644 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -117,7 +117,7 @@ static struct fsnotify_group *__fsnotify_alloc_group( { struct fsnotify_group *group; - group = kzalloc(sizeof(struct fsnotify_group), gfp); + group = kzalloc_obj(struct fsnotify_group, gfp); if (!group) return ERR_PTR(-ENOMEM); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index cd7d11b0eb08..7c326ec2e8a8 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -10,7 +10,7 @@ * Copyright 2006 Hewlett-Packard Development Company, L.P. * * Copyright (C) 2009 Eric Paris <Red Hat Inc> - * inotify was largely rewriten to make use of the fsnotify infrastructure + * inotify was largely rewritten to make use of the fsnotify infrastructure */ #include <linux/dcache.h> /* d_unlinked */ diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index b372fb2c56bd..ed37491c1618 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -573,7 +573,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* update the inode with this new fsn_mark */ if (dropped || do_inode) - fsnotify_recalc_mask(inode->i_fsnotify_marks); + fsnotify_recalc_mask(fsn_mark->connector); } @@ -621,6 +621,7 @@ static int inotify_new_watch(struct fsnotify_group *group, if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); + dec_inotify_watches(group->inotify_data.ucounts); goto out_err; } @@ -660,7 +661,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) if (IS_ERR(group)) return group; - oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT); + oevent = kmalloc_obj(struct inotify_event_info, GFP_KERNEL_ACCOUNT); if (unlikely(!oevent)) { fsnotify_destroy_group(group); return ERR_PTR(-ENOMEM); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 798340db69d7..e256b420100d 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -79,7 +79,8 @@ #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ struct srcu_struct fsnotify_mark_srcu; -struct kmem_cache *fsnotify_mark_connector_cachep; +static struct kmem_cache *fsnotify_mark_connector_cachep; +static struct kmem_cache *fsnotify_inode_mark_connector_cachep; static DEFINE_SPINLOCK(destroy_lock); static LIST_HEAD(destroy_list); @@ -237,7 +238,12 @@ static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn, return inode; } -static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +/* + * Calculate mask of events for a list of marks. + * + * Return true if any of the attached marks want to hold an inode reference. + */ +static bool __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; bool want_iref = false; @@ -261,6 +267,34 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) */ WRITE_ONCE(*fsnotify_conn_mask_p(conn), new_mask); + return want_iref; +} + +/* + * Calculate mask of events for a list of marks after attach/modify mark + * and get an inode reference for the connector if needed. + * + * A concurrent add of evictable mark and detach of non-evictable mark can + * lead to __fsnotify_recalc_mask() returning false want_iref, but in this + * case we defer clearing iref to fsnotify_recalc_mask_clear_iref() called + * from fsnotify_put_mark(). + */ +static void fsnotify_recalc_mask_set_iref(struct fsnotify_mark_connector *conn) +{ + bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF; + bool want_iref = __fsnotify_recalc_mask(conn) || has_iref; + + (void) fsnotify_update_iref(conn, want_iref); +} + +/* + * Calculate mask of events for a list of marks after detach mark + * and return the inode object if its reference is no longer needed. + */ +static void *fsnotify_recalc_mask_clear_iref(struct fsnotify_mark_connector *conn) +{ + bool want_iref = __fsnotify_recalc_mask(conn); + return fsnotify_update_iref(conn, want_iref); } @@ -297,7 +331,7 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) spin_lock(&conn->lock); update_children = !fsnotify_conn_watches_children(conn); - __fsnotify_recalc_mask(conn); + fsnotify_recalc_mask_set_iref(conn); update_children &= fsnotify_conn_watches_children(conn); spin_unlock(&conn->lock); /* @@ -323,10 +357,12 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) while (conn) { free = conn; conn = conn->destroy_next; - kmem_cache_free(fsnotify_mark_connector_cachep, free); + kfree(free); } } +static void fsnotify_untrack_connector(struct fsnotify_mark_connector *conn); + static void *fsnotify_detach_connector_from_object( struct fsnotify_mark_connector *conn, unsigned int *type) @@ -342,6 +378,7 @@ static void *fsnotify_detach_connector_from_object( if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; + fsnotify_untrack_connector(conn); /* Unpin inode when detaching from connector */ if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF)) @@ -415,7 +452,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) /* Update watched objects after detaching mark */ if (sb) fsnotify_update_sb_watchers(sb, conn); - objp = __fsnotify_recalc_mask(conn); + objp = fsnotify_recalc_mask_clear_iref(conn); type = conn->type; } WRITE_ONCE(mark->connector, NULL); @@ -428,7 +465,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) conn->destroy_next = connector_destroy_list; connector_destroy_list = conn; spin_unlock(&destroy_lock); - queue_work(system_unbound_wq, &connector_reaper_work); + queue_work(system_dfl_wq, &connector_reaper_work); } /* * Note that we didn't update flags telling whether inode cares about @@ -439,7 +476,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) spin_lock(&destroy_lock); list_add(&mark->g_list, &destroy_list); spin_unlock(&destroy_lock); - queue_delayed_work(system_unbound_wq, &reaper_work, + queue_delayed_work(system_dfl_wq, &reaper_work, FSNOTIFY_REAPER_DELAY); } EXPORT_SYMBOL_GPL(fsnotify_put_mark); @@ -453,9 +490,6 @@ EXPORT_SYMBOL_GPL(fsnotify_put_mark); */ static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - if (!mark) - return true; - if (refcount_inc_not_zero(&mark->refcnt)) { spin_lock(&mark->lock); if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { @@ -496,15 +530,22 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) int type; fsnotify_foreach_iter_type(type) { + struct fsnotify_mark *mark = iter_info->marks[type]; + /* This can fail if mark is being removed */ - if (!fsnotify_get_mark_safe(iter_info->marks[type])) { - __release(&fsnotify_mark_srcu); - goto fail; + while (mark && !fsnotify_get_mark_safe(mark)) { + if (mark->group == iter_info->current_group) { + __release(&fsnotify_mark_srcu); + goto fail; + } + /* This is a mark in an unrelated group, skip */ + mark = fsnotify_next_mark(mark); + iter_info->marks[type] = mark; } } /* - * Now that both marks are pinned by refcount in the inode / vfsmount + * Now that all marks are pinned by refcount in the inode / vfsmount / etc * lists, we can drop SRCU lock, and safely resume the list iteration * once userspace returns. */ @@ -640,10 +681,12 @@ static int fsnotify_attach_info_to_sb(struct super_block *sb) struct fsnotify_sb_info *sbinfo; /* sb info is freed on fsnotify_sb_delete() */ - sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL); + sbinfo = kzalloc_obj(*sbinfo); if (!sbinfo) return -ENOMEM; + INIT_LIST_HEAD(&sbinfo->inode_conn_list); + spin_lock_init(&sbinfo->list_lock); /* * cmpxchg() provides the barrier so that callers of fsnotify_sb_info() * will observe an initialized structure @@ -655,20 +698,123 @@ static int fsnotify_attach_info_to_sb(struct super_block *sb) return 0; } -static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - void *obj, unsigned int obj_type) +struct fsnotify_inode_mark_connector { + struct fsnotify_mark_connector common; + struct list_head conns_list; +}; + +static struct inode *fsnotify_get_living_inode(struct fsnotify_sb_info *sbinfo) { - struct fsnotify_mark_connector *conn; + struct fsnotify_inode_mark_connector *iconn; + struct inode *inode; + + spin_lock(&sbinfo->list_lock); + /* Find the first non-evicting inode */ + list_for_each_entry(iconn, &sbinfo->inode_conn_list, conns_list) { + /* All connectors on the list are still attached to an inode */ + inode = iconn->common.obj; + /* + * For connectors without FSNOTIFY_CONN_FLAG_HAS_IREF + * (evictable marks) corresponding inode may well have 0 + * refcount and can be undergoing eviction. OTOH list_lock + * protects us from the connector getting detached and inode + * freed. So we can poke around the inode safely. + */ + spin_lock(&inode->i_lock); + if (likely( + !(inode_state_read(inode) & (I_FREEING | I_WILL_FREE)))) { + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&sbinfo->list_lock); + return inode; + } + spin_unlock(&inode->i_lock); + } + spin_unlock(&sbinfo->list_lock); - conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); - if (!conn) - return -ENOMEM; + return NULL; +} + +/** + * fsnotify_unmount_inodes - an sb is unmounting. Handle any watched inodes. + * @sbinfo: fsnotify info for superblock being unmounted. + * + * Walk all inode connectors for the superblock and free all associated marks. + */ +void fsnotify_unmount_inodes(struct fsnotify_sb_info *sbinfo) +{ + struct inode *inode; + + while ((inode = fsnotify_get_living_inode(sbinfo))) { + fsnotify_inode(inode, FS_UNMOUNT); + fsnotify_clear_marks_by_inode(inode); + iput(inode); + cond_resched(); + } +} + +static void fsnotify_init_connector(struct fsnotify_mark_connector *conn, + void *obj, unsigned int obj_type) +{ spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); conn->flags = 0; conn->prio = 0; conn->type = obj_type; conn->obj = obj; +} + +static struct fsnotify_mark_connector * +fsnotify_alloc_inode_connector(struct inode *inode) +{ + struct fsnotify_inode_mark_connector *iconn; + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(inode->i_sb); + + iconn = kmem_cache_alloc(fsnotify_inode_mark_connector_cachep, + GFP_KERNEL); + if (!iconn) + return NULL; + + fsnotify_init_connector(&iconn->common, inode, FSNOTIFY_OBJ_TYPE_INODE); + spin_lock(&sbinfo->list_lock); + list_add(&iconn->conns_list, &sbinfo->inode_conn_list); + spin_unlock(&sbinfo->list_lock); + + return &iconn->common; +} + +static void fsnotify_untrack_connector(struct fsnotify_mark_connector *conn) +{ + struct fsnotify_inode_mark_connector *iconn; + struct fsnotify_sb_info *sbinfo; + + if (conn->type != FSNOTIFY_OBJ_TYPE_INODE) + return; + + iconn = container_of(conn, struct fsnotify_inode_mark_connector, common); + sbinfo = fsnotify_sb_info(fsnotify_conn_inode(conn)->i_sb); + spin_lock(&sbinfo->list_lock); + list_del(&iconn->conns_list); + spin_unlock(&sbinfo->list_lock); +} + +static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, + void *obj, unsigned int obj_type) +{ + struct fsnotify_mark_connector *conn; + + if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { + struct inode *inode = obj; + + conn = fsnotify_alloc_inode_connector(inode); + } else { + conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, + GFP_KERNEL); + if (conn) + fsnotify_init_connector(conn, obj, obj_type); + } + if (!conn) + return -ENOMEM; /* * cmpxchg() provides the barrier so that readers of *connp can see @@ -676,7 +822,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ - kmem_cache_free(fsnotify_mark_connector_cachep, conn); + fsnotify_untrack_connector(conn); + kfree(conn); } return 0; } @@ -1007,3 +1154,12 @@ void fsnotify_wait_marks_destroyed(void) flush_delayed_work(&reaper_work); } EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed); + +__init void fsnotify_init_connector_caches(void) +{ + fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, + SLAB_PANIC); + fsnotify_inode_mark_connector_cachep = KMEM_CACHE( + fsnotify_inode_mark_connector, + SLAB_PANIC); +} |
