diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2010-07-13 15:57:14 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-07-13 15:57:14 +0200 |
commit | ec646ea8db21abc1db436aac580a0464e460bd9d (patch) | |
tree | e1c137718bd94548589f0e2f2a89342c75e052da /include | |
parent | 596fc8ee275b6e4b441b6aa1e2c1a89aeeccb877 (diff) | |
download | lwn-ec646ea8db21abc1db436aac580a0464e460bd9d.tar.gz lwn-ec646ea8db21abc1db436aac580a0464e460bd9d.zip |
vfs: Revert the scalability patches
We still have sporadic and hard-to-debug problems. Revert them for now
and revisit with Nick's new version.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/dcache.h | 142 | ||||
-rw-r--r-- | include/linux/fs.h | 50 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 9 | ||||
-rw-r--r-- | include/linux/inotify.h | 4 | ||||
-rw-r--r-- | include/linux/mount.h | 40 | ||||
-rw-r--r-- | include/linux/tty.h | 1 | ||||
-rw-r--r-- | include/linux/writeback.h | 4 |
7 files changed, 116 insertions, 134 deletions
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 63b98bec6630..30b93b2a01a4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -5,7 +5,6 @@ #include <linux/list.h> #include <linux/rculist.h> #include <linux/spinlock.h> -#include <linux/seqlock.h> #include <linux/cache.h> #include <linux/rcupdate.h> @@ -38,8 +37,8 @@ struct qstr { }; struct dentry_stat_t { - int nr_dentry; /* unused */ - int nr_unused; /* protected by dcache_lru_lock */ + int nr_dentry; + int nr_unused; int age_limit; /* age in seconds */ int want_pages; /* pages requested by system */ int dummy[2]; @@ -88,30 +87,20 @@ full_name_hash(const unsigned char *name, unsigned int len) #endif struct dentry { - /* - * The following 64 bytes of fields (on 64-bit) fit into a 64 byte - * cacheline. They are critical for path lookups. We can do most - * path lookups in 2 cachelines (these + name string) if we have - * correct sizing and alignment here. - * - * XXX: d_sb for revalidate needs to be duplicated into a d_flag. - */ atomic_t d_count; unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ int d_mounted; - seqcount_t d_seq; /* per dentry seqlock */ struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ + /* + * The next three fields are touched by __d_lookup. Place them here + * so they all fit in a cache line. + */ struct hlist_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; - const struct dentry_operations *d_op; - /* - * The following 64 bytes of lists tend to be required for tree - * manipulation, not required for lookups. 
- */ struct list_head d_lru; /* LRU list */ /* * d_child and d_rcu can share memory @@ -122,14 +111,10 @@ struct dentry { } d_u; struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ - - /* - * These following fields may be needed by some types of lookups, and - * d_iname is likely to be required too, so keep them together. - */ + unsigned long d_time; /* used by d_revalidate */ + const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ void *d_fsdata; /* fs-specific data */ - unsigned long d_time; /* used by d_revalidate */ unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; @@ -165,44 +150,43 @@ struct dentry_operations { /* locking rules: - big lock d_lock may block -d_revalidate: no no yes -d_hash no no yes -d_compare: no yes no -d_delete: no no no -d_release: no no yes -d_iput: no no yes + big lock dcache_lock d_lock may block +d_revalidate: no no no yes +d_hash no no no yes +d_compare: no yes yes no +d_delete: no yes no no +d_release: no no no yes +d_iput: no no no yes */ /* d_flags entries */ #define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */ -#define DCACHE_NFSFS_RENAMED 0x0002 - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ - -#define DCACHE_DISCONNECTED 0x0004 - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. 
*/ +#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly + * renamed" and has to be + * deleted on the last dput() + */ +#define DCACHE_DISCONNECTED 0x0004 + /* This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it finds + * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move + * that dentry into place and return that dentry rather than the passed one, + * typically using d_splice_alias. + */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 - /* Parent inode is watched by inotify */ + +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ #define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 - /* Parent inode is watched by some fsnotify listener */ -#define DCACHE_MOUNTED 0x0100 /* is a mountpoint */ -#define DCACHE_GENOCIDE 0x0200 /* being genocided */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ +extern spinlock_t dcache_lock; extern seqlock_t rename_lock; /** @@ -220,8 +204,23 @@ extern seqlock_t rename_lock; * * __d_drop requires dentry->d_lock. 
*/ -void d_drop(struct dentry *dentry); -void __d_drop(struct dentry *dentry); + +static inline void __d_drop(struct dentry *dentry) +{ + if (!(dentry->d_flags & DCACHE_UNHASHED)) { + dentry->d_flags |= DCACHE_UNHASHED; + hlist_del_rcu(&dentry->d_hash); + } +} + +static inline void d_drop(struct dentry *dentry) +{ + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); +} static inline int dname_external(struct dentry *dentry) { @@ -300,11 +299,9 @@ extern void d_move(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ -extern struct dentry *d_lookup(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup(struct dentry *, struct qstr *); -extern struct dentry *d_lookup_rcu(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup_rcu(struct dentry *, struct qstr *); -extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); +extern struct dentry * d_lookup(struct dentry *, struct qstr *); +extern struct dentry * __d_lookup(struct dentry *, struct qstr *); +extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *); /* validate "insecure" dentry pointer */ extern int d_validate(struct dentry *, struct dentry *); @@ -321,29 +318,28 @@ extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ /** - * dget, dget_dlock - get a reference to a dentry + * dget, dget_locked - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. + * destroyed when it has references. dget() should never be + * called for dentries with zero reference counter. 
For these cases + * (preferably none, functions in dcache.c are sufficient for normal + * needs and they take necessary precautions) you should hold dcache_lock + * and call dget_locked() instead of dget(). */ -static inline struct dentry *dget_dlock(struct dentry *dentry) -{ - if (dentry) - atomic_inc(&dentry->d_count); - return dentry; -} - + static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { - dget_dlock(dentry); + BUG_ON(!atomic_read(&dentry->d_count)); + atomic_inc(&dentry->d_count); } return dentry; } -extern struct dentry *dget_parent(struct dentry *dentry); +extern struct dentry * dget_locked(struct dentry *); /** * d_unhashed - is dentry hashed @@ -362,6 +358,16 @@ static inline int d_unlinked(struct dentry *dentry) return d_unhashed(dentry) && !IS_ROOT(dentry); } +static inline struct dentry *dget_parent(struct dentry *dentry) +{ + struct dentry *ret; + + spin_lock(&dentry->d_lock); + ret = dget(dentry->d_parent); + spin_unlock(&dentry->d_lock); + return ret; +} + extern void dput(struct dentry *); static inline int d_mountpoint(struct dentry *dentry) diff --git a/include/linux/fs.h b/include/linux/fs.h index 4981e6ee3ba5..5191f49c2fec 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -406,8 +406,6 @@ extern struct files_stat_struct files_stat; extern int get_max_files(void); extern int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; -extern struct percpu_counter nr_inodes; -extern int get_nr_inodes(void); extern int leases_enable, lease_break_time; #ifdef CONFIG_DNOTIFY extern int dir_notify_enable; @@ -727,15 +725,9 @@ struct inode { struct hlist_node i_hash; struct list_head i_list; /* backing dev IO list */ struct list_head i_sb_list; - union { - struct list_head i_dentry; - struct rcu_head i_rcu; - }; + struct list_head i_dentry; unsigned long i_ino; -#ifdef CONFIG_SMP - int i_sb_list_cpu; -#endif - unsigned int i_count; + atomic_t i_count; unsigned int i_nlink; uid_t i_uid; gid_t i_gid; @@ -932,9 +924,6 @@ 
struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ -#ifdef CONFIG_SMP - int f_sb_list_cpu; -#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -959,6 +948,9 @@ struct file { unsigned long f_mnt_write_state; #endif }; +extern spinlock_t files_lock; +#define file_list_lock() spin_lock(&files_lock); +#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define file_count(x) atomic_long_read(&(x)->f_count) @@ -1347,17 +1339,9 @@ struct super_block { #endif struct xattr_handler **s_xattr; - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ -#ifdef CONFIG_SMP - struct list_head *s_inodes; -#else struct list_head s_inodes; /* all inodes */ -#endif -#ifdef CONFIG_SMP - struct list_head *s_files; -#else + struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ struct list_head s_files; -#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ @@ -2053,7 +2037,6 @@ extern const struct file_operations read_pipefifo_fops; extern const struct file_operations write_pipefifo_fops; extern const struct file_operations rdwr_pipefifo_fops; -extern void mark_files_ro(struct super_block *sb); extern int fs_may_remount_ro(struct super_block *); #ifdef CONFIG_BLOCK @@ -2185,6 +2168,7 @@ extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struc extern int insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); +extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); @@ -2193,17 +2177,14 @@ extern struct inode *new_inode(struct super_block *); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct 
file *); -extern void inode_sb_list_del(struct inode *inode); extern void __insert_inode_hash(struct inode *, unsigned long hashval); -extern void __remove_inode_hash(struct inode *); extern void remove_inode_hash(struct inode *); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern struct file * get_empty_filp(void); -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); +extern void file_move(struct file *f, struct list_head *list); +extern void file_kill(struct file *f); #ifdef CONFIG_BLOCK struct bio; extern void submit_bio(int, struct bio *); @@ -2404,20 +2385,10 @@ extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); extern void save_mount_options(struct super_block *sb, char *options); extern void replace_mount_options(struct super_block *sb, char *options); -static inline void __iget(struct inode *inode) -{ - assert_spin_locked(&inode->i_lock); - inode->i_count++; -} - static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; - /* - * Don't strictly need d_lock here? If the parent ino could change - * then surely we'd have a deeper race in the caller? 
- */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); @@ -2493,8 +2464,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -int proc_nr_inodes(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + int __init get_filesystem_list(char *buf); #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e776fb5ed01a..4d6f47b51189 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -276,10 +276,10 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) { struct dentry *parent; + assert_spin_locked(&dcache_lock); assert_spin_locked(&dentry->d_lock); parent = dentry->d_parent; - /* XXX: after dcache_lock removal, there is a race with parent->d_inode and fsnotify_inode_watches_children. must fix */ if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else @@ -288,12 +288,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) /* * fsnotify_d_instantiate - instantiate a dentry for inode + * Called with dcache_lock held. 
*/ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) { if (!inode) return; + assert_spin_locked(&dcache_lock); + spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); spin_unlock(&dentry->d_lock); @@ -344,7 +347,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); -extern void fsnotify_unmount_inodes(struct super_block *sb); +extern void fsnotify_unmount_inodes(struct list_head *list); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, @@ -374,7 +377,7 @@ static inline u32 fsnotify_get_cookie(void) return 0; } -static inline void fsnotify_unmount_inodes(struct super_block *sb) +static inline void fsnotify_unmount_inodes(struct list_head *list) {} #endif /* CONFIG_FSNOTIFY */ diff --git a/include/linux/inotify.h b/include/linux/inotify.h index e8bcd7c6c0cc..37ea2894b3c0 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -111,7 +111,7 @@ extern void inotify_inode_queue_event(struct inode *, __u32, __u32, const char *, struct inode *); extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, const char *); -extern void inotify_unmount_inodes(struct super_block *); +extern void inotify_unmount_inodes(struct list_head *); extern void inotify_inode_is_dead(struct inode *); extern u32 inotify_get_cookie(void); @@ -161,7 +161,7 @@ static inline void inotify_dentry_parent_queue_event(struct dentry *dentry, { } -static inline void inotify_unmount_inodes(struct super_block *sb) +static inline void inotify_unmount_inodes(struct list_head *list) { } diff --git a/include/linux/mount.h b/include/linux/mount.h index 849e70535047..5d5275364867 100644 
--- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -31,13 +31,11 @@ struct mnt_namespace; #define MNT_SHRINKABLE 0x100 #define MNT_WRITE_HOLD 0x200 -#define MNT_MOUNTED 0x400 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ #define MNT_PNODE_MASK 0x3000 /* propagation flag mask */ - struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ @@ -58,6 +56,12 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ + /* + * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount + * to let these frequently modified fields in a separate cache line + * (so that reads of mnt_flags wont ping-pong on SMP machines) + */ + atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; @@ -66,11 +70,6 @@ struct vfsmount { #else int mnt_writers; #endif -#ifdef CONFIG_SMP - int *mnt_count; -#else - int mnt_count; -#endif }; static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) @@ -82,28 +81,32 @@ static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) #endif } -struct file; /* forward dec */ - -extern void vfsmount_read_lock(int cpu); -extern void vfsmount_read_unlock(int cpu); -extern void vfsmount_write_lock(void); -extern void vfsmount_write_unlock(void); +static inline struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) + atomic_inc(&mnt->mnt_count); + return mnt; +} -extern unsigned int count_mnt_count(struct vfsmount *mnt); +struct file; /* forward dec */ extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); - extern void mntput_no_expire(struct vfsmount *mnt); -extern struct vfsmount *mntget(struct vfsmount *mnt); 
-extern void mntput(struct vfsmount *mnt); - extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); +static inline void mntput(struct vfsmount *mnt) +{ + if (mnt) { + mnt->mnt_expiry_mark = 0; + mntput_no_expire(mnt); + } +} + extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); @@ -120,6 +123,7 @@ extern int do_add_mount(struct vfsmount *newmnt, struct path *path, extern void mark_mounts_for_expiry(struct list_head *mounts); +extern spinlock_t vfsmount_lock; extern dev_t name_to_dev_t(char *name); #endif /* _LINUX_MOUNT_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index e5c5ba2327f1..42f207676016 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -465,7 +465,6 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex tty_mutex; -extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 15e8bcd90cd1..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -9,8 +9,8 @@ struct backing_dev_info; -extern spinlock_t sb_inode_list_lock; -extern spinlock_t wb_inode_list_lock; +extern spinlock_t inode_lock; +extern struct list_head inode_in_use; extern struct list_head inode_unused; /* |