From 905cd0e1bf9ffe82d6906a01fd974ea0f70be97a Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Tue, 30 Apr 2013 15:26:50 -0700 Subject: mm: frontswap: lazy initialization to allow tmem backends to build/run as modules With the goal of allowing tmem backends (zcache, ramster, Xen tmem) to be built/loaded as modules rather than built-in and enabled by a boot parameter, this patch provides "lazy initialization", allowing backends to register to frontswap even after swapon was run. Before a backend registers all calls to init are recorded and the creation of tmem_pools delayed until a backend registers or until a frontswap store is attempted. Signed-off-by: Stefan Hengelein Signed-off-by: Florian Schmaus Signed-off-by: Andor Daam Signed-off-by: Dan Magenheimer [v1: Fixes per Seth Jennings suggestions] [v2: Removed FRONTSWAP_HAS_.. ] [v3: Fix up per Bob Liu recommendations] [v4: Fix up per Andrew's comments] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Dan Magenheimer Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/frontswap.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/mm/frontswap.c b/mm/frontswap.c index 2890e67d6026..cbd2b8af8129 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -80,6 +80,46 @@ static inline void inc_frontswap_succ_stores(void) { } static inline void inc_frontswap_failed_stores(void) { } static inline void inc_frontswap_invalidates(void) { } #endif + +/* + * Due to the asynchronous nature of the backends loading potentially + * _after_ the swap system has been activated, we have chokepoints + * on all frontswap functions to not call the backend until the backend + * has registered. + * + * Specifically when no backend is registered (nobody called + * frontswap_register_ops) all calls to frontswap_init (which is done via + * swapon -> enable_swap_info -> frontswap_init) are registered and remembered + * (via the setting of need_init bitmap) but fail to create tmem_pools. When a + * backend registers with frontswap at some later point the previous + * calls to frontswap_init are executed (by iterating over the need_init + * bitmap) to create tmem_pools and set the respective poolids. All of that is + * guarded by us using atomic bit operations on the 'need_init' bitmap. + * + * This would not guards us against the user deciding to call swapoff right as + * we are calling the backend to initialize (so swapon is in action). + * Fortunatly for us, the swapon_mutex has been taked by the callee so we are + * OK. The other scenario where calls to frontswap_store (called via + * swap_writepage) is racing with frontswap_invalidate_area (called via + * swapoff) is again guarded by the swap subsystem. + * + * While no backend is registered all calls to frontswap_[store|load| + * invalidate_area|invalidate_page] are ignored or fail. + * + * The time between the backend being registered and the swap file system + * calling the backend (via the frontswap_* functions) is indeterminate as + * backend_registered is not atomic_t (or a value guarded by a spinlock). + * That is OK as we are comfortable missing some of these calls to the newly + * registered backend. 
+ * + * Obviously the opposite (unloading the backend) must be done after all + * the frontswap_[store|load|invalidate_area|invalidate_page] start + * ignorning or failing the requests - at which point backend_registered + * would have to be made in some fashion atomic. + */ +static DECLARE_BITMAP(need_init, MAX_SWAPFILES); +static bool backend_registered __read_mostly; + /* * Register operations for frontswap, returning previous thus allowing * detection of multiple backends and possible nesting. @@ -87,9 +127,22 @@ static inline void inc_frontswap_invalidates(void) { } struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) { struct frontswap_ops old = frontswap_ops; + int i; frontswap_ops = *ops; frontswap_enabled = true; + + for (i = 0; i < MAX_SWAPFILES; i++) { + if (test_and_clear_bit(i, need_init)) + (*frontswap_ops.init)(i); + } + /* + * We MUST have backend_registered set _after_ the frontswap_init's + * have been called. Otherwise __frontswap_store might fail. Hence + * the barrier to make sure compiler does not re-order us. + */ + barrier(); + backend_registered = true; return old; } EXPORT_SYMBOL(frontswap_register_ops); @@ -119,10 +172,16 @@ void __frontswap_init(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; - frontswap_ops.init(type); + if (backend_registered) { + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + (*frontswap_ops.init)(type); + } else { + BUG_ON(type > MAX_SWAPFILES); + set_bit(type, need_init); + } + } EXPORT_SYMBOL(__frontswap_init); @@ -147,6 +206,11 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); + if (!backend_registered) { + inc_frontswap_failed_stores(); + return ret; + } + BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) @@ -186,6 +250,9 @@ int __frontswap_load(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); + if (!backend_registered) + return ret; + BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) @@ -209,6 +276,9 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; + if (!backend_registered) + return; + BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) { frontswap_ops.invalidate_page(type, offset); @@ -226,12 +296,15 @@ void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; - frontswap_ops.invalidate_area(type); - atomic_set(&sis->frontswap_pages, 0); - memset(sis->frontswap_map, 0, sis->max / sizeof(long)); + if (backend_registered) { + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + (*frontswap_ops.invalidate_area)(type); + atomic_set(&sis->frontswap_pages, 0); + memset(sis->frontswap_map, 0, sis->max / sizeof(long)); + } + clear_bit(type, need_init); } EXPORT_SYMBOL(__frontswap_invalidate_area); @@ -364,6 +437,7 @@ static int __init init_frontswap(void) debugfs_create_u64("invalidates", S_IRUGO, root, &frontswap_invalidates); #endif + frontswap_enabled = 1; return 0; } -- cgit v1.2.3 From 1e01c968db3d0aebd48e31db15f24516b03128df Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 30 Apr 2013 15:26:51 -0700 Subject: frontswap: make frontswap_init use a pointer for the ops This simplifies the code in the frontswap - we can get rid of 
the 'backend_registered' test and instead check against frontswap_ops. [v1: Rebase on top of 703ba7fe5e0 (ramster->zcache move] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/zcache/zcache-main.c | 8 ++++---- drivers/xen/tmem.c | 6 +++--- include/linux/frontswap.h | 2 +- mm/frontswap.c | 38 +++++++++++++++++------------------- 4 files changed, 26 insertions(+), 28 deletions(-) (limited to 'mm') diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index e23d814b5392..09c69c8026f9 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1707,9 +1707,9 @@ static struct frontswap_ops zcache_frontswap_ops = { .init = zcache_frontswap_init }; -struct frontswap_ops zcache_frontswap_register_ops(void) +struct frontswap_ops *zcache_frontswap_register_ops(void) { - struct frontswap_ops old_ops = + struct frontswap_ops *old_ops = frontswap_register_ops(&zcache_frontswap_ops); return old_ops; @@ -1874,7 +1874,7 @@ static int __init zcache_init(void) pr_warn("%s: cleancache_ops overridden\n", namestr); } if (zcache_enabled && !disable_frontswap) { - struct frontswap_ops old_ops; + struct frontswap_ops *old_ops; old_ops = zcache_frontswap_register_ops(); if (frontswap_has_exclusive_gets) @@ -1886,7 +1886,7 @@ static int __init zcache_init(void) namestr, frontswap_has_exclusive_gets, !disable_frontswap_ignore_nonactive); #endif - if (old_ops.init != NULL) + if (old_ops != NULL) pr_warn("%s: frontswap_ops overridden\n", namestr); } if (ramster_enabled) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 3ee836d42581..7a01a5fd0f63 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -362,7 +362,7 @@ static int __init no_frontswap(char *s) } __setup("nofrontswap", no_frontswap); -static struct frontswap_ops __initdata tmem_frontswap_ops = { +static struct frontswap_ops tmem_frontswap_ops = { .store = tmem_frontswap_store, .load = tmem_frontswap_load, .invalidate_page = tmem_frontswap_flush_page, @@ -378,11 +378,11 @@ static int __init xen_tmem_init(void) #ifdef CONFIG_FRONTSWAP if (tmem_enabled && use_frontswap) { char *s = ""; - struct frontswap_ops old_ops = + struct frontswap_ops *old_ops = frontswap_register_ops(&tmem_frontswap_ops); tmem_frontswap_poolid = -1; - if (old_ops.init != NULL) + if (old_ops) s = " (WARNING: frontswap_ops overridden)"; printk(KERN_INFO "frontswap enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 30442547b9e6..d4f29875c7cc 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -14,7 +14,7 @@ struct frontswap_ops { }; extern bool frontswap_enabled; -extern struct frontswap_ops +extern struct frontswap_ops * frontswap_register_ops(struct frontswap_ops *ops); extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); diff --git a/mm/frontswap.c b/mm/frontswap.c index cbd2b8af8129..e44c9cbd1443 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -24,7 +24,7 @@ * frontswap_ops is set by frontswap_register_ops to contain the pointers * to the frontswap "backend" implementation functions. 
*/ -static struct frontswap_ops frontswap_ops __read_mostly; +static struct frontswap_ops *frontswap_ops __read_mostly; /* * This global enablement flag reduces overhead on systems where frontswap_ops @@ -108,41 +108,39 @@ static inline void inc_frontswap_invalidates(void) { } * * The time between the backend being registered and the swap file system * calling the backend (via the frontswap_* functions) is indeterminate as - * backend_registered is not atomic_t (or a value guarded by a spinlock). + * frontswap_ops is not atomic_t (or a value guarded by a spinlock). * That is OK as we are comfortable missing some of these calls to the newly * registered backend. * * Obviously the opposite (unloading the backend) must be done after all * the frontswap_[store|load|invalidate_area|invalidate_page] start - * ignorning or failing the requests - at which point backend_registered + * ignorning or failing the requests - at which point frontswap_ops * would have to be made in some fashion atomic. */ static DECLARE_BITMAP(need_init, MAX_SWAPFILES); -static bool backend_registered __read_mostly; /* * Register operations for frontswap, returning previous thus allowing * detection of multiple backends and possible nesting. */ -struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) +struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) { - struct frontswap_ops old = frontswap_ops; + struct frontswap_ops *old = frontswap_ops; int i; - frontswap_ops = *ops; frontswap_enabled = true; for (i = 0; i < MAX_SWAPFILES; i++) { if (test_and_clear_bit(i, need_init)) - (*frontswap_ops.init)(i); + ops->init(i); } /* - * We MUST have backend_registered set _after_ the frontswap_init's + * We MUST have frontswap_ops set _after_ the frontswap_init's * have been called. Otherwise __frontswap_store might fail. Hence * the barrier to make sure compiler does not re-order us. 
*/ barrier(); - backend_registered = true; + frontswap_ops = ops; return old; } EXPORT_SYMBOL(frontswap_register_ops); @@ -172,11 +170,11 @@ void __frontswap_init(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - if (backend_registered) { + if (frontswap_ops) { BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; - (*frontswap_ops.init)(type); + frontswap_ops->init(type); } else { BUG_ON(type > MAX_SWAPFILES); set_bit(type, need_init); @@ -206,7 +204,7 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!backend_registered) { + if (!frontswap_ops) { inc_frontswap_failed_stores(); return ret; } @@ -215,7 +213,7 @@ int __frontswap_store(struct page *page) BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) dup = 1; - ret = frontswap_ops.store(type, offset, page); + ret = frontswap_ops->store(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); inc_frontswap_succ_stores(); @@ -250,13 +248,13 @@ int __frontswap_load(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!backend_registered) + if (!frontswap_ops) return ret; BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) - ret = frontswap_ops.load(type, offset, page); + ret = frontswap_ops->load(type, offset, page); if (ret == 0) { inc_frontswap_loads(); if (frontswap_tmem_exclusive_gets_enabled) { @@ -276,12 +274,12 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; - if (!backend_registered) + if (!frontswap_ops) return; BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) { - frontswap_ops.invalidate_page(type, offset); + frontswap_ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); } @@ -296,11 +294,11 @@ void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - if (backend_registered) { + if (frontswap_ops) { BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; - (*frontswap_ops.invalidate_area)(type); + frontswap_ops->invalidate_area(type); atomic_set(&sis->frontswap_pages, 0); memset(sis->frontswap_map, 0, sis->max / sizeof(long)); } -- cgit v1.2.3 From f066ea230a65f939afc354beae62716ab5f0e645 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 30 Apr 2013 15:26:53 -0700 Subject: mm: frontswap: cleanup code After allowing tmem backends to build/run as modules, frontswap_enabled always true if defined CONFIG_FRONTSWAP. But frontswap_test() depends on whether backend is registered, mv it into frontswap.c using fronstswap_ops to make the decision. frontswap_set/clear are not used outside frontswap, so don't export them. 
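
The frontswap-side pattern that the three patches above build up (record init requests in a bitmap while no backend exists, replay them when one registers, and treat a single ops pointer as the "is a backend loaded" test) can be sketched outside the kernel roughly as below. This is a simplified userspace mock-up, not the kernel code; the names (pending_init, backend_ops, MAX_DEVICES, register_backend) are illustrative stand-ins for the real symbols.

/*
 * Userspace sketch of the deferral pattern: "init" requests that arrive
 * before a backend exists are recorded in a bitmap and replayed when the
 * backend finally registers.  All names here are illustrative.
 */
#include <stdio.h>

#define MAX_DEVICES 32

struct backend_ops {
	void (*init)(unsigned type);
};

static unsigned long pending_init[MAX_DEVICES / (8 * sizeof(unsigned long)) + 1];
static struct backend_ops *backend;	/* NULL until a backend registers */

static void set_pending(unsigned type)
{
	pending_init[type / (8 * sizeof(unsigned long))] |=
		1UL << (type % (8 * sizeof(unsigned long)));
}

static int test_and_clear_pending(unsigned type)
{
	unsigned long *w = &pending_init[type / (8 * sizeof(unsigned long))];
	unsigned long bit = 1UL << (type % (8 * sizeof(unsigned long)));
	int was_set = !!(*w & bit);

	*w &= ~bit;
	return was_set;
}

/* Called when a device is enabled (the "swapon" moment). */
static void device_init(unsigned type)
{
	if (backend)
		backend->init(type);	/* backend already loaded: init now */
	else
		set_pending(type);	/* remember it for later */
}

/* Called when a backend module finally loads. */
static void register_backend(struct backend_ops *ops)
{
	unsigned i;

	/* Replay every init that happened before the backend existed. */
	for (i = 0; i < MAX_DEVICES; i++)
		if (test_and_clear_pending(i))
			ops->init(i);
	/*
	 * Publish the ops pointer only after the replays, mirroring the
	 * barrier()-then-assign order used in the kernel patch.
	 */
	backend = ops;
}

static void demo_init(unsigned type)
{
	printf("backend init for device %u\n", type);
}

int main(void)
{
	static struct backend_ops demo = { .init = demo_init };

	device_init(3);			/* before the backend is loaded */
	register_backend(&demo);	/* replays init for device 3 */
	device_init(5);			/* backend present: init runs immediately */
	return 0;
}

The design choice the sketch tries to capture is that a NULL/non-NULL ops pointer doubles as the registration flag, which is why the separate backend_registered boolean could be dropped.
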
Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Konrad Rzeszutek Wilk Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 28 +++-------------------- mm/frontswap.c | 57 +++++++++++++++++++++++++---------------------- 2 files changed, 33 insertions(+), 52 deletions(-) (limited to 'mm') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index d4f29875c7cc..6c49e1eba552 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -22,6 +22,7 @@ extern void frontswap_writethrough(bool); #define FRONTSWAP_HAS_EXCLUSIVE_GETS extern void frontswap_tmem_exclusive_gets(bool); +extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); extern void __frontswap_init(unsigned type); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); @@ -29,26 +30,11 @@ extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); #ifdef CONFIG_FRONTSWAP +#define frontswap_enabled (1) static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) { - bool ret = false; - - if (frontswap_enabled && sis->frontswap_map) - ret = test_bit(offset, sis->frontswap_map); - return ret; -} - -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - set_bit(offset, sis->frontswap_map); -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - clear_bit(offset, sis->frontswap_map); + return __frontswap_test(sis, offset); } static inline void frontswap_map_set(struct swap_info_struct *p, @@ -71,14 +57,6 @@ static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) return false; } -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ -} - static inline void frontswap_map_set(struct swap_info_struct *p, unsigned long *map) { diff --git a/mm/frontswap.c b/mm/frontswap.c index e44c9cbd1443..2760b0f98822 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -26,14 +26,6 @@ */ static struct frontswap_ops *frontswap_ops __read_mostly; -/* - * This global enablement flag reduces overhead on systems where frontswap_ops - * has not been registered, so is preferred to the slower alternative: a - * function call that checks a non-global. - */ -bool frontswap_enabled __read_mostly; -EXPORT_SYMBOL(frontswap_enabled); - /* * If enabled, frontswap_store will return failure even on success. 
As * a result, the swap subsystem will always write the page to swap, in @@ -128,8 +120,6 @@ struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) struct frontswap_ops *old = frontswap_ops; int i; - frontswap_enabled = true; - for (i = 0; i < MAX_SWAPFILES; i++) { if (test_and_clear_bit(i, need_init)) ops->init(i); @@ -183,9 +173,21 @@ void __frontswap_init(unsigned type) } EXPORT_SYMBOL(__frontswap_init); -static inline void __frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +bool __frontswap_test(struct swap_info_struct *sis, + pgoff_t offset) +{ + bool ret = false; + + if (frontswap_ops && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} +EXPORT_SYMBOL(__frontswap_test); + +static inline void __frontswap_clear(struct swap_info_struct *sis, + pgoff_t offset) { - frontswap_clear(sis, offset); + clear_bit(offset, sis->frontswap_map); atomic_dec(&sis->frontswap_pages); } @@ -204,18 +206,20 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!frontswap_ops) { - inc_frontswap_failed_stores(); + /* + * Return if no backend registed. + * Don't need to inc frontswap_failed_stores here. + */ + if (!frontswap_ops) return ret; - } BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) + if (__frontswap_test(sis, offset)) dup = 1; ret = frontswap_ops->store(type, offset, page); if (ret == 0) { - frontswap_set(sis, offset); + set_bit(offset, sis->frontswap_map); inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); @@ -248,18 +252,18 @@ int __frontswap_load(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!frontswap_ops) - return ret; - BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) + /* + * __frontswap_test() will check whether there is backend registered + */ + if (__frontswap_test(sis, offset)) ret = frontswap_ops->load(type, offset, page); if (ret == 0) { inc_frontswap_loads(); if (frontswap_tmem_exclusive_gets_enabled) { SetPageDirty(page); - frontswap_clear(sis, offset); + __frontswap_clear(sis, offset); } } return ret; @@ -274,11 +278,11 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; - if (!frontswap_ops) - return; - BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) { + /* + * __frontswap_test() will check whether there is backend registered + */ + if (__frontswap_test(sis, offset)) { frontswap_ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); @@ -435,7 +439,6 @@ static int __init init_frontswap(void) debugfs_create_u64("invalidates", S_IRUGO, root, &frontswap_invalidates); #endif - frontswap_enabled = 1; return 0; } -- cgit v1.2.3 From 4f89849da22db9d0edb378acea65e23fcd546173 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 30 Apr 2013 15:26:54 -0700 Subject: frontswap: get rid of swap_lock dependency Frontswap initialization routine depends on swap_lock, which want to be atomic about frontswap's first appearance. IOW, frontswap is not present and will fail all calls OR frontswap is fully functional but if new swap_info_struct isn't registered by enable_swap_info, swap subsystem doesn't start I/O so there is no race between init procedure and page I/O working on frontswap. So let's remove unnecessary swap_lock dependency. 
Cc: Dan Magenheimer Signed-off-by: Minchan Kim [v1: Rebased on my branch, reworked to work with backends loading late] [v2: Added a check for !map] [v3: Made the invalidate path follow the init path] [v4: Address comments by Wanpeng Li ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Florian Schmaus Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 6 +++--- mm/frontswap.c | 31 +++++++++++++++++++++++-------- mm/swapfile.c | 17 +++++++++-------- 3 files changed, 35 insertions(+), 19 deletions(-) (limited to 'mm') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 6c49e1eba552..8293262401de 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -23,7 +23,7 @@ extern void frontswap_writethrough(bool); extern void frontswap_tmem_exclusive_gets(bool); extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); -extern void __frontswap_init(unsigned type); +extern void __frontswap_init(unsigned type, unsigned long *map); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); @@ -98,10 +98,10 @@ static inline void frontswap_invalidate_area(unsigned type) __frontswap_invalidate_area(type); } -static inline void frontswap_init(unsigned type) +static inline void frontswap_init(unsigned type, unsigned long *map) { if (frontswap_enabled) - __frontswap_init(type); + __frontswap_init(type, map); } #endif /* _LINUX_FRONTSWAP_H */ diff --git a/mm/frontswap.c b/mm/frontswap.c index 2760b0f98822..538367ef1372 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -121,8 +121,13 @@ struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) int i; for (i = 0; i < MAX_SWAPFILES; i++) { - if (test_and_clear_bit(i, need_init)) + if (test_and_clear_bit(i, need_init)) { + struct swap_info_struct *sis = swap_info[i]; + /* __frontswap_init _should_ have set it! */ + if (!sis->frontswap_map) + return ERR_PTR(-EINVAL); ops->init(i); + } } /* * We MUST have frontswap_ops set _after_ the frontswap_init's @@ -156,20 +161,30 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets); /* * Called when a swap device is swapon'd. */ -void __frontswap_init(unsigned type) +void __frontswap_init(unsigned type, unsigned long *map) { struct swap_info_struct *sis = swap_info[type]; - if (frontswap_ops) { - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; + BUG_ON(sis == NULL); + + /* + * p->frontswap is a bitmap that we MUST have to figure out which page + * has gone in frontswap. Without it there is no point of continuing. + */ + if (WARN_ON(!map)) + return; + /* + * Irregardless of whether the frontswap backend has been loaded + * before this function or it will be later, we _MUST_ have the + * p->frontswap set to something valid to work properly. 
+ */ + frontswap_map_set(sis, map); + if (frontswap_ops) frontswap_ops->init(type); - } else { + else { BUG_ON(type > MAX_SWAPFILES); set_bit(type, need_init); } - } EXPORT_SYMBOL(__frontswap_init); diff --git a/mm/swapfile.c b/mm/swapfile.c index d417efddfe74..6c340d908b27 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1509,8 +1509,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) } static void _enable_swap_info(struct swap_info_struct *p, int prio, - unsigned char *swap_map, - unsigned long *frontswap_map) + unsigned char *swap_map) { int i, prev; @@ -1519,7 +1518,6 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio, else p->prio = --least_priority; p->swap_map = swap_map; - frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; atomic_long_add(p->pages, &nr_swap_pages); total_swap_pages += p->pages; @@ -1542,10 +1540,10 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, unsigned char *swap_map, unsigned long *frontswap_map) { + frontswap_init(p->type, frontswap_map); spin_lock(&swap_lock); spin_lock(&p->lock); - _enable_swap_info(p, prio, swap_map, frontswap_map); - frontswap_init(p->type); + _enable_swap_info(p, prio, swap_map); spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1554,7 +1552,7 @@ static void reinsert_swap_info(struct swap_info_struct *p) { spin_lock(&swap_lock); spin_lock(&p->lock); - _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); + _enable_swap_info(p, p->prio, p->swap_map); spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1563,6 +1561,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; unsigned char *swap_map; + unsigned long *frontswap_map; struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; @@ -1662,12 +1661,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; - frontswap_invalidate_area(type); + frontswap_map = frontswap_map_get(p); + frontswap_map_set(p, NULL); spin_unlock(&p->lock); spin_unlock(&swap_lock); + frontswap_invalidate_area(type); mutex_unlock(&swapon_mutex); vfree(swap_map); - vfree(frontswap_map_get(p)); + vfree(frontswap_map); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); -- cgit v1.2.3 From 49a9ab815acb8379a2f5fd43abe40038821e8f87 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Tue, 30 Apr 2013 15:26:56 -0700 Subject: mm: cleancache: lazy initialization to allow tmem backends to build/run as modules With the goal of allowing tmem backends (zcache, ramster, Xen tmem) to be built/loaded as modules rather than built-in and enabled by a boot parameter, this patch provides "lazy initialization", allowing backends to register to cleancache even after filesystems were mounted. Calls to init_fs and init_shared_fs are remembered as fake poolids but no real tmem_pools created. On backend registration the fake poolids are mapped to real poolids and respective tmem_pools. 
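
The fake-poolid indirection described above can be illustrated with the following standalone sketch. It is not the kernel code: the constants loosely mirror the patch, but init_fs, register_backend and get_poolid_from_fake are simplified stand-ins and the real implementation also handles shared filesystems and locking.

/*
 * A mount gets a fake id immediately; the real backend pool id is filled
 * in later, when (and if) a backend registers.
 */
#include <stdio.h>

#define MAX_FS			32
#define FAKE_FS_POOLID_OFFSET	1000
#define FS_NO_BACKEND		(-1)
#define FS_UNKNOWN		(-2)

static int fs_poolid_map[MAX_FS];
static int next_real_poolid;
static int backend_loaded;

/* mount time: hand out a fake id, defer real pool creation if needed */
static int init_fs(void)
{
	int i;

	for (i = 0; i < MAX_FS; i++) {
		if (fs_poolid_map[i] == FS_UNKNOWN) {
			fs_poolid_map[i] = backend_loaded ?
				next_real_poolid++ : FS_NO_BACKEND;
			return i + FAKE_FS_POOLID_OFFSET;
		}
	}
	return -1;
}

/* backend registration: create a real pool for every deferred mount */
static void register_backend(void)
{
	int i;

	for (i = 0; i < MAX_FS; i++)
		if (fs_poolid_map[i] == FS_NO_BACKEND)
			fs_poolid_map[i] = next_real_poolid++;
	backend_loaded = 1;
}

/* I/O path: translate the fake id back to a real pool id (or fail) */
static int get_poolid_from_fake(int fake_pool_id)
{
	if (fake_pool_id >= FAKE_FS_POOLID_OFFSET &&
	    fake_pool_id < FAKE_FS_POOLID_OFFSET + MAX_FS)
		return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET];
	return FS_NO_BACKEND;
}

int main(void)
{
	int i, fake;

	for (i = 0; i < MAX_FS; i++)
		fs_poolid_map[i] = FS_UNKNOWN;

	fake = init_fs();		/* mount before the backend loads */
	printf("real id before registration: %d\n", get_poolid_from_fake(fake));
	register_backend();		/* "modprobe" the backend */
	printf("real id after registration: %d\n", get_poolid_from_fake(fake));
	return 0;
}
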
Signed-off-by: Stefan Hengelein Signed-off-by: Florian Schmaus Signed-off-by: Andor Daam Signed-off-by: Dan Magenheimer [v1: Minor fixes: used #define for some values and bools] [v2: Removed CLEANCACHE_HAS_LAZY_INIT] [v3: Added more comments, added a lock for [shared_|]fs_poolid_map] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/cleancache.c | 240 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 219 insertions(+), 21 deletions(-) (limited to 'mm') diff --git a/mm/cleancache.c b/mm/cleancache.c index d76ba74be2d0..0cecdbba4bcd 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -45,15 +45,99 @@ static u64 cleancache_puts; static u64 cleancache_invalidates; /* - * register operations for cleancache, returning previous thus allowing - * detection of multiple backends and possible nesting + * When no backend is registered all calls to init_fs and init_shared_fs + * are registered and fake poolids (FAKE_FS_POOLID_OFFSET or + * FAKE_SHARED_FS_POOLID_OFFSET, plus offset in the respective array + * [shared_|]fs_poolid_map) are given to the respective super block + * (sb->cleancache_poolid) and no tmem_pools are created. When a backend + * registers with cleancache the previous calls to init_fs and init_shared_fs + * are executed to create tmem_pools and set the respective poolids. While no + * backend is registered all "puts", "gets" and "flushes" are ignored or failed. + */ +#define MAX_INITIALIZABLE_FS 32 +#define FAKE_FS_POOLID_OFFSET 1000 +#define FAKE_SHARED_FS_POOLID_OFFSET 2000 + +#define FS_NO_BACKEND (-1) +#define FS_UNKNOWN (-2) +static int fs_poolid_map[MAX_INITIALIZABLE_FS]; +static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS]; +static char *uuids[MAX_INITIALIZABLE_FS]; +/* + * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads + * invoking umount (and ending in __cleancache_invalidate_fs) and also multiple + * threads calling mount (and ending up in __cleancache_init_[shared|]fs). + */ +static DEFINE_MUTEX(poolid_mutex); +/* + * When set to false (default) all calls to the cleancache functions, except + * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded + * by the if (!backend_registered) return. This means multiple threads (from + * different filesystems) will be checking backend_registered. The usage of a + * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are + * OK if the time between the backend's have been initialized (and + * backend_registered has been set to true) and when the filesystems start + * actually calling the backends. The inverse (when unloading) is obviously + * not good - but this shim does not do that (yet). + */ +static bool backend_registered __read_mostly; + +/* + * The backends and filesystems work all asynchronously. This is b/c the + * backends can be built as modules. + * The usual sequence of events is: + * a) mount / -> __cleancache_init_fs is called. We set the + * [shared_|]fs_poolid_map and uuids for. + * + * b). user does I/Os -> we call the rest of __cleancache_* functions + * which return immediately as backend_registered is false. + * + * c). modprobe zcache -> cleancache_register_ops. We init the backend + * and set backend_registered to true, and for any fs_poolid_map + * (which is set by __cleancache_init_fs) we initialize the poolid. + * + * d). 
user does I/Os -> now that backend_registered is true all the + * __cleancache_* functions can call the backend. They all check + * that fs_poolid_map is valid and if so invoke the backend. + * + * e). umount / -> __cleancache_invalidate_fs, the fs_poolid_map is + * reset (which is the second check in the __cleancache_* ops + * to call the backend). + * + * The sequence of event could also be c), followed by a), and d). and e). The + * c) would not happen anymore. There is also the chance of c), and one thread + * doing a) + d), and another doing e). For that case we depend on the + * filesystem calling __cleancache_invalidate_fs in the proper sequence (so + * that it handles all I/Os before it invalidates the fs (which is last part + * of unmounting process). + * + * Note: The acute reader will notice that there is no "rmmod zcache" case. + * This is b/c the functionality for that is not yet implemented and when + * done, will require some extra locking not yet devised. + */ + +/* + * Register operations for cleancache, returning previous thus allowing + * detection of multiple backends and possible nesting. */ struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) { struct cleancache_ops old = cleancache_ops; + int i; + mutex_lock(&poolid_mutex); cleancache_ops = *ops; - cleancache_enabled = 1; + + backend_registered = true; + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (fs_poolid_map[i] == FS_NO_BACKEND) + fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + if (shared_fs_poolid_map[i] == FS_NO_BACKEND) + shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + (uuids[i], PAGE_SIZE); + } +out: + mutex_unlock(&poolid_mutex); return old; } EXPORT_SYMBOL(cleancache_register_ops); @@ -61,15 +145,42 @@ EXPORT_SYMBOL(cleancache_register_ops); /* Called by a cleancache-enabled filesystem at time of mount */ void __cleancache_init_fs(struct super_block *sb) { - sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); + int i; + + mutex_lock(&poolid_mutex); + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (fs_poolid_map[i] == FS_UNKNOWN) { + sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET; + if (backend_registered) + fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + else + fs_poolid_map[i] = FS_NO_BACKEND; + break; + } + } + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_init_fs); /* Called by a cleancache-enabled clustered filesystem at time of mount */ void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) { - sb->cleancache_poolid = - (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); + int i; + + mutex_lock(&poolid_mutex); + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (shared_fs_poolid_map[i] == FS_UNKNOWN) { + sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET; + uuids[i] = uuid; + if (backend_registered) + shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + (uuid, PAGE_SIZE); + else + shared_fs_poolid_map[i] = FS_NO_BACKEND; + break; + } + } + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_init_shared_fs); @@ -98,28 +209,54 @@ static int cleancache_get_key(struct inode *inode, return 0; } +/* + * Returns a pool_id that is associated with a given fake poolid. 
+ */ +static int get_poolid_from_fake(int fake_pool_id) +{ + if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) + return shared_fs_poolid_map[fake_pool_id - + FAKE_SHARED_FS_POOLID_OFFSET]; + else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) + return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET]; + return FS_NO_BACKEND; +} + /* * "Get" data from cleancache associated with the poolid/inode/index * that were specified when the data was put to cleanache and, if * successful, use it to fill the specified page with data and return 0. * The pageframe is unchanged and returns -1 if the get fails. * Page must be locked by caller. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ int __cleancache_get_page(struct page *page) { int ret = -1; int pool_id; + int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!backend_registered) { + cleancache_failed_gets++; + goto out; + } + VM_BUG_ON(!PageLocked(page)); - pool_id = page->mapping->host->i_sb->cleancache_poolid; - if (pool_id < 0) + fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (fake_pool_id < 0) goto out; + pool_id = get_poolid_from_fake(fake_pool_id); if (cleancache_get_key(page->mapping->host, &key) < 0) goto out; - ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); + if (pool_id >= 0) + ret = (*cleancache_ops.get_page)(pool_id, + key, page->index, page); if (ret == 0) cleancache_succ_gets++; else @@ -134,16 +271,31 @@ EXPORT_SYMBOL(__cleancache_get_page); * (previously-obtained per-filesystem) poolid and the page's, * inode and page index. Page must be locked. Note that a put_page * always "succeeds", though a subsequent get_page may succeed or fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_put_page(struct page *page) { int pool_id; + int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!backend_registered) { + cleancache_puts++; + return; + } + VM_BUG_ON(!PageLocked(page)); - pool_id = page->mapping->host->i_sb->cleancache_poolid; + fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (fake_pool_id < 0) + return; + + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id >= 0 && - cleancache_get_key(page->mapping->host, &key) >= 0) { + cleancache_get_key(page->mapping->host, &key) >= 0) { (*cleancache_ops.put_page)(pool_id, key, page->index, page); cleancache_puts++; } @@ -153,19 +305,31 @@ EXPORT_SYMBOL(__cleancache_put_page); /* * Invalidate any data from cleancache associated with the poolid and the * page's inode and page index so that a subsequent "get" will fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_invalidate_page(struct address_space *mapping, struct page *page) { /* careful... 
page->mapping is NULL sometimes when this is called */ - int pool_id = mapping->host->i_sb->cleancache_poolid; + int pool_id; + int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (pool_id >= 0) { + if (!backend_registered) + return; + + if (fake_pool_id >= 0) { + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id < 0) + return; + VM_BUG_ON(!PageLocked(page)); if (cleancache_get_key(mapping->host, &key) >= 0) { (*cleancache_ops.invalidate_page)(pool_id, - key, page->index); + key, page->index); cleancache_invalidates++; } } @@ -176,12 +340,25 @@ EXPORT_SYMBOL(__cleancache_invalidate_page); * Invalidate all data from cleancache associated with the poolid and the * mappings's inode so that all subsequent gets to this poolid/inode * will fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_invalidate_inode(struct address_space *mapping) { - int pool_id = mapping->host->i_sb->cleancache_poolid; + int pool_id; + int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!backend_registered) + return; + + if (fake_pool_id < 0) + return; + + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) (*cleancache_ops.invalidate_inode)(pool_id, key); } @@ -189,21 +366,37 @@ EXPORT_SYMBOL(__cleancache_invalidate_inode); /* * Called by any cleancache-enabled filesystem at time of unmount; - * note that pool_id is surrendered and may be reutrned by a subsequent - * cleancache_init_fs or cleancache_init_shared_fs + * note that pool_id is surrendered and may be returned by a subsequent + * cleancache_init_fs or cleancache_init_shared_fs. 
*/ void __cleancache_invalidate_fs(struct super_block *sb) { - if (sb->cleancache_poolid >= 0) { - int old_poolid = sb->cleancache_poolid; - sb->cleancache_poolid = -1; - (*cleancache_ops.invalidate_fs)(old_poolid); + int index; + int fake_pool_id = sb->cleancache_poolid; + int old_poolid = fake_pool_id; + + mutex_lock(&poolid_mutex); + if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) { + index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET; + old_poolid = shared_fs_poolid_map[index]; + shared_fs_poolid_map[index] = FS_UNKNOWN; + uuids[index] = NULL; + } else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) { + index = fake_pool_id - FAKE_FS_POOLID_OFFSET; + old_poolid = fs_poolid_map[index]; + fs_poolid_map[index] = FS_UNKNOWN; } + sb->cleancache_poolid = -1; + if (backend_registered) + (*cleancache_ops.invalidate_fs)(old_poolid); + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_invalidate_fs); static int __init init_cleancache(void) { + int i; + #ifdef CONFIG_DEBUG_FS struct dentry *root = debugfs_create_dir("cleancache", NULL); if (root == NULL) @@ -215,6 +408,11 @@ static int __init init_cleancache(void) debugfs_create_u64("invalidates", S_IRUGO, root, &cleancache_invalidates); #endif + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + fs_poolid_map[i] = FS_UNKNOWN; + shared_fs_poolid_map[i] = FS_UNKNOWN; + } + cleancache_enabled = 1; return 0; } module_init(init_cleancache) -- cgit v1.2.3 From 833f8662af9659508afc3cb80f09138eade378e2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 30 Apr 2013 15:26:57 -0700 Subject: cleancache: Make cleancache_init use a pointer for the ops Instead of using a backend_registered to determine whether a backend is enabled. This allows us to remove the backend_register check and just do 'if (cleancache_ops)' [v1: Rebase on top of b97c4b430b0a (ramster->zcache move] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/zcache/zcache-main.c | 8 ++--- drivers/xen/tmem.c | 6 ++-- include/linux/cleancache.h | 2 +- mm/cleancache.c | 62 +++++++++++++++++++----------------- 4 files changed, 40 insertions(+), 38 deletions(-) (limited to 'mm') diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 09c69c8026f9..6bd4ebb3494d 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1576,9 +1576,9 @@ static struct cleancache_ops zcache_cleancache_ops = { .init_fs = zcache_cleancache_init_fs }; -struct cleancache_ops zcache_cleancache_register_ops(void) +struct cleancache_ops *zcache_cleancache_register_ops(void) { - struct cleancache_ops old_ops = + struct cleancache_ops *old_ops = cleancache_register_ops(&zcache_cleancache_ops); return old_ops; @@ -1860,7 +1860,7 @@ static int __init zcache_init(void) } zbud_init(); if (zcache_enabled && !disable_cleancache) { - struct cleancache_ops old_ops; + struct cleancache_ops *old_ops; register_shrinker(&zcache_shrinker); old_ops = zcache_cleancache_register_ops(); @@ -1870,7 +1870,7 @@ static int __init zcache_init(void) pr_info("%s: cleancache: ignorenonactive = %d\n", namestr, !disable_cleancache_ignore_nonactive); #endif - if (old_ops.init_fs != NULL) + if (old_ops != NULL) pr_warn("%s: cleancache_ops overridden\n", namestr); } if (zcache_enabled && !disable_frontswap) { diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 
7a01a5fd0f63..fd79eab08368 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -236,7 +236,7 @@ static int __init no_cleancache(char *s) } __setup("nocleancache", no_cleancache); -static struct cleancache_ops __initdata tmem_cleancache_ops = { +static struct cleancache_ops tmem_cleancache_ops = { .put_page = tmem_cleancache_put_page, .get_page = tmem_cleancache_get_page, .invalidate_page = tmem_cleancache_flush_page, @@ -392,9 +392,9 @@ static int __init xen_tmem_init(void) BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - struct cleancache_ops old_ops = + struct cleancache_ops *old_ops = cleancache_register_ops(&tmem_cleancache_ops); - if (old_ops.init_fs != NULL) + if (old_ops) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 42e55deee757..3af5ea839558 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -33,7 +33,7 @@ struct cleancache_ops { void (*invalidate_fs)(int); }; -extern struct cleancache_ops +extern struct cleancache_ops * cleancache_register_ops(struct cleancache_ops *ops); extern void __cleancache_init_fs(struct super_block *); extern void __cleancache_init_shared_fs(char *, struct super_block *); diff --git a/mm/cleancache.c b/mm/cleancache.c index 0cecdbba4bcd..b3ae19b72035 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -32,7 +32,7 @@ EXPORT_SYMBOL(cleancache_enabled); * cleancache_ops is set by cleancache_ops_register to contain the pointers * to the cleancache "backend" implementation functions. */ -static struct cleancache_ops cleancache_ops __read_mostly; +static struct cleancache_ops *cleancache_ops __read_mostly; /* * Counters available via /sys/kernel/debug/frontswap (if debugfs is @@ -72,15 +72,14 @@ static DEFINE_MUTEX(poolid_mutex); /* * When set to false (default) all calls to the cleancache functions, except * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded - * by the if (!backend_registered) return. This means multiple threads (from - * different filesystems) will be checking backend_registered. The usage of a + * by the if (!cleancache_ops) return. This means multiple threads (from + * different filesystems) will be checking cleancache_ops. The usage of a * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are * OK if the time between the backend's have been initialized (and - * backend_registered has been set to true) and when the filesystems start + * cleancache_ops has been set to not NULL) and when the filesystems start * actually calling the backends. The inverse (when unloading) is obviously * not good - but this shim does not do that (yet). */ -static bool backend_registered __read_mostly; /* * The backends and filesystems work all asynchronously. This is b/c the @@ -90,13 +89,13 @@ static bool backend_registered __read_mostly; * [shared_|]fs_poolid_map and uuids for. * * b). user does I/Os -> we call the rest of __cleancache_* functions - * which return immediately as backend_registered is false. + * which return immediately as cleancache_ops is false. * * c). modprobe zcache -> cleancache_register_ops. We init the backend - * and set backend_registered to true, and for any fs_poolid_map + * and set cleancache_ops to true, and for any fs_poolid_map * (which is set by __cleancache_init_fs) we initialize the poolid. * - * d). 
user does I/Os -> now that backend_registered is true all the + * d). user does I/Os -> now that cleancache_ops is true all the * __cleancache_* functions can call the backend. They all check * that fs_poolid_map is valid and if so invoke the backend. * @@ -120,23 +119,26 @@ static bool backend_registered __read_mostly; * Register operations for cleancache, returning previous thus allowing * detection of multiple backends and possible nesting. */ -struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) +struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops) { - struct cleancache_ops old = cleancache_ops; + struct cleancache_ops *old = cleancache_ops; int i; mutex_lock(&poolid_mutex); - cleancache_ops = *ops; - - backend_registered = true; for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { if (fs_poolid_map[i] == FS_NO_BACKEND) - fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + fs_poolid_map[i] = ops->init_fs(PAGE_SIZE); if (shared_fs_poolid_map[i] == FS_NO_BACKEND) - shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + shared_fs_poolid_map[i] = ops->init_shared_fs (uuids[i], PAGE_SIZE); } -out: + /* + * We MUST set cleancache_ops _after_ we have called the backends + * init_fs or init_shared_fs functions. Otherwise the compiler might + * re-order where cleancache_ops is set in this function. + */ + barrier(); + cleancache_ops = ops; mutex_unlock(&poolid_mutex); return old; } @@ -151,8 +153,8 @@ void __cleancache_init_fs(struct super_block *sb) for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { if (fs_poolid_map[i] == FS_UNKNOWN) { sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET; - if (backend_registered) - fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + if (cleancache_ops) + fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE); else fs_poolid_map[i] = FS_NO_BACKEND; break; @@ -172,8 +174,8 @@ void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) if (shared_fs_poolid_map[i] == FS_UNKNOWN) { sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET; uuids[i] = uuid; - if (backend_registered) - shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + if (cleancache_ops) + shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs (uuid, PAGE_SIZE); else shared_fs_poolid_map[i] = FS_NO_BACKEND; @@ -240,7 +242,7 @@ int __cleancache_get_page(struct page *page) int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) { + if (!cleancache_ops) { cleancache_failed_gets++; goto out; } @@ -255,7 +257,7 @@ int __cleancache_get_page(struct page *page) goto out; if (pool_id >= 0) - ret = (*cleancache_ops.get_page)(pool_id, + ret = cleancache_ops->get_page(pool_id, key, page->index, page); if (ret == 0) cleancache_succ_gets++; @@ -282,7 +284,7 @@ void __cleancache_put_page(struct page *page) int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) { + if (!cleancache_ops) { cleancache_puts++; return; } @@ -296,7 +298,7 @@ void __cleancache_put_page(struct page *page) if (pool_id >= 0 && cleancache_get_key(page->mapping->host, &key) >= 0) { - (*cleancache_ops.put_page)(pool_id, key, page->index, page); + cleancache_ops->put_page(pool_id, key, page->index, page); cleancache_puts++; } } @@ -318,7 +320,7 @@ void __cleancache_invalidate_page(struct address_space *mapping, int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) + if (!cleancache_ops) return; if (fake_pool_id >= 
0) { @@ -328,7 +330,7 @@ void __cleancache_invalidate_page(struct address_space *mapping, VM_BUG_ON(!PageLocked(page)); if (cleancache_get_key(mapping->host, &key) >= 0) { - (*cleancache_ops.invalidate_page)(pool_id, + cleancache_ops->invalidate_page(pool_id, key, page->index); cleancache_invalidates++; } @@ -351,7 +353,7 @@ void __cleancache_invalidate_inode(struct address_space *mapping) int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) + if (!cleancache_ops) return; if (fake_pool_id < 0) @@ -360,7 +362,7 @@ void __cleancache_invalidate_inode(struct address_space *mapping) pool_id = get_poolid_from_fake(fake_pool_id); if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) - (*cleancache_ops.invalidate_inode)(pool_id, key); + cleancache_ops->invalidate_inode(pool_id, key); } EXPORT_SYMBOL(__cleancache_invalidate_inode); @@ -387,8 +389,8 @@ void __cleancache_invalidate_fs(struct super_block *sb) fs_poolid_map[index] = FS_UNKNOWN; } sb->cleancache_poolid = -1; - if (backend_registered) - (*cleancache_ops.invalidate_fs)(old_poolid); + if (cleancache_ops) + cleancache_ops->invalidate_fs(old_poolid); mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_invalidate_fs); -- cgit v1.2.3 From ff610a1d55da22bf95bbc6a8b193e052169b34b7 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 30 Apr 2013 15:26:58 -0700 Subject: mm: cleancache: clean up cleancache_enabled cleancache_ops is used to decide whether backend is registered. So now cleancache_enabled is always true if defined CONFIG_CLEANCACHE. Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Konrad Rzeszutek Wilk Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cleancache.h | 2 +- mm/cleancache.c | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'mm') diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 3af5ea839558..4ce9056b31a8 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -42,9 +42,9 @@ extern void __cleancache_put_page(struct page *); extern void __cleancache_invalidate_page(struct address_space *, struct page *); extern void __cleancache_invalidate_inode(struct address_space *); extern void __cleancache_invalidate_fs(struct super_block *); -extern int cleancache_enabled; #ifdef CONFIG_CLEANCACHE +#define cleancache_enabled (1) static inline bool cleancache_fs_enabled(struct page *page) { return page->mapping->host->i_sb->cleancache_poolid >= 0; diff --git a/mm/cleancache.c b/mm/cleancache.c index b3ae19b72035..5875f48ce279 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -18,16 +18,6 @@ #include #include -/* - * This global enablement flag may be read thousands of times per second - * by cleancache_get/put/invalidate even on systems where cleancache_ops - * is not claimed (e.g. cleancache is config'ed on but remains - * disabled), so is preferred to the slower alternative: a function - * call that checks a non-global. - */ -int cleancache_enabled __read_mostly; -EXPORT_SYMBOL(cleancache_enabled); - /* * cleancache_ops is set by cleancache_ops_register to contain the pointers * to the cleancache "backend" implementation functions. @@ -414,7 +404,6 @@ static int __init init_cleancache(void) fs_poolid_map[i] = FS_UNKNOWN; shared_fs_poolid_map[i] = FS_UNKNOWN; } - cleancache_enabled = 1; return 0; } module_init(init_cleancache) -- cgit v1.2.3
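
To round off the series, the caller-side effect of the pointer-returning registration API that the last two patches converge on can be sketched as below. This is a userspace mock-up, not the kernel API: struct cleancache_ops is reduced to one hook, and register_ops, demo_init_fs and demo_ops are illustrative names only.

/*
 * A backend registers its ops and learns, from the returned pointer,
 * whether another backend was already in place.
 */
#include <stdio.h>
#include <stddef.h>

struct cleancache_ops {
	int (*init_fs)(size_t pagesize);
};

static struct cleancache_ops *cleancache_ops;	/* NULL means "no backend" */

/* frontend: publish the new ops, return whatever was there before */
static struct cleancache_ops *register_ops(struct cleancache_ops *ops)
{
	struct cleancache_ops *old = cleancache_ops;

	cleancache_ops = ops;
	return old;
}

static int demo_init_fs(size_t pagesize)
{
	printf("creating pool for page size %zu\n", pagesize);
	return 0;
}

static struct cleancache_ops demo_ops = { .init_fs = demo_init_fs };

int main(void)
{
	struct cleancache_ops *old_ops = register_ops(&demo_ops);

	/* with a pointer, "was someone registered before us?" is one test */
	if (old_ops != NULL)
		printf("warning: cleancache_ops overridden\n");

	if (cleancache_ops)
		cleancache_ops->init_fs(4096);
	return 0;
}

The same one-pointer test is what lets zcache and Xen tmem replace their old_ops.init_fs != NULL checks with a plain old_ops != NULL, and what makes a separate enablement flag redundant.
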