writeback: implement unlocked_inode_to_wb transaction and use it for stat updates

The mechanism for detecting whether an inode should switch its wb (bdi_writeback) association is now in place. This patch build the framework for the actual switching. This patch adds a new inode flag I_WB_SWITCHING, which has two functions. First, the easy one, it ensures that there's only one switching in progress for a give inode. Second, it's used as a mechanism to synchronize wb stat updates. The two stats, WB_RECLAIMABLE and WB_WRITEBACK, aren't event counters but track the current number of dirty pages and pages under writeback respectively. As such, when an inode is moved from one wb to another, the inode's portion of those stats have to be transferred together; unfortunately, this is a bit tricky as those stat updates are percpu operations which are performed without holding any lock in some places. This patch solves the problem in a similar way as memcg. Each such lockless stat updates are wrapped in transaction surrounded by unlocked_inode_to_wb_begin/end(). During normal operation, they map to rcu_read_lock/unlock(); however, if I_WB_SWITCHING is asserted, mapping->tree_lock is grabbed across the transaction. In turn, the switching path sets I_WB_SWITCHING and waits for a RCU grace period to pass before actually starting to switch, which guarantees that all stat update paths are synchronizing against mapping->tree_lock. This patch still doesn't implement the actual switching. v3: Updated on top of the recent cancel_dirty_page() updates. unlocked_inode_to_wb_begin() now nests inside mem_cgroup_begin_page_stat() to match the locking order. v2: The i_wb access transaction will be used for !stat accesses too. Function names and comments updated accordingly. s/inode_wb_stat_unlocked_{begin|end}/unlocked_inode_to_wb_{begin|end}/ s/switch_wb/switch_wbs/ Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Jens Axboe <axboe@kernel.dk> Cc: Jan Kara <jack@suse.cz> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Greg Thelen <gthelen@google.com> Signed-off-by: Jens Axboe <axboe@fb.com>
author: Tejun Heo <tj@kernel.org> 2015-05-28 14:50:53 -0400
committer: Jens Axboe <axboe@fb.com> 2015-06-02 08:40:20 -0600
commit: 682aa8e1a6a1504a4caaa62e6c2c9daae3757210 (patch)
tree: d2471a86d2a286ef8076f629f9709310d68f4e95 /mm/page-writeback.c
parent: 87e1d789bf55b12fa7c1cdce024499aee3bc0af0 (diff)
download: lwn-682aa8e1a6a1504a4caaa62e6c2c9daae3757210.tar.gz
lwn-682aa8e1a6a1504a4caaa62e6c2c9daae3757210.zip
1 files changed, 21 insertions, 6 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e8903356f423..e1514d5b4e9b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2422,12 +2422,12 @@ EXPORT_SYMBOL(account_page_dirtied);
  * Caller must hold mem_cgroup_begin_page_stat().
  */
 void account_page_cleaned(struct page *page, struct address_space *mapping,
-			  struct mem_cgroup *memcg)
+			  struct mem_cgroup *memcg, struct bdi_writeback *wb)
 {
 	if (mapping_cap_account_dirty(mapping)) {
 		mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
 		dec_zone_page_state(page, NR_FILE_DIRTY);
-		dec_wb_stat(inode_to_wb(mapping->host), WB_RECLAIMABLE);
+		dec_wb_stat(wb, WB_RECLAIMABLE);
 		task_io_account_cancelled_write(PAGE_CACHE_SIZE);
 	}
 }
@@ -2490,11 +2490,15 @@ void account_page_redirty(struct page *page)
 	struct address_space *mapping = page->mapping;
 
 	if (mapping && mapping_cap_account_dirty(mapping)) {
-		struct bdi_writeback *wb = inode_to_wb(mapping->host);
+		struct inode *inode = mapping->host;
+		struct bdi_writeback *wb;
+		bool locked;
 
+		wb = unlocked_inode_to_wb_begin(inode, &locked);
 		current->nr_dirtied--;
 		dec_zone_page_state(page, NR_DIRTIED);
 		dec_wb_stat(wb, WB_DIRTIED);
+		unlocked_inode_to_wb_end(inode, locked);
 	}
 }
 EXPORT_SYMBOL(account_page_redirty);
@@ -2597,13 +2601,18 @@ void cancel_dirty_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	if (mapping_cap_account_dirty(mapping)) {
+		struct inode *inode = mapping->host;
+		struct bdi_writeback *wb;
 		struct mem_cgroup *memcg;
+		bool locked;
 
 		memcg = mem_cgroup_begin_page_stat(page);
+		wb = unlocked_inode_to_wb_begin(inode, &locked);
 
 		if (TestClearPageDirty(page))
-			account_page_cleaned(page, mapping, memcg);
+			account_page_cleaned(page, mapping, memcg, wb);
 
+		unlocked_inode_to_wb_end(inode, locked);
 		mem_cgroup_end_page_stat(memcg);
 	} else {
 		ClearPageDirty(page);
@@ -2628,12 +2637,16 @@ EXPORT_SYMBOL(cancel_dirty_page);
 int clear_page_dirty_for_io(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
-	struct mem_cgroup *memcg;
 	int ret = 0;
 
 	BUG_ON(!PageLocked(page));
 
 	if (mapping && mapping_cap_account_dirty(mapping)) {
+		struct inode *inode = mapping->host;
+		struct bdi_writeback *wb;
+		struct mem_cgroup *memcg;
+		bool locked;
+
 		/*
 		 * Yes, Virginia, this is indeed insane.
 		 *
@@ -2670,12 +2683,14 @@ int clear_page_dirty_for_io(struct page *page)
 		 * exclusion.
 		 */
 		memcg = mem_cgroup_begin_page_stat(page);
+		wb = unlocked_inode_to_wb_begin(inode, &locked);
 		if (TestClearPageDirty(page)) {
 			mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
 			dec_zone_page_state(page, NR_FILE_DIRTY);
-			dec_wb_stat(inode_to_wb(mapping->host), WB_RECLAIMABLE);
+			dec_wb_stat(wb, WB_RECLAIMABLE);
 			ret = 1;
 		}
+		unlocked_inode_to_wb_end(inode, locked);
 		mem_cgroup_end_page_stat(memcg);
 		return ret;
 	}
author	Tejun Heo <tj@kernel.org>	2015-05-28 14:50:53 -0400
committer	Jens Axboe <axboe@fb.com>	2015-06-02 08:40:20 -0600
commit	682aa8e1a6a1504a4caaa62e6c2c9daae3757210 (patch)
tree	d2471a86d2a286ef8076f629f9709310d68f4e95 /mm/page-writeback.c
parent	87e1d789bf55b12fa7c1cdce024499aee3bc0af0 (diff)
download	lwn-682aa8e1a6a1504a4caaa62e6c2c9daae3757210.tar.gz lwn-682aa8e1a6a1504a4caaa62e6c2c9daae3757210.zip