diff options
author | Jan Kara <jack@suse.cz> | 2011-05-24 11:52:40 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2011-05-24 11:52:40 -0400 |
commit | 81be12c8179c1c397d3f179cdd9b3f7146cf47f1 (patch) | |
tree | b535687e88af17fc6b25329decf28563ed519395 | |
parent | b221349fa8b45d13c3650089f0514df7d1eb36c3 (diff) | |
download | lwn-81be12c8179c1c397d3f179cdd9b3f7146cf47f1.tar.gz lwn-81be12c8179c1c397d3f179cdd9b3f7146cf47f1.zip |
jbd2: fix sending of data flush on journal commit
In data=ordered mode, it's theoretically possible (however rare) that
an inode is filed to transaction's t_inode_list and a flusher thread
writes all the data and inode is reclaimed before the transaction
starts to commit. In such a case, we could erroneously omit sending a
flush to file system device when it is different from the journal
device (because data can still be in disk cache only).
Fix the problem by setting a flag in a transaction when some inode is added
to it and then send disk flush in the commit code when the flag is set.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/jbd2/commit.c | 3 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 7 | ||||
-rw-r--r-- | include/linux/jbd2.h | 4 |
3 files changed, 11 insertions, 3 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 78c299218681..2d5095ecc25f 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); - commit_transaction->t_flushed_data_blocks = 1; clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); @@ -683,7 +682,7 @@ start_journal_io: * then we must flush the file system device before we issue * the commit record */ - if (commit_transaction->t_flushed_data_blocks && + if (commit_transaction->t_need_data_flush && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 85a055ef93fe..20065c9f2479 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2147,6 +2147,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) jinode->i_next_transaction == transaction) goto done; + /* + * We only ever set this variable to 1 so the test is safe. Since + * t_need_data_flush is likely to be set, we do the test to save some + * cacheline bouncing + */ + if (!transaction->t_need_data_flush) + transaction->t_need_data_flush = 1; /* On some different transaction's list - should be * the committing one */ if (jinode->i_transaction) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a32dcaec04e1..4d57955061f4 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -658,7 +658,9 @@ struct transaction_s * waiting for it to finish. */ unsigned int t_synchronous_commit:1; - unsigned int t_flushed_data_blocks:1; + + /* Disk flush needs to be sent to fs partition [no locking] */ + int t_need_data_flush; /* * For use by the filesystem to store fs-specific data |