diff options
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/commit.c | 10 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 38 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 80 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 21 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 21 |
5 files changed, 93 insertions, 77 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index e8e80761ac73..1c7c49356878 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -57,8 +57,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) * So here, we have a buffer which has just come off the forget list. Look to * see if we can strip all buffers from the backing page. * - * Called under lock_journal(), and possibly under journal_datalist_lock. The - * caller provided us with a ref against the buffer, and we drop that here. + * Called under j_list_lock. The caller provided us with a ref against the + * buffer, and we drop that here. */ static void release_buffer_page(struct buffer_head *bh) { @@ -738,10 +738,8 @@ start_journal_io: err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { printk(KERN_WARNING - "JBD2: Detected IO errors while flushing file data " - "on %s\n", journal->j_devname); - if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) - jbd2_journal_abort(journal, err); + "JBD2: Detected IO errors %d while flushing file data on %s\n", + err, journal->j_devname); err = 0; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index d8084b31b361..743a1d7633cd 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -197,7 +197,7 @@ loop: if (journal->j_commit_sequence != journal->j_commit_request) { jbd2_debug(1, "OK, requests differ\n"); write_unlock(&journal->j_state_lock); - del_timer_sync(&journal->j_commit_timer); + timer_delete_sync(&journal->j_commit_timer); jbd2_journal_commit_transaction(journal); write_lock(&journal->j_state_lock); goto loop; @@ -246,7 +246,7 @@ loop: goto loop; end_loop: - del_timer_sync(&journal->j_commit_timer); + timer_delete_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd2_debug(1, "Journal thread exiting.\n"); @@ -603,7 +603,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) { int ret = 0; - transaction_t *commit_trans; + transaction_t *commit_trans, *running_trans; if (!(journal->j_flags & JBD2_BARRIER)) return 0; @@ -613,6 +613,16 @@ int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) goto out; commit_trans = journal->j_committing_transaction; if (!commit_trans || commit_trans->t_tid != tid) { + running_trans = journal->j_running_transaction; + /* + * The query transaction hasn't started committing, + * it must still be running. + */ + if (WARN_ON_ONCE(!running_trans || + running_trans->t_tid != tid)) + goto out; + + running_trans->t_need_data_flush = 1; ret = 1; goto out; } @@ -947,7 +957,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, * descriptor blocks we do need to generate bona fide buffers. * * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying - * the buffer's contents they really should run flush_dcache_page(bh->b_page). + * the buffer's contents they really should run flush_dcache_folio(bh->b_folio). * But we don't bother doing that, so there will be coherency problems with * mmaps of blockdevs which hold live JBD-controlled filesystems. */ @@ -1361,7 +1371,7 @@ static int journal_check_superblock(journal_t *journal) return err; } - if (jbd2_journal_has_csum_v2or3_feature(journal) && + if (jbd2_journal_has_csum_v2or3(journal) && jbd2_has_feature_checksum(journal)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " @@ -1369,7 +1379,7 @@ static int journal_check_superblock(journal_t *journal) return err; } - if (jbd2_journal_has_csum_v2or3_feature(journal)) { + if (jbd2_journal_has_csum_v2or3(journal)) { if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); return err; @@ -1869,7 +1879,6 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, /* Log is no longer empty */ write_lock(&journal->j_state_lock); - WARN_ON(!sb->s_sequence); journal->j_flags &= ~JBD2_FLUSHED; write_unlock(&journal->j_state_lock); @@ -1965,17 +1974,15 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags) return err; } - if (block_start == ~0ULL) { - block_start = phys_block; - block_stop = block_start - 1; - } + if (block_start == ~0ULL) + block_stop = block_start = phys_block; /* * last block not contiguous with current block, * process last contiguous region and return to this block on * next loop */ - if (phys_block != block_stop + 1) { + if (phys_block != block_stop) { block--; } else { block_stop++; @@ -1994,11 +2001,10 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags) */ byte_start = block_start * journal->j_blocksize; byte_stop = block_stop * journal->j_blocksize; - byte_count = (block_stop - block_start + 1) * - journal->j_blocksize; + byte_count = (block_stop - block_start) * journal->j_blocksize; truncate_inode_pages_range(journal->j_dev->bd_mapping, - byte_start, byte_stop); + byte_start, byte_stop - 1); if (flags & JBD2_JOURNAL_FLUSH_DISCARD) { err = blkdev_issue_discard(journal->j_dev, @@ -2013,7 +2019,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags) } if (unlikely(err != 0)) { - pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu", + pr_err("JBD2: (error %d) unable to wipe journal at physical blocks [%llu, %llu)", err, block_start, block_stop); return err; } diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 9192be7c19d8..c271a050b7e6 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -39,7 +39,7 @@ struct recovery_info static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass); -static int scan_revoke_records(journal_t *, struct buffer_head *, +static int scan_revoke_records(journal_t *, enum passtype, struct buffer_head *, tid_t, struct recovery_info *); #ifdef __KERNEL__ @@ -65,9 +65,8 @@ static void journal_brelse_array(struct buffer_head *b[], int n) */ #define MAXBUF 8 -static int do_readahead(journal_t *journal, unsigned int start) +static void do_readahead(journal_t *journal, unsigned int start) { - int err; unsigned int max, nbufs, next; unsigned long long blocknr; struct buffer_head *bh; @@ -85,7 +84,7 @@ static int do_readahead(journal_t *journal, unsigned int start) nbufs = 0; for (next = start; next < max; next++) { - err = jbd2_journal_bmap(journal, next, &blocknr); + int err = jbd2_journal_bmap(journal, next, &blocknr); if (err) { printk(KERN_ERR "JBD2: bad block at offset %u\n", @@ -94,10 +93,8 @@ static int do_readahead(journal_t *journal, unsigned int start) } bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - if (!bh) { - err = -ENOMEM; + if (!bh) goto failed; - } if (!buffer_uptodate(bh) && !buffer_locked(bh)) { bufs[nbufs++] = bh; @@ -112,12 +109,10 @@ static int do_readahead(journal_t *journal, unsigned int start) if (nbufs) bh_readahead_batch(nbufs, bufs, 0); - err = 0; failed: if (nbufs) journal_brelse_array(bufs, nbufs); - return err; } #endif /* __KERNEL__ */ @@ -287,19 +282,20 @@ static int fc_do_one_pass(journal_t *journal, int jbd2_journal_recover(journal_t *journal) { int err, err2; - journal_superblock_t * sb; - struct recovery_info info; memset(&info, 0, sizeof(info)); - sb = journal->j_superblock; /* * The journal superblock's s_start field (the current log head) * is always zero if, and only if, the journal was cleanly - * unmounted. + * unmounted. We use its in-memory version j_tail here because + * jbd2_journal_wipe() could have updated it without updating journal + * superblock. */ - if (!sb->s_start) { + if (!journal->j_tail) { + journal_superblock_t *sb = journal->j_superblock; + jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n", be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head)); journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; @@ -327,6 +323,12 @@ int jbd2_journal_recover(journal_t *journal) journal->j_transaction_sequence, journal->j_head); jbd2_journal_clear_revoke(journal); + /* Free revoke table allocated for replay */ + if (journal->j_revoke != journal->j_revoke_table[0] && + journal->j_revoke != journal->j_revoke_table[1]) { + jbd2_journal_destroy_revoke_table(journal->j_revoke); + journal->j_revoke = journal->j_revoke_table[1]; + } err2 = sync_blockdev(journal->j_fs_dev); if (!err) err = err2; @@ -612,6 +614,31 @@ static int do_one_pass(journal_t *journal, first_commit_ID = next_commit_ID; if (pass == PASS_SCAN) info->start_transaction = first_commit_ID; + else if (pass == PASS_REVOKE) { + /* + * Would the default revoke table have too long hash chains + * during replay? + */ + if (info->nr_revokes > JOURNAL_REVOKE_DEFAULT_HASH * 16) { + unsigned int hash_size; + + /* + * Aim for average chain length of 8, limit at 1M + * entries to avoid problems with malicious + * filesystems. + */ + hash_size = min(roundup_pow_of_two(info->nr_revokes / 8), + 1U << 20); + journal->j_revoke = + jbd2_journal_init_revoke_table(hash_size); + if (!journal->j_revoke) { + printk(KERN_ERR + "JBD2: failed to allocate revoke table for replay with %u entries. " + "Journal replay may be slow.\n", hash_size); + journal->j_revoke = journal->j_revoke_table[1]; + } + } + } jbd2_debug(1, "Starting recovery pass %d\n", pass); @@ -852,6 +879,13 @@ chksum_ok: case JBD2_REVOKE_BLOCK: /* + * If we aren't in the SCAN or REVOKE pass, then we can + * just skip over this block. + */ + if (pass != PASS_REVOKE && pass != PASS_SCAN) + continue; + + /* * Check revoke block crc in pass_scan, if csum verify * failed, check commit block time later. */ @@ -863,12 +897,7 @@ chksum_ok: need_check_commit_time = true; } - /* If we aren't in the REVOKE pass, then we can - * just skip over this block. */ - if (pass != PASS_REVOKE) - continue; - - err = scan_revoke_records(journal, bh, + err = scan_revoke_records(journal, pass, bh, next_commit_ID, info); if (err) goto failed; @@ -922,8 +951,9 @@ chksum_ok: /* Scan a revoke record, marking all blocks mentioned as revoked. */ -static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, - tid_t sequence, struct recovery_info *info) +static int scan_revoke_records(journal_t *journal, enum passtype pass, + struct buffer_head *bh, tid_t sequence, + struct recovery_info *info) { jbd2_journal_revoke_header_t *header; int offset, max; @@ -944,6 +974,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, if (jbd2_has_feature_64bit(journal)) record_len = 8; + if (pass == PASS_SCAN) { + info->nr_revokes += (max - offset) / record_len; + return 0; + } + while (offset + record_len <= max) { unsigned long long blocknr; int err; @@ -956,7 +991,6 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, err = jbd2_journal_set_revoke(journal, blocknr, sequence); if (err) return err; - ++info->nr_revokes; } return 0; } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index ce63d5fde9c3..0cf0fddbee81 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -215,7 +215,7 @@ int __init jbd2_journal_init_revoke_table_cache(void) return 0; } -static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) +struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) { int shift = 0; int tmp = hash_size; @@ -231,7 +231,7 @@ static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) table->hash_size = hash_size; table->hash_shift = shift; table->hash_table = - kmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL); + kvmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL); if (!table->hash_table) { kmem_cache_free(jbd2_revoke_table_cache, table); table = NULL; @@ -245,7 +245,7 @@ out: return table; } -static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) +void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) { int i; struct list_head *hash_list; @@ -255,7 +255,7 @@ static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) J_ASSERT(list_empty(hash_list)); } - kfree(table->hash_table); + kvfree(table->hash_table); kmem_cache_free(jbd2_revoke_table_cache, table); } @@ -420,12 +420,11 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. */ -int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) +void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { struct jbd2_revoke_record_s *record; journal_t *journal = handle->h_transaction->t_journal; int need_cancel; - int did_revoke = 0; /* akpm: debug */ struct buffer_head *bh = jh2bh(jh); jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh); @@ -450,7 +449,6 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) list_del(&record->hash); spin_unlock(&journal->j_revoke_lock); kmem_cache_free(jbd2_revoke_record_cache, record); - did_revoke = 1; } } @@ -473,11 +471,10 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) __brelse(bh2); } } - return did_revoke; } /* - * journal_clear_revoked_flag clears revoked flag of buffers in + * jbd2_clear_buffer_revoked_flags clears revoked flag of buffers in * revoke table to reflect there is no revoked buffers in the next * transaction which is going to be started. */ @@ -506,9 +503,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal) } } -/* journal_switch_revoke table select j_revoke for next transaction - * we do not want to suspend any processing until all revokes are - * written -bzzz +/* jbd2_journal_switch_revoke_table table select j_revoke for next + * transaction we do not want to suspend any processing until all + * revokes are written -bzzz */ void jbd2_journal_switch_revoke_table(journal_t *journal) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 66513c18ca29..cbc4785462f5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -92,7 +92,6 @@ static void jbd2_get_transaction(journal_t *journal, atomic_set(&transaction->t_outstanding_revokes, 0); atomic_set(&transaction->t_handle_count, 0); INIT_LIST_HEAD(&transaction->t_inode_list); - INIT_LIST_HEAD(&transaction->t_private_list); /* Set up the commit timer for the new transaction. */ journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires); @@ -114,12 +113,9 @@ static void jbd2_get_transaction(journal_t *journal, */ /* - * Update transaction's maximum wait time, if debugging is enabled. - * * t_max_wait is carefully updated here with use of atomic compare exchange. * Note that there could be multiplre threads trying to do this simultaneously * hence using cmpxchg to avoid any use of locks in this case. - * With this t_max_wait can be updated w/o enabling jbd2_journal_enable_debug. */ static inline void update_t_max_wait(transaction_t *transaction, unsigned long ts) @@ -2079,21 +2075,6 @@ static void __jbd2_journal_unfile_buffer(struct journal_head *jh) jh->b_transaction = NULL; } -void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) -{ - struct buffer_head *bh = jh2bh(jh); - - /* Get reference so that buffer cannot be freed before we unlock it */ - get_bh(bh); - spin_lock(&jh->b_state_lock); - spin_lock(&journal->j_list_lock); - __jbd2_journal_unfile_buffer(jh); - spin_unlock(&journal->j_list_lock); - spin_unlock(&jh->b_state_lock); - jbd2_journal_put_journal_head(jh); - __brelse(bh); -} - /** * jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation @@ -2192,7 +2173,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) /* * We don't want to write the buffer anymore, clear the * bit so that we don't confuse checks in - * __journal_file_buffer + * __jbd2_journal_file_buffer */ clear_buffer_dirty(bh); __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); |