diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-21 07:46:34 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-21 07:46:34 -0800 |
commit | 9763dd6f8160dc9cc239fc2427c8173073204457 (patch) | |
tree | 651df084655e0a6e71a96dbf4316a74aafcc0108 /fs/gfs2 | |
parent | 70fcf5c339b12743133050842252e20cfd6d42b5 (diff) | |
parent | c548a1c175608e268f6495f3f82461303584d1c9 (diff) | |
download | lwn-9763dd6f8160dc9cc239fc2427c8173073204457.tar.gz lwn-9763dd6f8160dc9cc239fc2427c8173073204457.zip |
Merge tag 'gfs2-4.11.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2
Pull GFS2 updates from Robert Peterson:
"We've got eight GFS2 patches for this merge window:
- Andy Price submitted a patch to make gfs2_write_full_page a static
function.
- Dan Carpenter submitted a patch to fix an ERR_PTR thinko.
Three patches fix bugs related to deleting very large files, which
cause GFS2 to run out of journal space:
- The first one prevents the GFS2 delete operation from requesting too
much journal space.
- The second one fixes a problem whereby GFS2 could hang because it
wasn't factoring journal space demand into its calculations.
- The third one wakes up IO waiters when a flush is done to restart
processes stuck waiting for journal space to become available.
The final three patches are a performance improvement related to
spin_lock contention between multiple writers:
- The "tr_touched" variable was switched to a flag to be more atomic
and eliminate the possibility of some races.
- Function meta_lo_add was moved inline with its only caller to make
the code more readable and efficient.
- Contention on the gfs2_log_lock spinlock was greatly reduced by
avoiding the lock altogether in cases where we don't really need
it: buffers that already appear in the appropriate metadata list
for the journal. Many thanks to Steve Whitehouse for the ideas and
principles behind these patches."
* tag 'gfs2-4.11.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2:
gfs2: Make gfs2_write_full_page static
GFS2: Reduce contention on gfs2_log_lock
GFS2: Inline function meta_lo_add
GFS2: Switch tr_touched to flag in transaction
GFS2: Wake up io waiters whenever a flush is done
GFS2: Made logd daemon take into account log demand
GFS2: Limit number of transaction blocks requested for truncates
GFS2: Fix reference to ERR_PTR in gfs2_glock_iter_next
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/aops.c | 4 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 29 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 12 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 11 | ||||
-rw-r--r-- | fs/gfs2/log.c | 21 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 6 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 1 | ||||
-rw-r--r-- | fs/gfs2/trans.c | 81 |
8 files changed, 105 insertions, 60 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 6b039d7ce160..ed7a2e252ad8 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -143,8 +143,8 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) /* This is the same as calling block_write_full_page, but it also * writes pages outside of i_size */ -int gfs2_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) +static int gfs2_write_full_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc) { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index fc5da4cbe88c..01b97c012c6e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -720,6 +720,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrp_list rlist; + struct gfs2_trans *tr; u64 bn, bstart; u32 blen, btotal; __be64 *p; @@ -728,6 +729,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, unsigned int revokes = 0; int x; int error; + int jblocks_rqsted; error = gfs2_rindex_update(sdp); if (error) @@ -791,12 +793,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */ gfs2_rs_deltree(&ip->i_res); - error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + - RES_INDIRECT + RES_STATFS + RES_QUOTA, - revokes); +restart: + jblocks_rqsted = rg_blocks + RES_DINODE + + RES_INDIRECT + RES_STATFS + RES_QUOTA + + gfs2_struct2blk(sdp, revokes, sizeof(u64)); + if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2)) + jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2); + error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); if (error) goto out_rg_gunlock; + tr = current->journal_info; down_write(&ip->i_rw_mutex); gfs2_trans_add_meta(ip->i_gl, dibh); @@ -810,6 +817,16 @@ static int do_strip(struct gfs2_inode *ip, 
struct buffer_head *dibh, if (!*p) continue; + /* check for max reasonable journal transaction blocks */ + if (tr->tr_num_buf_new + RES_STATFS + + RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) { + if (rg_blocks >= tr->tr_num_buf_new) + rg_blocks -= tr->tr_num_buf_new; + else + rg_blocks = 0; + break; + } + bn = be64_to_cpu(*p); if (bstart + blen == bn) @@ -827,6 +844,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, *p = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); } + if (p == bottom) + rg_blocks = 0; + if (bstart) { __gfs2_free_blocks(ip, bstart, blen, metadata); btotal += blen; @@ -844,6 +864,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_trans_end(sdp); + if (rg_blocks) + goto restart; + out_rg_gunlock: gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); out_rlist: diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 94f50cac91c6..a2d45db32cd5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1802,16 +1802,18 @@ void gfs2_glock_exit(void) static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { - do { - gi->gl = rhashtable_walk_next(&gi->hti); + while ((gi->gl = rhashtable_walk_next(&gi->hti))) { if (IS_ERR(gi->gl)) { if (PTR_ERR(gi->gl) == -EAGAIN) continue; gi->gl = NULL; + return; } - /* Skip entries for other sb and dead entries */ - } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) || - __lockref_is_dead(&gi->gl->gl_lockref))); + /* Skip entries for other sb and dead entries */ + if (gi->sdp == gi->gl->gl_name.ln_sbd && + !__lockref_is_dead(&gi->gl->gl_lockref)) + return; + } } static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a6a3389a07fc..c45084ac642d 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -470,15 +470,19 @@ struct gfs2_quota_data { struct rcu_head qd_rcu; }; +enum { + TR_TOUCHED = 1, + TR_ATTACHED = 2, + TR_ALLOCED = 3, +}; + struct gfs2_trans { unsigned long tr_ip; unsigned int tr_blocks; 
unsigned int tr_revokes; unsigned int tr_reserved; - unsigned int tr_touched:1; - unsigned int tr_attached:1; - unsigned int tr_alloced:1; + unsigned long tr_flags; unsigned int tr_num_buf_new; unsigned int tr_num_databuf_new; @@ -794,6 +798,7 @@ struct gfs2_sbd { atomic_t sd_log_thresh1; atomic_t sd_log_thresh2; atomic_t sd_log_blks_free; + atomic_t sd_log_blks_needed; wait_queue_head_t sd_log_waitq; wait_queue_head_t sd_logd_waitq; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 27c00a16def0..f865b96374df 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -349,6 +349,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) if (gfs2_assert_warn(sdp, blks) || gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) return -EINVAL; + atomic_add(blks, &sdp->sd_log_blks_needed); retry: free_blocks = atomic_read(&sdp->sd_log_blks_free); if (unlikely(free_blocks <= wanted)) { @@ -370,6 +371,7 @@ retry: wake_up(&sdp->sd_reserving_log_wait); goto retry; } + atomic_sub(blks, &sdp->sd_log_blks_needed); trace_gfs2_log_blocks(sdp, -blks); /* @@ -797,7 +799,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) { - WARN_ON_ONCE(old->tr_attached != 1); + WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags)); old->tr_num_buf_new += new->tr_num_buf_new; old->tr_num_databuf_new += new->tr_num_databuf_new; @@ -821,9 +823,9 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) if (sdp->sd_log_tr) { gfs2_merge_trans(sdp->sd_log_tr, tr); } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { - gfs2_assert_withdraw(sdp, tr->tr_alloced); + gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags)); sdp->sd_log_tr = tr; - tr->tr_attached = 1; + set_bit(TR_ATTACHED, &tr->tr_flags); } sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; @@ -891,13 +893,16 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) static inline int 
gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) { - return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1)); + return (atomic_read(&sdp->sd_log_pinned) + + atomic_read(&sdp->sd_log_blks_needed) >= + atomic_read(&sdp->sd_log_thresh1)); } static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) { unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free); - return used_blocks >= atomic_read(&sdp->sd_log_thresh2); + return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >= + atomic_read(&sdp->sd_log_thresh2); } /** @@ -913,12 +918,15 @@ int gfs2_logd(void *data) struct gfs2_sbd *sdp = data; unsigned long t = 1; DEFINE_WAIT(wait); + bool did_flush; while (!kthread_should_stop()) { + did_flush = false; if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp); gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); + did_flush = true; } if (gfs2_ail_flush_reqd(sdp)) { @@ -926,9 +934,10 @@ int gfs2_logd(void *data) gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp); gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); + did_flush = true; } - if (!gfs2_ail_flush_reqd(sdp)) + if (!gfs2_ail_flush_reqd(sdp) || did_flush) wake_up(&sdp->sd_log_waitq); t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 49db8ef13fdf..663ffc135ef3 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -292,7 +292,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, wait_on_buffer(bh); if (unlikely(!buffer_uptodate(bh))) { struct gfs2_trans *tr = current->journal_info; - if (tr && tr->tr_touched) + if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) gfs2_io_error_bh(sdp, bh); brelse(bh); *bhp = NULL; @@ -319,7 +319,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) if (!buffer_uptodate(bh)) { struct gfs2_trans *tr = current->journal_info; - if (tr && tr->tr_touched) + if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) gfs2_io_error_bh(sdp, bh); return -EIO; } @@ -345,7 +345,7 @@ void 
gfs2_remove_from_journal(struct buffer_head *bh, int meta) tr->tr_num_buf_rm++; else tr->tr_num_databuf_rm++; - tr->tr_touched = 1; + set_bit(TR_TOUCHED, &tr->tr_flags); was_pinned = 1; brelse(bh); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a34308df927f..58704799f0b9 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -683,6 +683,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) goto fail_jindex; } + atomic_set(&sdp->sd_log_blks_needed, 0); if (sdp->sd_args.ar_spectator) { sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 0c1bde395062..affef3c066e0 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -48,7 +48,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, tr->tr_blocks = blocks; tr->tr_revokes = revokes; tr->tr_reserved = 1; - tr->tr_alloced = 1; + set_bit(TR_ALLOCED, &tr->tr_flags); if (blocks) tr->tr_reserved += 6 + blocks; if (revokes) @@ -78,7 +78,8 @@ static void gfs2_print_trans(const struct gfs2_trans *tr) { pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip); pr_warn("blocks=%u revokes=%u reserved=%u touched=%u\n", - tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); + tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, + test_bit(TR_TOUCHED, &tr->tr_flags)); pr_warn("Buf %u/%u Databuf %u/%u Revoke %u/%u\n", tr->tr_num_buf_new, tr->tr_num_buf_rm, tr->tr_num_databuf_new, tr->tr_num_databuf_rm, @@ -89,12 +90,12 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) { struct gfs2_trans *tr = current->journal_info; s64 nbuf; - int alloced = tr->tr_alloced; + int alloced = test_bit(TR_ALLOCED, &tr->tr_flags); BUG_ON(!tr); current->journal_info = NULL; - if (!tr->tr_touched) { + if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { gfs2_log_release(sdp, tr->tr_reserved); if (alloced) { kfree(tr); @@ -112,8 +113,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_print_trans(tr); 
gfs2_log_commit(sdp, tr); - if (alloced && !tr->tr_attached) - kfree(tr); + if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags)) + kfree(tr); up_read(&sdp->sd_log_flush_lock); if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) @@ -169,6 +170,10 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) } lock_buffer(bh); + if (buffer_pinned(bh)) { + set_bit(TR_TOUCHED, &tr->tr_flags); + goto out; + } gfs2_log_lock(sdp); bd = bh->b_private; if (bd == NULL) { @@ -182,7 +187,7 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) gfs2_log_lock(sdp); } gfs2_assert(sdp, bd->bd_gl == gl); - tr->tr_touched = 1; + set_bit(TR_TOUCHED, &tr->tr_flags); if (list_empty(&bd->bd_list)) { set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); @@ -191,45 +196,24 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) list_add_tail(&bd->bd_list, &tr->tr_databuf); } gfs2_log_unlock(sdp); +out: unlock_buffer(bh); } -static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) -{ - struct gfs2_meta_header *mh; - struct gfs2_trans *tr; - enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); - - tr = current->journal_info; - tr->tr_touched = 1; - if (!list_empty(&bd->bd_list)) - return; - set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); - set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); - mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; - if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { - pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n", - (unsigned long long)bd->bd_bh->b_blocknr); - BUG(); - } - if (unlikely(state == SFS_FROZEN)) { - printk(KERN_INFO "GFS2:adding buf while frozen\n"); - gfs2_assert_withdraw(sdp, 0); - } - gfs2_pin(sdp, bd->bd_bh); - mh->__pad0 = cpu_to_be64(0); - mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); - list_add(&bd->bd_list, &tr->tr_buf); - tr->tr_num_buf_new++; -} - void gfs2_trans_add_meta(struct gfs2_glock *gl, struct 
buffer_head *bh) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_bufdata *bd; + struct gfs2_meta_header *mh; + struct gfs2_trans *tr = current->journal_info; + enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lock_buffer(bh); + if (buffer_pinned(bh)) { + set_bit(TR_TOUCHED, &tr->tr_flags); + goto out; + } gfs2_log_lock(sdp); bd = bh->b_private; if (bd == NULL) { @@ -245,8 +229,29 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) gfs2_log_lock(sdp); } gfs2_assert(sdp, bd->bd_gl == gl); - meta_lo_add(sdp, bd); + set_bit(TR_TOUCHED, &tr->tr_flags); + if (!list_empty(&bd->bd_list)) + goto out_unlock; + set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); + mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { + pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n", + (unsigned long long)bd->bd_bh->b_blocknr); + BUG(); + } + if (unlikely(state == SFS_FROZEN)) { + printk(KERN_INFO "GFS2:adding buf while frozen\n"); + gfs2_assert_withdraw(sdp, 0); + } + gfs2_pin(sdp, bd->bd_bh); + mh->__pad0 = cpu_to_be64(0); + mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); + list_add(&bd->bd_list, &tr->tr_buf); + tr->tr_num_buf_new++; +out_unlock: gfs2_log_unlock(sdp); +out: unlock_buffer(bh); } @@ -256,7 +261,7 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) BUG_ON(!list_empty(&bd->bd_list)); gfs2_add_revoke(sdp, bd); - tr->tr_touched = 1; + set_bit(TR_TOUCHED, &tr->tr_flags); tr->tr_num_revoke++; } |