diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-31 15:57:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-31 15:57:50 -0700 |
commit | 3d198e42ce25cb1d58ff7052c036407271ebfb51 (patch) | |
tree | c4e639b17c7ce7be04347d331f87c00d2bddcadc /fs | |
parent | f008b1d6e1e06bb61e9402aa8a1cfa681510e375 (diff) | |
parent | 27ca8273fda398638ca994a207323a85b6d81190 (diff) | |
download | lwn-3d198e42ce25cb1d58ff7052c036407271ebfb51.tar.gz lwn-3d198e42ce25cb1d58ff7052c036407271ebfb51.zip |
Merge tag 'gfs2-v5.17-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2
Pull gfs2 fixes from Andreas Gruenbacher:
 - To avoid deadlocks, actively cancel dlm locking requests when we give
   up on them.

   Further dlm operations on the same lock will return -EBUSY until the
   cancel has been completed, so in that case, wait and repeat. (This is
   rare.)

 - Lock inversion fixes in gfs2_inode_lookup() and gfs2_create_inode().

 - Some more fallout from the gfs2 mmap + page fault deadlock fixes
   (merged in commit c03098d4b9ad7: "Merge tag 'gfs2-v5.15-rc5-mmap-fault'").

 - Various other minor bug fixes and cleanups.
* tag 'gfs2-v5.17-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2:
  gfs2: Make sure FITRIM minlen is rounded up to fs block size
  gfs2: Make sure not to return short direct writes
  gfs2: Remove dead code in gfs2_file_read_iter
  gfs2: Fix gfs2_file_buffered_write endless loop workaround
  gfs2: Minor retry logic cleanup
  gfs2: Disable page faults during lockless buffered reads
  gfs2: Fix should_fault_in_pages() logic
  gfs2: Remove return value for gfs2_indirect_init
  gfs2: Initialize gh_error in gfs2_glock_nq
  gfs2: Make use of list_is_first
  gfs2: Switch lock order of inode and iopen glock
  gfs2: cancel timed-out glock requests
  gfs2: Expect -EBUSY after canceling dlm locking requests
  gfs2: gfs2_setattr_size error path fix
  gfs2: assign rgrp glock before compute_bitstructs
Diffstat (limited to 'fs')
-rw-r--r-- | fs/gfs2/bmap.c | 9 | ||||
-rw-r--r-- | fs/gfs2/file.c | 68 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 14 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 51 | ||||
-rw-r--r-- | fs/gfs2/lock_dlm.c | 15 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 19 | ||||
-rw-r--r-- | fs/gfs2/rgrp.h | 2 | ||||
-rw-r--r-- | fs/gfs2/super.c | 2 |
8 files changed, 104 insertions, 76 deletions
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index d67108489148..39080b2d6cf8 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -606,9 +606,9 @@ out: return ret; } -static inline __be64 *gfs2_indirect_init(struct metapath *mp, - struct gfs2_glock *gl, unsigned int i, - unsigned offset, u64 bn) +static inline void gfs2_indirect_init(struct metapath *mp, + struct gfs2_glock *gl, unsigned int i, + unsigned offset, u64 bn) { __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + ((i > 1) ? sizeof(struct gfs2_meta_header) : @@ -621,7 +621,6 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp, gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); ptr += offset; *ptr = cpu_to_be64(bn); - return ptr; } enum alloc_state { @@ -2146,7 +2145,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rs_delete(ip, NULL); + gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8c39a8571b1f..22b41acfbbc3 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) { if (gfs2_rs_active(&ip->i_res)) - gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_rs_delete(ip); gfs2_qa_put(ip); } return 0; @@ -775,8 +775,7 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, size_t *window_size) { size_t count = iov_iter_count(i); - char __user *p; - int pages = 1; + size_t size, offs; if (likely(!count)) return false; @@ -785,18 +784,20 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, if (!iter_is_iovec(i)) return false; + size = PAGE_SIZE; + offs = offset_in_page(i->iov[0].iov_base + i->iov_offset); if (*prev_count != count || !*window_size) { - int pages, nr_dirtied; + size_t nr_dirtied; - pages = min_t(int, BIO_MAX_VECS, DIV_ROUND_UP(count, PAGE_SIZE)); + size = ALIGN(offs + count, PAGE_SIZE); + size = min_t(size_t, 
size, SZ_1M); nr_dirtied = max(current->nr_dirtied_pause - - current->nr_dirtied, 1); - pages = min(pages, nr_dirtied); + current->nr_dirtied, 8); + size = min(size, nr_dirtied << PAGE_SHIFT); } *prev_count = count; - p = i->iov[0].iov_base + i->iov_offset; - *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p); + *window_size = size - offs; return true; } @@ -851,9 +852,9 @@ retry_under_glock: leftover = fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(gh); if (leftover != window_size) { - if (!gfs2_holder_queued(gh)) - goto retry; - goto retry_under_glock; + if (gfs2_holder_queued(gh)) + goto retry_under_glock; + goto retry; } } if (gfs2_holder_queued(gh)) @@ -920,9 +921,9 @@ retry_under_glock: leftover = fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); if (leftover != window_size) { - if (!gfs2_holder_queued(gh)) - goto retry; - goto retry_under_glock; + if (gfs2_holder_queued(gh)) + goto retry_under_glock; + goto retry; } } out: @@ -950,20 +951,19 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) * and retry. 
*/ - if (iocb->ki_flags & IOCB_DIRECT) { - ret = gfs2_file_direct_read(iocb, to, &gh); - if (likely(ret != -ENOTBLK)) - return ret; - iocb->ki_flags &= ~IOCB_DIRECT; - } + if (iocb->ki_flags & IOCB_DIRECT) + return gfs2_file_direct_read(iocb, to, &gh); + + pagefault_disable(); iocb->ki_flags |= IOCB_NOIO; ret = generic_file_read_iter(iocb, to); iocb->ki_flags &= ~IOCB_NOIO; + pagefault_enable(); if (ret >= 0) { if (!iov_iter_count(to)) return ret; written = ret; - } else { + } else if (ret != -EFAULT) { if (ret != -EAGAIN) return ret; if (iocb->ki_flags & IOCB_NOWAIT) @@ -989,12 +989,11 @@ retry_under_glock: leftover = fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(&gh); if (leftover != window_size) { - if (!gfs2_holder_queued(&gh)) { - if (written) - goto out_uninit; - goto retry; - } - goto retry_under_glock; + if (gfs2_holder_queued(&gh)) + goto retry_under_glock; + if (written) + goto out_uninit; + goto retry; } } if (gfs2_holder_queued(&gh)) @@ -1068,12 +1067,11 @@ retry_under_glock: gfs2_holder_disallow_demote(gh); if (leftover != window_size) { from->count = min(from->count, window_size - leftover); - if (!gfs2_holder_queued(gh)) { - if (read) - goto out_uninit; - goto retry; - } - goto retry_under_glock; + if (gfs2_holder_queued(gh)) + goto retry_under_glock; + if (read && !(iocb->ki_flags & IOCB_DIRECT)) + goto out_uninit; + goto retry; } } out_unlock: @@ -1083,6 +1081,7 @@ out_uninit: gfs2_holder_uninit(gh); if (statfs_gh) kfree(statfs_gh); + from->count = orig_count - read; return read ? read : ret; } @@ -1497,7 +1496,6 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) if (error != GLR_TRYFAILED) break; fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT; - fl_gh->gh_error = 0; msleep(sleeptime); } if (error) { diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6b23399eaee0..630c6550eacf 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -542,7 +542,7 @@ restart: * some reason. 
If this holder is the head of the list, it * means we have a blocked holder at the head, so return 1. */ - if (gh->gh_list.prev == &gl->gl_holders) + if (list_is_first(&gh->gh_list, &gl->gl_holders)) return 1; do_error(gl, 0); break; @@ -669,6 +669,8 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) /* Check for state != intended state */ if (unlikely(state != gl->gl_target)) { + if (gh && (ret & LM_OUT_CANCELED)) + gfs2_holder_wake(gh); if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { /* move to back of queue and try next entry */ if (ret & LM_OUT_CANCELED) { @@ -1259,7 +1261,6 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; - gh->gh_error = 0; gh->gh_iflags = 0; gfs2_glock_hold(gl); } @@ -1565,6 +1566,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) if (test_bit(GLF_LRU, &gl->gl_flags)) gfs2_glock_remove_from_lru(gl); + gh->gh_error = 0; spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && @@ -1691,6 +1693,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; spin_lock(&gl->gl_lockref.lock); + if (list_is_first(&gh->gh_list, &gl->gl_holders) && + !test_bit(HIF_HOLDER, &gh->gh_iflags)) { + spin_unlock(&gl->gl_lockref.lock); + gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); + spin_lock(&gl->gl_lockref.lock); + } + __gfs2_glock_dq(gh); spin_unlock(&gl->gl_lockref.lock); } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 89905f4f29bb..c8ec876f33ea 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -131,7 +131,21 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_glock *io_gl; - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); + error = gfs2_glock_get(sdp, 
no_addr, &gfs2_inode_glops, CREATE, + &ip->i_gl); + if (unlikely(error)) + goto fail; + + error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, + &io_gl); + if (unlikely(error)) + goto fail; + + if (blktype != GFS2_BLKST_UNLINKED) + gfs2_cancel_delete_work(io_gl); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, + &ip->i_iopen_gh); + gfs2_glock_put(io_gl); if (unlikely(error)) goto fail; @@ -161,16 +175,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); - error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); - if (unlikely(error)) - goto fail; - if (blktype != GFS2_BLKST_UNLINKED) - gfs2_cancel_delete_work(io_gl); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); - gfs2_glock_put(io_gl); - if (unlikely(error)) - goto fail; - /* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */ inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1); inode->i_atime.tv_nsec = 0; @@ -716,13 +720,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); BUG_ON(error); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (error) goto fail_gunlock2; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + if (error) + goto fail_gunlock3; + error = gfs2_trans_begin(sdp, blocks, 0); if (error) - goto fail_gunlock2; + goto fail_gunlock3; if (blocks > 1) { ip->i_eattr = ip->i_no_addr + 1; @@ -731,10 +739,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, init_dinode(dip, ip, symname); gfs2_trans_end(sdp); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); - if (error) - goto fail_gunlock2; - glock_set_object(ip->i_gl, ip); 
glock_set_object(io_gl, ip); gfs2_set_iop(inode); @@ -745,14 +749,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (default_acl) { error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) - goto fail_gunlock3; + goto fail_gunlock4; posix_acl_release(default_acl); default_acl = NULL; } if (acl) { error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) - goto fail_gunlock3; + goto fail_gunlock4; posix_acl_release(acl); acl = NULL; } @@ -760,11 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, &gfs2_initxattrs, NULL); if (error) - goto fail_gunlock3; + goto fail_gunlock4; error = link_dinode(dip, name, ip, &da); if (error) - goto fail_gunlock3; + goto fail_gunlock4; mark_inode_dirty(inode); d_instantiate(dentry, inode); @@ -782,9 +786,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, unlock_new_inode(inode); return error; -fail_gunlock3: +fail_gunlock4: glock_clear_object(ip->i_gl, ip); glock_clear_object(io_gl, ip); +fail_gunlock3: gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: gfs2_glock_put(io_gl); @@ -793,7 +798,7 @@ fail_free_inode: if (free_vfs_inode) /* else evict will do the put for us */ gfs2_glock_put(ip->i_gl); } - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 50578f881e6d..2559a79cf14b 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -261,6 +261,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, int req; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; + int error; req = make_mode(gl->gl_name.ln_sbd, req_state); lkf = make_flags(gl, flags, req); @@ -279,8 +280,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, * Submit the actual lock request. 
*/ - return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, +again: + error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + if (error == -EBUSY) { + msleep(20); + goto again; + } + return error; } static void gdlm_put_lock(struct gfs2_glock *gl) @@ -312,8 +319,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl) return; } +again: error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, NULL, gl); + if (error == -EBUSY) { + msleep(20); + goto again; + } + if (error) { fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0fb3c01bc557..801ad9f4f2be 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); } @@ -922,15 +923,15 @@ static int read_rindex_entry(struct gfs2_inode *ip) spin_lock_init(&rgd->rd_rsspin); mutex_init(&rgd->rd_mutex); - error = compute_bitstructs(rgd); - if (error) - goto fail; - error = gfs2_glock_get(sdp, rgd->rd_addr, &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); if (error) goto fail; + error = compute_bitstructs(rgd); + if (error) + goto fail_glock; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~GFS2_RDF_PREFERRED; if (rgd->rd_data > sdp->sd_max_rg_data) @@ -944,6 +945,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) } error = 0; /* someone else read in the rgrp; free it and ignore it */ +fail_glock: gfs2_glock_put(rgd->rd_gl); 
fail: @@ -1415,7 +1417,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, q->limits.discard_granularity) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3e2ca1fb4305..46dd94e9e085 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index cf9cf66522b3..bdb773e5c88f 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1396,7 +1396,7 @@ out: truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); |