From 9a4bf31d05a801e2358d96f69b39fb8ce2c69dd8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Dec 2015 10:41:58 -0500 Subject: nfs: add new tracepoint for pnfs_update_layout pnfs_update_layout is really the "nexus" of layout handling. If it returns NULL then we end up going through the MDS. This patch adds some tracepoints to that function that allow us to determine the cause when we end up going through the MDS unexpectedly. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index e7e78537aea2..0e30f2c5ff49 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -592,4 +592,18 @@ enum data_content4 { NFS4_CONTENT_HOLE = 1, }; +enum pnfs_update_layout_reason { + PNFS_UPDATE_LAYOUT_UNKNOWN = 0, + PNFS_UPDATE_LAYOUT_NO_PNFS, + PNFS_UPDATE_LAYOUT_RD_ZEROLEN, + PNFS_UPDATE_LAYOUT_MDSTHRESH, + PNFS_UPDATE_LAYOUT_NOMEM, + PNFS_UPDATE_LAYOUT_BULK_RECALL, + PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, + PNFS_UPDATE_LAYOUT_FOUND_CACHED, + PNFS_UPDATE_LAYOUT_RETURN, + PNFS_UPDATE_LAYOUT_BLOCKED, + PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, +}; + #endif -- cgit v1.2.3 From 99ade3c71b1e40e7174d6527709399a87f3d05e0 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Wed, 2 Dec 2015 09:39:51 -0500 Subject: nfs: machine credential support for additional operations Allow LAYOUTRETURN and DELEGRETURN to use machine credentials if the server supports it. Add request for OPEN_DOWNGRADE as the close path also uses that. Signed-off-by: Andrew Elble Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 20 ++++++++++++++++++++ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 18d862db15b6..a7d564a83665 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5383,6 +5383,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (data == NULL) return -ENOMEM; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + + nfs4_state_protect(server->nfs_client, + NFS_SP4_MACH_CRED_CLEANUP, + &task_setup_data.rpc_client, &msg); + data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->cache_consistency_bitmask; @@ -6859,10 +6864,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { }, .allow.u.words = { [0] = 1 << (OP_CLOSE) | + 1 << (OP_OPEN_DOWNGRADE) | 1 << (OP_LOCKU) | + 1 << (OP_DELEGRETURN) | 1 << (OP_COMMIT), [1] = 1 << (OP_SECINFO - 32) | 1 << (OP_SECINFO_NO_NAME - 32) | + 1 << (OP_LAYOUTRETURN - 32) | 1 << (OP_TEST_STATEID - 32) | 1 << (OP_FREE_STATEID - 32) | 1 << (OP_WRITE - 32) @@ -6927,11 +6935,19 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, } if (test_bit(OP_CLOSE, sp->allow.u.longs) && + test_bit(OP_OPEN_DOWNGRADE, sp->allow.u.longs) && + test_bit(OP_DELEGRETURN, sp->allow.u.longs) && test_bit(OP_LOCKU, sp->allow.u.longs)) { dfprintk(MOUNT, " cleanup mode enabled\n"); set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); } + if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) { + dfprintk(MOUNT, " pnfs cleanup mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP, + &clp->cl_sp4_flags); + } + if (test_bit(OP_SECINFO, sp->allow.u.longs) && test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { dfprintk(MOUNT, " secinfo mode enabled\n"); @@ -8084,6 +8100,10 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) }; int status = 0; + nfs4_state_protect(NFS_SERVER(lrp->args.inode)->nfs_client, + NFS_SP4_MACH_CRED_PNFS_CLEANUP, + &task_setup_data.rpc_client, &msg); + dprintk("--> %s\n", __func__); if (!sync) { lrp->inode = nfs_igrab_and_active(lrp->args.inode); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2469ab0bb3a1..7fcc13c8cf1f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -102,6 +102,7 @@ struct nfs_client { #define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */ #define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ +#define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */ #endif /* CONFIG_NFS_V4 */ /* Our own IP address, as a null-terminated string. -- cgit v1.2.3 From d6c843b96e1cb5199147e3281a724e3c0b69a9ab Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 5 Dec 2015 16:20:43 +0800 Subject: nfs: only remove page from mapping if launder_page fails Instead of dropping pages when write fails, only do it when we get fatal failure in launder_page write back. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- fs/nfs/write.c | 39 +++++++++++++++++++++++---------------- include/linux/nfs_fs.h | 14 +++++++++++++- 3 files changed, 37 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 93e236429c5d..f188dd071dfc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -545,7 +545,7 @@ static int nfs_launder_page(struct page *page) inode->i_ino, (long long)page_offset(page)); nfs_fscache_wait_on_page_write(nfsi, page); - return nfs_wb_page(inode, page); + return nfs_wb_launder_page(inode, page); } static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9dafb08ddae5..4d254232d728 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -559,7 +559,8 @@ static void nfs_write_error_remove_page(struct nfs_page *req) * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page, bool nonblock) + struct page *page, bool nonblock, + bool launder) { struct nfs_page *req; int ret = 0; @@ -578,17 +579,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* - * Remove the problematic req upon fatal errors, - * while other dirty pages can still be around - * until they get flushed. + * Remove the problematic req upon fatal errors + * in launder case, while other dirty pages can + * still be around until they get flushed. */ if (nfs_error_is_fatal(ret)) { nfs_context_set_write_error(req->wb_context, ret); - nfs_write_error_remove_page(req); - } else { - nfs_redirty_request(req); - ret = -EAGAIN; + if (launder) { + nfs_write_error_remove_page(req); + goto out; + } } + nfs_redirty_request(req); + ret = -EAGAIN; } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); @@ -596,12 +599,14 @@ out: return ret; } -static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, + struct nfs_pageio_descriptor *pgio, bool launder) { int ret; nfs_pageio_cond_complete(pgio, page_file_index(page)); - ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); + ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE, + launder); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); ret = 0; @@ -612,7 +617,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st /* * Write an mmapped page to the server. */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +static int nfs_writepage_locked(struct page *page, + struct writeback_control *wbc, + bool launder) { struct nfs_pageio_descriptor pgio; struct inode *inode = page_file_mapping(page)->host; @@ -621,7 +628,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, &nfs_async_write_completion_ops); - err = nfs_do_writepage(page, wbc, &pgio); + err = nfs_do_writepage(page, wbc, &pgio, launder); nfs_pageio_complete(&pgio); if (err < 0) return err; @@ -634,7 +641,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) { int ret; - ret = nfs_writepage_locked(page, wbc); + ret = nfs_writepage_locked(page, wbc, false); unlock_page(page); return ret; } @@ -643,7 +650,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * { int ret; - ret = nfs_do_writepage(page, wbc, data); + ret = nfs_do_writepage(page, wbc, data, false); unlock_page(page); return ret; } @@ -1931,7 +1938,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) /* * Write back all requests on one page - we do this before reading it. */ -int nfs_wb_page(struct inode *inode, struct page *page) +int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder) { loff_t range_start = page_file_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); @@ -1948,7 +1955,7 @@ int nfs_wb_page(struct inode *inode, struct page *page) for (;;) { wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { - ret = nfs_writepage_locked(page, &wbc); + ret = nfs_writepage_locked(page, &wbc, launder); if (ret < 0) goto out_error; continue; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c0e961474a52..b88fc46cfbb8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -517,12 +517,24 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned */ extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_page(struct inode *inode, struct page* page); +extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +static inline int +nfs_wb_launder_page(struct inode *inode, struct page *page) +{ + return nfs_wb_single_page(inode, page, true); +} + +static inline int +nfs_wb_page(struct inode *inode, struct page *page) +{ + return nfs_wb_single_page(inode, page, false); +} + static inline int nfs_have_writebacks(struct inode *inode) { -- cgit v1.2.3 From 37e9ed22b1552fa94ee7db2901a5e7d8bdf60b15 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Dec 2015 12:30:24 -0500 Subject: pNFS: Add flag to track if we've called nfs4_ff_layout_stat_io_start_read/write Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 95 +++++++++++++++++++++++++--------- include/linux/nfs_xdr.h | 2 + 2 files changed, 72 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 14109a82ce84..9257679a15ba 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1279,14 +1279,31 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) return ff_layout_test_devid_unavailable(node); } -static int ff_layout_read_prepare_common(struct rpc_task *task, - struct nfs_pgio_header *hdr) +static void ff_layout_read_record_layoutstats_start(struct rpc_task *task, + struct nfs_pgio_header *hdr) { + if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) + return; nfs4_ff_layout_stat_io_start_read(hdr->inode, FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), hdr->args.count, task->tk_start); +} + +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) + return; + nfs4_ff_layout_stat_io_end_read(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, + hdr->res.count); +} +static int ff_layout_read_prepare_common(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1302,6 +1319,7 @@ static int ff_layout_read_prepare_common(struct rpc_task *task, } hdr->pgio_done_cb = ff_layout_read_done_cb; + ff_layout_read_record_layoutstats_start(task, hdr); return 0; } @@ -1360,10 +1378,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - nfs4_ff_layout_stat_io_end_read(task, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, hdr->res.count); - if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1378,6 +1392,7 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + ff_layout_read_record_layoutstats_done(task, hdr); rpc_count_iostats_metrics(task, &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); } @@ -1453,14 +1468,31 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, return 0; } -static int ff_layout_write_prepare_common(struct rpc_task *task, - struct nfs_pgio_header *hdr) +static void ff_layout_write_record_layoutstats_start(struct rpc_task *task, + struct nfs_pgio_header *hdr) { + if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) + return; nfs4_ff_layout_stat_io_start_write(hdr->inode, FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), hdr->args.count, task->tk_start); +} + +static void ff_layout_write_record_layoutstats_done(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) + return; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); +} +static int ff_layout_write_prepare_common(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1477,6 +1509,7 @@ static int ff_layout_write_prepare_common(struct rpc_task *task, return -EAGAIN; } + ff_layout_write_record_layoutstats_start(task, hdr); return 0; } @@ -1526,23 +1559,45 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - nfs4_ff_layout_stat_io_end_write(task, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, hdr->res.count, - hdr->res.verf->committed); - + ff_layout_write_record_layoutstats_done(task, hdr); rpc_count_iostats_metrics(task, &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } -static void ff_layout_commit_prepare_common(struct rpc_task *task, +static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task, struct nfs_commit_data *cdata) { + if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags)) + return; nfs4_ff_layout_stat_io_start_write(cdata->inode, FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), 0, task->tk_start); } +static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + struct nfs_page *req; + __u64 count = 0; + + if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags)) + return; + + if (task->tk_status == 0) { + list_for_each_entry(req, &cdata->pages, wb_list) + count += req->wb_bytes; + } + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + count, count, NFS_FILE_SYNC); +} + +static void ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + ff_layout_commit_record_layoutstats_start(task, cdata); +} + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { ff_layout_commit_prepare_common(task, data); @@ -1569,18 +1624,8 @@ static void ff_layout_commit_done(struct rpc_task *task, void *data) static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) { struct nfs_commit_data *cdata = data; - struct nfs_page *req; - __u64 count = 0; - - if (task->tk_status == 0) { - list_for_each_entry(req, &cdata->pages, wb_list) - count += req->wb_bytes; - } - - nfs4_ff_layout_stat_io_end_write(task, - FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), - count, count, NFS_FILE_SYNC); + ff_layout_commit_record_layoutstats_done(task, cdata); rpc_count_iostats_metrics(task, &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 11bbae44f4cb..7b30ac0c7def 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1375,6 +1375,7 @@ enum { NFS_IOHDR_ERROR = 0, NFS_IOHDR_EOF, NFS_IOHDR_REDO, + NFS_IOHDR_STAT, }; struct nfs_pgio_header { @@ -1454,6 +1455,7 @@ struct nfs_commit_data { const struct rpc_call_ops *mds_ops; const struct nfs_commit_completion_ops *completion_ops; int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); + unsigned long flags; }; struct nfs_pgio_completion_ops { -- cgit v1.2.3 From dc602dd706cb64036132a7903ead1c67d9a7bcb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Dec 2015 11:44:06 -0500 Subject: NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs The flexfiles layout in particular, seems to want to poke around in the O_DIRECT flags when retransmitting. This patch sets up an interface to allow it to call back into O_DIRECT to handle retransmission correctly. It also fixes a potential bug whereby we could change the behaviour of O_DIRECT if an error is already pending. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 21 +++++++++++++++------ fs/nfs/flexfilelayout/flexfilelayout.c | 13 +------------ fs/nfs/internal.h | 1 - fs/nfs/write.c | 6 ++++++ include/linux/nfs_xdr.h | 1 + 5 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4b1d08f56aba..e73693f75dee 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } -void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) -{ - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; -} -EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); - static void nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) { @@ -839,10 +833,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head) } } +static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) +{ + struct nfs_direct_req *dreq = hdr->dreq; + + spin_lock(&dreq->lock); + if (dreq->error == 0) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + /* fake unstable write to let common nfs resend pages */ + hdr->verf.committed = NFS_UNSTABLE; + hdr->good_bytes = hdr->args.count; + } + spin_unlock(&dreq->lock); +} + static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { .error_cleanup = nfs_write_sync_pgio_error, .init_hdr = nfs_direct_pgio_init, .completion = nfs_direct_write_completion, + .reschedule_io = nfs_direct_write_reschedule_io, }; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 03516c80855a..df475d42df77 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -912,18 +912,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) hdr->args.count, (unsigned long long)hdr->args.offset); - if (!hdr->dreq) { - struct nfs_open_context *ctx; - - ctx = nfs_list_entry(hdr->pages.next)->wb_context; - set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - hdr->completion_ops->error_cleanup(&hdr->pages); - } else { - nfs_direct_set_resched_writes(hdr->dreq); - /* fake unstable write to let common nfs resend pages */ - hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = hdr->args.count; - } + hdr->completion_ops->reschedule_io(hdr); return; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 313d55402238..99a2919047e9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -519,7 +519,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode) inode_dio_wait(inode); } extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); -extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq); /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_pgio_header *); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b9316406930..0aa3e6b3db70 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1326,9 +1326,15 @@ static void nfs_async_write_error(struct list_head *head) } } +static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr) +{ + nfs_async_write_error(&hdr->pages); +} + static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .error_cleanup = nfs_async_write_error, .completion = nfs_write_completion, + .reschedule_io = nfs_async_write_reschedule_io, }; void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 11bbae44f4cb..e89dbb14138c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1460,6 +1460,7 @@ struct nfs_pgio_completion_ops { void (*error_cleanup)(struct list_head *head); void (*init_hdr)(struct nfs_pgio_header *hdr); void (*completion)(struct nfs_pgio_header *hdr); + void (*reschedule_io)(struct nfs_pgio_header *hdr); }; struct nfs_unlinkdata { -- cgit v1.2.3 From af7cf057933f01dc7f33ddfb5e436ad598ed17ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Sep 2015 20:34:05 -0400 Subject: NFS: Allow multiple commit requests in flight per file Allow synchronous RPC calls to wait for pending RPC calls to finish, but also allow asynchronous ones to just fire off another commit. With this patch, the xfstests generic/074 test completes in 226s instead of 242s Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 ----- fs/nfs/file.c | 2 +- fs/nfs/nfstrace.h | 1 - fs/nfs/pnfs_nfs.c | 5 +--- fs/nfs/write.c | 70 +++++++++++++++++++++++-------------------------- include/linux/nfs_fs.h | 1 - include/linux/nfs_xdr.h | 1 - 7 files changed, 35 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e73693f75dee..14f77df79c25 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -721,14 +721,8 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_direct_write_complete(dreq, data->inode); } -static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) -{ - /* There is no lock to clear */ -} - static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { .completion = nfs_direct_commit_complete, - .error_cleanup = nfs_direct_error_cleanup, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 93e236429c5d..e6ef80ec699c 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page, * so it will not block due to pages that will shortly be freeable. */ nfsi = NFS_I(mapping->host); - if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { + if (atomic_read(&nfsi->commit_info.rpcs_out)) { *writeback = true; return; } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 59f838cdc009..9f80a086b612 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -39,7 +39,6 @@ { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ - { 1 << NFS_INO_COMMIT, "COMMIT" }, \ { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 24655b807d44..3c8e3a44e6ea 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, } else { nfs_retry_commit(mds_pages, NULL, cinfo, 0); pnfs_generic_retry_commit(cinfo, 0); - cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } } nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); - if (nreq == 0) { - cinfo->completion_ops->error_cleanup(NFS_I(inode)); + if (nreq == 0) goto out; - } atomic_add(nreq, &cinfo->mds->rpcs_out); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0aa3e6b3db70..ae29f082c9c2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include @@ -1535,27 +1537,29 @@ static void nfs_writeback_result(struct rpc_task *task, } } +static int nfs_wait_atomic_killable(atomic_t *key) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + freezable_schedule_unsafe(); + return 0; +} -static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) +static int wait_on_commit(struct nfs_mds_commit_info *cinfo) { - int ret; + return wait_on_atomic_t(&cinfo->rpcs_out, + nfs_wait_atomic_killable, TASK_KILLABLE); +} - if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) - return 1; - if (!may_wait) - return 0; - ret = out_of_line_wait_on_bit_lock(&nfsi->flags, - NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - return (ret < 0) ? ret : 1; +static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) +{ + atomic_inc(&cinfo->rpcs_out); } -static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) { - clear_bit(NFS_INO_COMMIT, &nfsi->flags); - smp_mb__after_atomic(); - wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); + if (atomic_dec_and_test(&cinfo->rpcs_out)) + wake_up_atomic_t(&cinfo->rpcs_out); } void nfs_commitdata_release(struct nfs_commit_data *data) @@ -1693,7 +1697,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, data->mds_ops, how, 0); out_bad: nfs_retry_commit(head, NULL, cinfo, 0); - cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } @@ -1755,8 +1758,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); nfs_init_cinfo(&cinfo, data->inode, data->dreq); - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) - nfs_commit_clear_lock(NFS_I(data->inode)); + nfs_commit_end(cinfo.mds); } static void nfs_commit_release(void *calldata) @@ -1775,7 +1777,6 @@ static const struct rpc_call_ops nfs_commit_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .completion = nfs_commit_release_pages, - .error_cleanup = nfs_commit_clear_lock, }; int nfs_generic_commit_list(struct inode *inode, struct list_head *head, @@ -1794,30 +1795,25 @@ int nfs_commit_inode(struct inode *inode, int how) LIST_HEAD(head); struct nfs_commit_info cinfo; int may_wait = how & FLUSH_SYNC; + int error = 0; int res; - res = nfs_commit_set_lock(NFS_I(inode), may_wait); - if (res <= 0) - goto out_mark_dirty; nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_commit_begin(cinfo.mds); res = nfs_scan_commit(inode, &head, &cinfo); - if (res) { - int error; - + if (res) error = nfs_generic_commit_list(inode, &head, how, &cinfo); - if (error < 0) - return error; - if (!may_wait) - goto out_mark_dirty; - error = wait_on_bit_action(&NFS_I(inode)->flags, - NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - if (error < 0) - return error; - } else - nfs_commit_clear_lock(NFS_I(inode)); + nfs_commit_end(cinfo.mds); + if (error < 0) + goto out_error; + if (!may_wait) + goto out_mark_dirty; + error = wait_on_commit(cinfo.mds); + if (error < 0) + return error; return res; +out_error: + res = error; /* Note: If we exit without ensuring that the commit is complete, * we must mark the inode as dirty. Otherwise, future calls to * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c0e961474a52..ebf0bd72a42b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -216,7 +216,6 @@ struct nfs_inode { #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ -#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e89dbb14138c..a8905b7d4d7f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1423,7 +1423,6 @@ struct nfs_mds_commit_info { struct nfs_commit_data; struct nfs_inode; struct nfs_commit_completion_ops { - void (*error_cleanup) (struct nfs_inode *nfsi); void (*completion) (struct nfs_commit_data *data); }; -- cgit v1.2.3 From b20135d0b2431900a3a5395970ffb7e4f3767c8b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Dec 2015 09:28:06 -0500 Subject: NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid If the layout segment is invalid, then we should not be adding more write requests to the commit list. Instead, those writes should be replayed after requesting a new layout. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 ++ fs/nfs/direct.c | 12 ++++++++++++ fs/nfs/pnfs.c | 3 +++ fs/nfs/pnfs.h | 6 ++++++ fs/nfs/pnfs_nfs.c | 5 +++++ fs/nfs/write.c | 9 +++++++++ include/linux/nfs_xdr.h | 2 ++ 7 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index e4dbab5dce6e..2be8b252e3b1 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -233,6 +233,8 @@ static u32 initiate_file_draining(struct nfs_client *clp, unlock: spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); + /* Free all lsegs that are attached to commit buckets */ + nfs_commit_inode(ino, 0); pnfs_put_layout_hdr(lo); trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, &args->cbl_stateid, -rv); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 14f77df79c25..a9a93927fe3e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -721,8 +721,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_direct_write_complete(dreq, data->inode); } +static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) +{ + struct nfs_direct_req *dreq = cinfo->dreq; + + spin_lock(&dreq->lock); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); + nfs_mark_request_commit(req, NULL, cinfo, 0); +} + static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { .completion = nfs_direct_commit_complete, + .resched_write = nfs_direct_resched_write, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 360fe95c97b5..6593be7c0129 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -703,6 +703,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, ret = -EAGAIN; spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&lseg_list); + /* Free all lsegs that are attached to commit buckets */ + nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); iput(inode); } @@ -1811,6 +1813,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, pnfs_mark_matching_lsegs_return(lo, &free_me, &range); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&free_me); + nfs_commit_inode(inode, 0); } EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d93c2ebc0fd3..4bd7faf9ce50 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -412,6 +412,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg) return lseg; } +static inline bool +pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg) +{ + return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0; +} + /* Return true if a layout driver is being used for this mountpoint */ static inline int pnfs_enabled_sb(struct nfs_server *nfss) { diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 3c8e3a44e6ea..81ac6480f9e7 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -868,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, buckets = cinfo->ds->buckets; list = &buckets[ds_commit_idx].written; if (list_empty(list)) { + if (!pnfs_is_valid_lseg(lseg)) { + spin_unlock(cinfo->lock); + cinfo->completion_ops->resched_write(cinfo, req); + return; + } /* Non-empty buckets hold a reference on the lseg. That ref * is normally transferred to the COMMIT call and released * there. It could also be released if the last req is pulled diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae29f082c9c2..0aa8d6f23b4c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1676,6 +1676,13 @@ void nfs_retry_commit(struct list_head *page_list, } EXPORT_SYMBOL_GPL(nfs_retry_commit); +static void +nfs_commit_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) +{ + __set_page_dirty_nobuffers(req->wb_page); +} + /* * Commit dirty pages */ @@ -1777,6 +1784,7 @@ static const struct rpc_call_ops nfs_commit_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .completion = nfs_commit_release_pages, + .resched_write = nfs_commit_resched_write, }; int nfs_generic_commit_list(struct inode *inode, struct list_head *head, @@ -1823,6 +1831,7 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return res; } +EXPORT_SYMBOL_GPL(nfs_commit_inode); int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a8905b7d4d7f..bee3e60a7006 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1420,10 +1420,12 @@ struct nfs_mds_commit_info { struct list_head list; }; +struct nfs_commit_info; struct nfs_commit_data; struct nfs_inode; struct nfs_commit_completion_ops { void (*completion) (struct nfs_commit_data *data); + void (*resched_write) (struct nfs_commit_info *, struct nfs_page *); }; struct nfs_commit_info { -- cgit v1.2.3 From 210c7c1750fdf769647d1d526c9ea34c412c9eee Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 6 Jan 2016 10:40:18 -0500 Subject: NFS: Use wait_on_atomic_t() for unlock after readahead The use of wait_on_atomic_t() for waiting on I/O to complete before unlocking allows us to git rid of the NFS_IO_INPROGRESS flag, and thus the nfs_iocounter's flags member, and finally the nfs_iocounter altogether. The count of I/O is moved to the lock context, and the counter increment/decrement functions become simple enough to open-code. Signed-off-by: Benjamin Coddington [Trond: Fix up conflict with existing function nfs_wait_atomic_killable()] Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- fs/nfs/inode.c | 18 ++++++++++++------ fs/nfs/internal.h | 9 ++------- fs/nfs/pagelist.c | 48 +++++++----------------------------------------- fs/nfs/write.c | 8 -------- include/linux/nfs_fs.h | 8 +------- 6 files changed, 23 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 178ec8da028f..4ef8f5addcad 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -756,7 +756,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); if (!IS_ERR(l_ctx)) { - status = nfs_iocounter_wait(&l_ctx->io_count); + status = nfs_iocounter_wait(l_ctx); nfs_put_lock_context(l_ctx); if (status < 0) return status; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 74fb1223c2f5..4b63d1bd5820 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -71,19 +71,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } -/** - * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks - * @word: long word containing the bit lock - */ -int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) +static int nfs_wait_killable(int mode) { freezable_schedule_unsafe(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; return 0; } + +int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) +{ + return nfs_wait_killable(mode); +} EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); +int nfs_wait_atomic_killable(atomic_t *p) +{ + return nfs_wait_killable(TASK_KILLABLE); +} + /** * nfs_compat_user_ino64 - returns the user-visible inode number * @fileid: 64-bit fileid @@ -699,7 +705,7 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) l_ctx->lockowner.l_owner = current->files; l_ctx->lockowner.l_pid = current->tgid; INIT_LIST_HEAD(&l_ctx->list); - nfs_iocounter_init(&l_ctx->io_count); + atomic_set(&l_ctx->io_count, 0); } static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ee81792d2886..4e8cc942336c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -238,7 +238,7 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); -int nfs_iocounter_wait(struct nfs_io_counter *c); +int nfs_iocounter_wait(struct nfs_lock_context *l_ctx); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); @@ -252,12 +252,6 @@ void nfs_free_request(struct nfs_page *req); struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); -static inline void nfs_iocounter_init(struct nfs_io_counter *c) -{ - c->flags = 0; - atomic_set(&c->io_count, 0); -} - static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) { WARN_ON_ONCE(desc->pg_mirror_count < 1); @@ -386,6 +380,7 @@ extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); +extern int nfs_wait_atomic_killable(atomic_t *p); /* super.c */ extern const struct super_operations nfs_sops; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index eeddbf0bf4c4..cb7e73ba059c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -101,53 +101,18 @@ nfs_page_free(struct nfs_page *p) kmem_cache_free(nfs_page_cachep, p); } -static void -nfs_iocounter_inc(struct nfs_io_counter *c) -{ - atomic_inc(&c->io_count); -} - -static void -nfs_iocounter_dec(struct nfs_io_counter *c) -{ - if (atomic_dec_and_test(&c->io_count)) { - clear_bit(NFS_IO_INPROGRESS, &c->flags); - smp_mb__after_atomic(); - wake_up_bit(&c->flags, NFS_IO_INPROGRESS); - } -} - -static int -__nfs_iocounter_wait(struct nfs_io_counter *c) -{ - wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS); - DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS); - int ret = 0; - - do { - prepare_to_wait(wq, &q.wait, TASK_KILLABLE); - set_bit(NFS_IO_INPROGRESS, &c->flags); - if (atomic_read(&c->io_count) == 0) - break; - ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE); - } while (atomic_read(&c->io_count) != 0 && !ret); - finish_wait(wq, &q.wait); - return ret; -} - /** * nfs_iocounter_wait - wait for i/o to complete - * @c: nfs_io_counter to use + * @l_ctx: nfs_lock_context with io_counter to use * * returns -ERESTARTSYS if interrupted by a fatal signal. * Otherwise returns 0 once the io_count hits 0. */ int -nfs_iocounter_wait(struct nfs_io_counter *c) +nfs_iocounter_wait(struct nfs_lock_context *l_ctx) { - if (atomic_read(&c->io_count) == 0) - return 0; - return __nfs_iocounter_wait(c); + return wait_on_atomic_t(&l_ctx->io_count, nfs_wait_atomic_killable, + TASK_KILLABLE); } /* @@ -370,7 +335,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, return ERR_CAST(l_ctx); } req->wb_lock_context = l_ctx; - nfs_iocounter_inc(&l_ctx->io_count); + atomic_inc(&l_ctx->io_count); /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in @@ -431,7 +396,8 @@ static void nfs_clear_request(struct nfs_page *req) req->wb_page = NULL; } if (l_ctx != NULL) { - nfs_iocounter_dec(&l_ctx->io_count); + if (atomic_dec_and_test(&l_ctx->io_count)) + wake_up_atomic_t(&l_ctx->io_count); nfs_put_lock_context(l_ctx); req->wb_lock_context = NULL; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 94828b3f8c95..8ba4f717b413 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1565,14 +1565,6 @@ static void nfs_writeback_result(struct rpc_task *task, } } -static int nfs_wait_atomic_killable(atomic_t *key) -{ - if (fatal_signal_pending(current)) - return -ERESTARTSYS; - freezable_schedule_unsafe(); - return 0; -} - static int wait_on_commit(struct nfs_mds_commit_info *cinfo) { return wait_on_atomic_t(&cinfo->rpcs_out, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9eee972863a7..196aaceafda7 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -60,18 +60,12 @@ struct nfs_lockowner { pid_t l_pid; }; -#define NFS_IO_INPROGRESS 0 -struct nfs_io_counter { - unsigned long flags; - atomic_t io_count; -}; - struct nfs_lock_context { atomic_t count; struct list_head list; struct nfs_open_context *open_context; struct nfs_lockowner lockowner; - struct nfs_io_counter io_count; + atomic_t io_count; }; struct nfs4_state; -- cgit v1.2.3