Diffstat (limited to 'fs')
-rw-r--r--  fs/lockd/svc.c      | 200
-rw-r--r--  fs/lockd/svclock.c  |   6
-rw-r--r--  fs/nfs/callback.c   |  32
-rw-r--r--  fs/nfs/export.c     |   2
-rw-r--r--  fs/nfsd/filecache.c |  79
-rw-r--r--  fs/nfsd/filecache.h |   1
-rw-r--r--  fs/nfsd/netns.h     |  27
-rw-r--r--  fs/nfsd/nfs3proc.c  |   6
-rw-r--r--  fs/nfsd/nfs3xdr.c   |  65
-rw-r--r--  fs/nfsd/nfs4proc.c  |  24
-rw-r--r--  fs/nfsd/nfs4state.c |  63
-rw-r--r--  fs/nfsd/nfs4xdr.c   |  21
-rw-r--r--  fs/nfsd/nfscache.c  |   2
-rw-r--r--  fs/nfsd/nfsctl.c    |  27
-rw-r--r--  fs/nfsd/nfsd.h      |   2
-rw-r--r--  fs/nfsd/nfsfh.c     |  66
-rw-r--r--  fs/nfsd/nfsfh.h     |  40
-rw-r--r--  fs/nfsd/nfsproc.c   |   8
-rw-r--r--  fs/nfsd/nfssvc.c    | 220
-rw-r--r--  fs/nfsd/state.h     |   5
-rw-r--r--  fs/nfsd/stats.c     |   2
-rw-r--r--  fs/nfsd/stats.h     |   4
-rw-r--r--  fs/nfsd/trace.h     | 106
-rw-r--r--  fs/nfsd/vfs.c       | 122
-rw-r--r--  fs/nfsd/vfs.h       |   3
25 files changed, 568 insertions, 565 deletions
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index b220e1b91726..4defefd89cbf 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -54,13 +54,9 @@ EXPORT_SYMBOL_GPL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; -static struct task_struct *nlmsvc_task; -static struct svc_rqst *nlmsvc_rqst; +static struct svc_serv *nlmsvc_serv; unsigned long nlmsvc_timeout; -static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); -static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); - unsigned int lockd_net_id; /* @@ -184,7 +180,12 @@ lockd(void *vrqstp) nlm_shutdown_hosts(); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); - return 0; + + dprintk("lockd_down: service stopped\n"); + + svc_exit_thread(rqstp); + + module_put_and_exit(0); } static int create_lockd_listener(struct svc_serv *serv, const char *name, @@ -290,8 +291,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) __func__, net->ns.inum); } } else { - pr_err("%s: no users! task=%p, net=%x\n", - __func__, nlmsvc_task, net->ns.inum); + pr_err("%s: no users! net=%x\n", + __func__, net->ns.inum); BUG(); } } @@ -302,20 +303,16 @@ static int lockd_inetaddr_event(struct notifier_block *this, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sockaddr_in sin; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nlm_ntf_refcnt)) + if (event != NETDEV_DOWN) goto out; - if (nlmsvc_rqst) { + if (nlmsvc_serv) { dprintk("lockd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; - svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, - (struct sockaddr *)&sin); + svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin); } - atomic_dec(&nlm_ntf_refcnt); - wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -332,21 +329,17 @@ static int lockd_inet6addr_event(struct notifier_block *this, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sockaddr_in6 sin6; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nlm_ntf_refcnt)) + if (event != NETDEV_DOWN) goto out; - if (nlmsvc_rqst) { + if (nlmsvc_serv) { dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6.sin6_scope_id = ifa->idev->dev->ifindex; - svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, - (struct sockaddr *)&sin6); + svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6); } - atomic_dec(&nlm_ntf_refcnt); - wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -357,86 +350,22 @@ static struct notifier_block lockd_inet6addr_notifier = { }; #endif -static void lockd_unregister_notifiers(void) -{ - unregister_inetaddr_notifier(&lockd_inetaddr_notifier); -#if IS_ENABLED(CONFIG_IPV6) - unregister_inet6addr_notifier(&lockd_inet6addr_notifier); -#endif - wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0); -} - -static void lockd_svc_exit_thread(void) -{ - atomic_dec(&nlm_ntf_refcnt); - lockd_unregister_notifiers(); - svc_exit_thread(nlmsvc_rqst); -} - -static int lockd_start_svc(struct svc_serv *serv) -{ - int error; - - if (nlmsvc_rqst) - return 0; - - /* - * Create the kernel thread and wait for it to start. 
- */ - nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); - if (IS_ERR(nlmsvc_rqst)) { - error = PTR_ERR(nlmsvc_rqst); - printk(KERN_WARNING - "lockd_up: svc_rqst allocation failed, error=%d\n", - error); - lockd_unregister_notifiers(); - goto out_rqst; - } - - atomic_inc(&nlm_ntf_refcnt); - svc_sock_update_bufs(serv); - serv->sv_maxconn = nlm_max_connections; - - nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name); - if (IS_ERR(nlmsvc_task)) { - error = PTR_ERR(nlmsvc_task); - printk(KERN_WARNING - "lockd_up: kthread_run failed, error=%d\n", error); - goto out_task; - } - nlmsvc_rqst->rq_task = nlmsvc_task; - wake_up_process(nlmsvc_task); - - dprintk("lockd_up: service started\n"); - return 0; - -out_task: - lockd_svc_exit_thread(); - nlmsvc_task = NULL; -out_rqst: - nlmsvc_rqst = NULL; - return error; -} - static const struct svc_serv_ops lockd_sv_ops = { .svo_shutdown = svc_rpcb_cleanup, + .svo_function = lockd, .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_module = THIS_MODULE, }; -static struct svc_serv *lockd_create_svc(void) +static int lockd_get(void) { struct svc_serv *serv; + int error; - /* - * Check whether we're already up and running. - */ - if (nlmsvc_rqst) { - /* - * Note: increase service usage, because later in case of error - * svc_destroy() will be called. - */ - svc_get(nlmsvc_rqst->rq_server); - return nlmsvc_rqst->rq_server; + if (nlmsvc_serv) { + svc_get(nlmsvc_serv); + nlmsvc_users++; + return 0; } /* @@ -454,14 +383,41 @@ static struct svc_serv *lockd_create_svc(void) serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } + + serv->sv_maxconn = nlm_max_connections; + error = svc_set_num_threads(serv, NULL, 1); + /* The thread now holds the only reference */ + svc_put(serv); + if (error < 0) + return error; + + nlmsvc_serv = serv; register_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) register_inet6addr_notifier(&lockd_inet6addr_notifier); #endif dprintk("lockd_up: service created\n"); - return serv; + nlmsvc_users++; + return 0; +} + +static void lockd_put(void) +{ + if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n")) + return; + if (--nlmsvc_users) + return; + + unregister_inetaddr_notifier(&lockd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&lockd_inet6addr_notifier); +#endif + + svc_set_num_threads(nlmsvc_serv, NULL, 0); + nlmsvc_serv = NULL; + dprintk("lockd_down: service destroyed\n"); } /* @@ -469,36 +425,21 @@ static struct svc_serv *lockd_create_svc(void) */ int lockd_up(struct net *net, const struct cred *cred) { - struct svc_serv *serv; int error; mutex_lock(&nlmsvc_mutex); - serv = lockd_create_svc(); - if (IS_ERR(serv)) { - error = PTR_ERR(serv); - goto err_create; - } + error = lockd_get(); + if (error) + goto err; - error = lockd_up_net(serv, net, cred); + error = lockd_up_net(nlmsvc_serv, net, cred); if (error < 0) { - lockd_unregister_notifiers(); - goto err_put; + lockd_put(); + goto err; } - error = lockd_start_svc(serv); - if (error < 0) { - lockd_down_net(serv, net); - goto err_put; - } - nlmsvc_users++; - /* - * Note: svc_serv structures have an initial use count of 1, - * so we exit through here on both success and failure. 
- */ -err_put: - svc_destroy(serv); -err_create: +err: mutex_unlock(&nlmsvc_mutex); return error; } @@ -511,27 +452,8 @@ void lockd_down(struct net *net) { mutex_lock(&nlmsvc_mutex); - lockd_down_net(nlmsvc_rqst->rq_server, net); - if (nlmsvc_users) { - if (--nlmsvc_users) - goto out; - } else { - printk(KERN_ERR "lockd_down: no users! task=%p\n", - nlmsvc_task); - BUG(); - } - - if (!nlmsvc_task) { - printk(KERN_ERR "lockd_down: no lockd running.\n"); - BUG(); - } - kthread_stop(nlmsvc_task); - dprintk("lockd_down: service stopped\n"); - lockd_svc_exit_thread(); - dprintk("lockd_down: service destroyed\n"); - nlmsvc_task = NULL; - nlmsvc_rqst = NULL; -out: + lockd_down_net(nlmsvc_serv, net); + lockd_put(); mutex_unlock(&nlmsvc_mutex); } EXPORT_SYMBOL_GPL(lockd_down); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e9b85d8fd5fe..cb3658ab9b7a 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -470,8 +470,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_host *host, struct nlm_lock *lock, int wait, struct nlm_cookie *cookie, int reclaim) { - struct nlm_block *block = NULL; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct inode *inode = nlmsvc_file_inode(file); +#endif + struct nlm_block *block = NULL; int error; int mode; int async_block = 0; @@ -484,7 +486,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_end, wait); - if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) { + if (nlmsvc_file_file(file)->f_op->lock) { async_block = wait; wait = 0; } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 86d856de1389..c4994c1d4e36 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -169,12 +169,12 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS) nrservs = NFS4_MIN_NR_CALLBACK_THREADS; - if (serv->sv_nrthreads-1 == nrservs) + if (serv->sv_nrthreads == nrservs) return 0; - ret = serv->sv_ops->svo_setup(serv, NULL, nrservs); + ret = svc_set_num_threads(serv, NULL, nrservs); if (ret) { - serv->sv_ops->svo_setup(serv, NULL, 0); + svc_set_num_threads(serv, NULL, 0); return ret; } dprintk("nfs_callback_up: service started\n"); @@ -235,14 +235,12 @@ err_bind: static const struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) static const struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; @@ -266,14 +264,8 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) /* * Check whether we're already up and running. */ - if (cb_info->serv) { - /* - * Note: increase service usage, because later in case of error - * svc_destroy() will be called. 
- */ - svc_get(cb_info->serv); - return cb_info->serv; - } + if (cb_info->serv) + return svc_get(cb_info->serv); switch (minorversion) { case 0: @@ -294,7 +286,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); @@ -335,16 +327,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_start; cb_info->users++; - /* - * svc_create creates the svc_serv with sv_nrthreads == 1, and then - * svc_prepare_thread increments that. So we need to call svc_destroy - * on both success and failure so that the refcount is 1 when the - * thread exits. - */ err_net: if (!cb_info->users) cb_info->serv = NULL; - svc_destroy(serv); + svc_put(serv); err_create: mutex_unlock(&nfs_callback_mutex); return ret; @@ -369,8 +355,8 @@ void nfs_callback_down(int minorversion, struct net *net) cb_info->users--; if (cb_info->users == 0) { svc_get(serv); - serv->sv_ops->svo_setup(serv, NULL, 0); - svc_destroy(serv); + svc_set_num_threads(serv, NULL, 0); + svc_put(serv); dprintk("nfs_callback_down: service destroyed\n"); cb_info->serv = NULL; } diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 171c424cb6d5..01596f2d0a1e 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -158,5 +158,5 @@ const struct export_operations nfs_export_ops = { .fetch_iversion = nfs_fetch_iversion, .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| - EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS, + EXPORT_OP_NOATOMIC_ATTR, }; diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index fdf89fcf1a0c..8bc807c5fea4 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -44,12 +44,9 @@ struct nfsd_fcache_bucket { static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); struct nfsd_fcache_disposal { - struct list_head list; struct work_struct work; - struct net *net; spinlock_t lock; struct list_head freeme; - struct rcu_head rcu; }; static struct workqueue_struct *nfsd_filecache_wq __read_mostly; @@ -62,8 +59,6 @@ static long nfsd_file_lru_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; -static DEFINE_SPINLOCK(laundrette_lock); -static LIST_HEAD(laundrettes); static void nfsd_file_gc(void); @@ -194,7 +189,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); } nf->nf_mark = NULL; - init_rwsem(&nf->nf_rwsem); trace_nfsd_file_alloc(nf); } return nf; @@ -249,7 +243,7 @@ nfsd_file_do_unhash(struct nfsd_file *nf) trace_nfsd_file_unhash(nf); if (nfsd_file_check_write_error(nf)) - nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; hlist_del_rcu(&nf->nf_node); atomic_long_dec(&nfsd_filecache_count); @@ -367,19 +361,13 @@ nfsd_file_list_remove_disposal(struct list_head *dst, static void nfsd_file_list_add_disposal(struct list_head *files, struct net *net) { - struct nfsd_fcache_disposal *l; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; - 
rcu_read_lock(); - list_for_each_entry_rcu(l, &laundrettes, list) { - if (l->net == net) { - spin_lock(&l->lock); - list_splice_tail_init(files, &l->freeme); - spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); - break; - } - } - rcu_read_unlock(); + spin_lock(&l->lock); + list_splice_tail_init(files, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); } static void @@ -755,7 +743,7 @@ nfsd_file_cache_purge(struct net *net) } static struct nfsd_fcache_disposal * -nfsd_alloc_fcache_disposal(struct net *net) +nfsd_alloc_fcache_disposal(void) { struct nfsd_fcache_disposal *l; @@ -763,7 +751,6 @@ nfsd_alloc_fcache_disposal(struct net *net) if (!l) return NULL; INIT_WORK(&l->work, nfsd_file_delayed_close); - l->net = net; spin_lock_init(&l->lock); INIT_LIST_HEAD(&l->freeme); return l; @@ -772,61 +759,27 @@ nfsd_alloc_fcache_disposal(struct net *net) static void nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) { - rcu_assign_pointer(l->net, NULL); cancel_work_sync(&l->work); nfsd_file_dispose_list(&l->freeme); - kfree_rcu(l, rcu); -} - -static void -nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) -{ - spin_lock(&laundrette_lock); - list_add_tail_rcu(&l->list, &laundrettes); - spin_unlock(&laundrette_lock); -} - -static void -nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) -{ - spin_lock(&laundrette_lock); - list_del_rcu(&l->list); - spin_unlock(&laundrette_lock); -} - -static int -nfsd_alloc_fcache_disposal_net(struct net *net) -{ - struct nfsd_fcache_disposal *l; - - l = nfsd_alloc_fcache_disposal(net); - if (!l) - return -ENOMEM; - nfsd_add_fcache_disposal(l); - return 0; + kfree(l); } static void nfsd_free_fcache_disposal_net(struct net *net) { - struct nfsd_fcache_disposal *l; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; - rcu_read_lock(); - list_for_each_entry_rcu(l, &laundrettes, list) { - if (l->net != net) - continue; - nfsd_del_fcache_disposal(l); - rcu_read_unlock(); - nfsd_free_fcache_disposal(l); - return; - } - rcu_read_unlock(); + nfsd_free_fcache_disposal(l); } int nfsd_file_cache_start_net(struct net *net) { - return nfsd_alloc_fcache_disposal_net(net); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nn->fcache_disposal = nfsd_alloc_fcache_disposal(); + return nn->fcache_disposal ? 0 : -ENOMEM; } void diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 7872df5a0fe3..435ceab27897 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -46,7 +46,6 @@ struct nfsd_file { refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; - struct rw_semaphore nf_rwsem; }; int nfsd_file_cache_init(void); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 935c1028c217..1b1a962a1804 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -11,6 +11,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/percpu_counter.h> +#include <linux/siphash.h> /* Hash tables for nfs4_clientid state */ #define CLIENT_HASH_BITS 4 @@ -108,9 +109,8 @@ struct nfsd_net { bool nfsd_net_up; bool lockd_up; - /* Time of server startup */ - struct timespec64 nfssvc_boot; - seqlock_t boot_lock; + seqlock_t writeverf_lock; + unsigned char writeverf[8]; /* * Max number of connections this nfsd container will allow. 
Defaults @@ -123,12 +123,13 @@ struct nfsd_net { u32 clverifier_counter; struct svc_serv *nfsd_serv; - - wait_queue_head_t ntf_wq; - atomic_t ntf_refcnt; - - /* Allow umount to wait for nfsd state cleanup */ - struct completion nfsd_shutdown_complete; + /* When a listening socket is added to nfsd, keep_active is set + * and this justifies a reference on nfsd_serv. This stops + * nfsd_serv from being freed. When the number of threads is + * set, keep_active is cleared and the reference is dropped. So + * when the last thread exits, the service will be destroyed. + */ + int keep_active; /* * clientid and stateid data for construction of net unique COPY @@ -184,6 +185,10 @@ struct nfsd_net { /* utsname taken from the process that starts the server */ char nfsd_name[UNX_MAXNODENAME+1]; + + struct nfsd_fcache_disposal *fcache_disposal; + + siphash_key_t siphash_key; }; /* Simple check to find out if a given net was properly initialized */ @@ -193,6 +198,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn); extern unsigned int nfsd_net_id; -void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn); -void nfsd_reset_boot_verifier(struct nfsd_net *nn); +void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn); +void nfsd_reset_write_verifier(struct nfsd_net *nn); #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 15dac36ca852..8ef53f6726ec 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -202,15 +202,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; nvecs = svc_fill_write_vector(rqstp, &argp->payload); - if (!nvecs) { - resp->status = nfserr_io; - goto out; - } + resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, rqstp->rq_vec, nvecs, &cnt, resp->committed, resp->verf); resp->count = cnt; -out: return rpc_success; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index c3ac1b6aa3aa..7c45ba4db61b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -487,71 +487,6 @@ neither: return true; } -static bool fs_supports_change_attribute(struct super_block *sb) -{ - return sb->s_flags & SB_I_VERSION || sb->s_export_op->fetch_iversion; -} - -/* - * Fill in the pre_op attr for the wcc data - */ -void fill_pre_wcc(struct svc_fh *fhp) -{ - struct inode *inode; - struct kstat stat; - bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - - if (fhp->fh_no_wcc || fhp->fh_pre_saved) - return; - inode = d_inode(fhp->fh_dentry); - if (fs_supports_change_attribute(inode->i_sb) || !v4) { - __be32 err = fh_getattr(fhp, &stat); - - if (err) { - /* Grab the times from inode anyway */ - stat.mtime = inode->i_mtime; - stat.ctime = inode->i_ctime; - stat.size = inode->i_size; - } - fhp->fh_pre_mtime = stat.mtime; - fhp->fh_pre_ctime = stat.ctime; - fhp->fh_pre_size = stat.size; - } - if (v4) - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); - - fhp->fh_pre_saved = true; -} - -/* - * Fill in the post_op attr for the wcc data - */ -void fill_post_wcc(struct svc_fh *fhp) -{ - bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode = d_inode(fhp->fh_dentry); - - if (fhp->fh_no_wcc) - return; - - if (fhp->fh_post_saved) - printk("nfsd: inode locked twice during operation.\n"); - - fhp->fh_post_saved = true; - - if (fs_supports_change_attribute(inode->i_sb) || !v4) { - __be32 err = fh_getattr(fhp, &fhp->fh_post_attr); - - if (err) { - fhp->fh_post_saved = false; - fhp->fh_post_attr.ctime = inode->i_ctime; - } - } - if (v4) - fhp->fh_post_change = - 
nfsd4_change_attribute(&fhp->fh_post_attr, inode); -} - /* * XDR decode functions */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a36261f89bdf..ed1ee25647be 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -598,7 +598,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data)); - nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id)); + nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id)); } static __be32 @@ -1101,7 +1101,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - status = nfsd4_clone_file_range(src, clone->cl_src_pos, + status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos, dst, clone->cl_dst_pos, clone->cl_count, EX_ISSYNC(cstate->current_fh.fh_export)); @@ -1510,11 +1510,14 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) { + struct file *dst = copy->nf_dst->nf_file; + struct file *src = copy->nf_src->nf_file; + errseq_t since; ssize_t bytes_copied = 0; u64 bytes_total = copy->cp_count; u64 src_pos = copy->cp_src_pos; u64 dst_pos = copy->cp_dst_pos; - __be32 status; + int status; /* See RFC 7862 p.67: */ if (bytes_total == 0) @@ -1522,9 +1525,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) do { if (kthread_should_stop()) break; - bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file, - src_pos, copy->nf_dst->nf_file, dst_pos, - bytes_total); + bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos, + bytes_total); if (bytes_copied <= 0) break; bytes_total -= bytes_copied; @@ -1534,11 +1536,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) } while (bytes_total > 0 && !copy->cp_synchronous); /* for a non-zero asynchronous copy do a commit of data */ if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) { - down_write(©->nf_dst->nf_rwsem); - status = vfs_fsync_range(copy->nf_dst->nf_file, - copy->cp_dst_pos, + since = READ_ONCE(dst->f_wb_err); + status = vfs_fsync_range(dst, copy->cp_dst_pos, copy->cp_res.wr_bytes_written, 0); - up_write(©->nf_dst->nf_rwsem); + if (!status) + status = filemap_check_wb_err(dst->f_mapping, since); if (!status) copy->committed = true; } @@ -2528,7 +2530,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } - fh_clear_wcc(current_fh); + fh_clear_pre_post_attrs(current_fh); /* If op is non-idempotent */ if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1956d377d1a6..72900b89cf84 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -246,6 +246,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { if (fh_match(fh, &cur->nbl_fh)) { list_del_init(&cur->nbl_list); + WARN_ON(list_empty(&cur->nbl_lru)); list_del_init(&cur->nbl_lru); found = cur; break; @@ -271,6 +272,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); + kref_init(&nbl->nbl_kref); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, &nfsd4_cb_notify_lock_ops, NFSPROC4_CLNT_CB_NOTIFY_LOCK); @@ -280,11 +282,20 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, } static void +free_nbl(struct kref *kref) +{ + struct nfsd4_blocked_lock *nbl; + + nbl = container_of(kref, struct 
nfsd4_blocked_lock, nbl_kref); + kfree(nbl); +} + +static void free_blocked_lock(struct nfsd4_blocked_lock *nbl) { locks_delete_block(&nbl->nbl_lock); locks_release_private(&nbl->nbl_lock); - kfree(nbl); + kref_put(&nbl->nbl_kref, free_nbl); } static void @@ -302,6 +313,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo) struct nfsd4_blocked_lock, nbl_list); list_del_init(&nbl->nbl_list); + WARN_ON(list_empty(&nbl->nbl_lru)); list_move(&nbl->nbl_lru, &reaplist); } spin_unlock(&nn->blocked_locks_lock); @@ -360,11 +372,13 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { * st_{access,deny}_bmap field of the stateid, in order to track not * only what share bits are currently in force, but also what * combinations of share bits previous opens have used. This allows us - * to enforce the recommendation of rfc 3530 14.2.19 that the server - * return an error if the client attempt to downgrade to a combination - * of share bits not explicable by closing some of its previous opens. + * to enforce the recommendation in + * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that + * the server return an error if the client attempt to downgrade to a + * combination of share bits not explicable by closing some of its + * previous opens. * - * XXX: This enforcement is actually incomplete, since we don't keep + * This enforcement is arguably incomplete, since we don't keep * track of access/deny bit combinations; so, e.g., we allow: * * OPEN allow read, deny write @@ -372,6 +386,10 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { * DOWNGRADE allow read, deny none * * which we should reject. + * + * But you could also argue that our current code is already overkill, + * since it only exists to return NFS4ERR_INVAL on incorrect client + * behavior. 
*/ static unsigned int bmap_to_share_mode(unsigned long bmap) @@ -6040,7 +6058,11 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, *nfp = NULL; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { - status = check_special_stateids(net, fhp, stateid, flags); + if (cstid) + status = nfserr_bad_stateid; + else + status = check_special_stateids(net, fhp, stateid, + flags); goto done; } @@ -6836,7 +6858,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_blocked_lock *nbl = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; - struct super_block *sb; __be32 status = 0; int lkflg; int err; @@ -6858,7 +6879,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_lock: permission denied!\n"); return status; } - sb = cstate->current_fh.fh_dentry->d_sb; if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) @@ -6910,8 +6930,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: - if (nfsd4_has_session(cstate) && - !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS)) + if (nfsd4_has_session(cstate)) fl_flags |= FL_SLEEP; fallthrough; case NFS4_READ_LT: @@ -6923,8 +6942,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fl_type = F_RDLCK; break; case NFS4_WRITEW_LT: - if (nfsd4_has_session(cstate) && - !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS)) + if (nfsd4_has_session(cstate)) fl_flags |= FL_SLEEP; fallthrough; case NFS4_WRITE_LT: @@ -6945,6 +6963,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } + /* + * Most filesystems with their own ->lock operations will block + * the nfsd thread waiting to acquire the lock. 
That leads to + * deadlocks (we don't want every nfsd thread tied up waiting + * for file locks), so don't attempt blocking lock notifications + * on those filesystems: + */ + if (nf->nf_file->f_op->lock) + fl_flags &= ~FL_SLEEP; + nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { dprintk("NFSD: %s: unable to allocate block!\n", __func__); @@ -6975,6 +7003,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); + kref_get(&nbl->nbl_kref); spin_unlock(&nn->blocked_locks_lock); } @@ -6987,6 +7016,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED: + kref_put(&nbl->nbl_kref, free_nbl); nbl = NULL; fallthrough; case -EAGAIN: /* conflock holds conflicting lock */ @@ -7007,8 +7037,13 @@ out: /* dequeue it if we queued it before */ if (fl_flags & FL_SLEEP) { spin_lock(&nn->blocked_locks_lock); - list_del_init(&nbl->nbl_list); - list_del_init(&nbl->nbl_lru); + if (!list_empty(&nbl->nbl_list) && + !list_empty(&nbl->nbl_lru)) { + list_del_init(&nbl->nbl_list); + list_del_init(&nbl->nbl_lru); + kref_put(&nbl->nbl_kref, free_nbl); + } + /* nbl can use one of lists to be linked to reaplist */ spin_unlock(&nn->blocked_locks_lock); } free_blocked_lock(nbl); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5a93a5db4fb0..899de438e529 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -277,21 +277,10 @@ nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf) static __be32 nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen) { - u32 i, count; - __be32 *p; - - if (xdr_stream_decode_u32(argp->xdr, &count) < 0) - return nfserr_bad_xdr; - /* request sanity */ - if (count > 1000) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, count << 2); - if (!p) - return nfserr_bad_xdr; - for (i = 0; i < bmlen; i++) - bmval[i] = (i < count) ? be32_to_cpup(p++) : 0; + ssize_t status; - return nfs_ok; + status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen); + return status == -EBADMSG ? 
nfserr_bad_xdr : nfs_ok; } static __be32 @@ -4804,8 +4793,8 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, return nfserr_resource; *p++ = htonl(NFS4_CONTENT_HOLE); - p = xdr_encode_hyper(p, read->rd_offset); - p = xdr_encode_hyper(p, count); + p = xdr_encode_hyper(p, read->rd_offset); + p = xdr_encode_hyper(p, count); *eof = (read->rd_offset + count) >= f_size; *maxcount = min_t(unsigned long, count, *maxcount); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 6e0b6f3148dc..a4a69ab6ab28 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -87,7 +87,7 @@ nfsd_hashsize(unsigned int limit) static u32 nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) { - return hash_32(be32_to_cpu(xid), nn->maskbits); + return hash_32((__force u32)xid, nn->maskbits); } static struct svc_cacherep * diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 51a49e0cfe37..b9f27fbcd768 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -742,13 +742,12 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred return err; err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); - if (err < 0) { - nfsd_destroy(net); - return err; - } - /* Decrease the count, but don't shut down the service */ - nn->nfsd_serv->sv_nrthreads--; + if (err >= 0 && + !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + svc_get(nn->nfsd_serv); + + nfsd_put(net); return err; } @@ -783,8 +782,10 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr if (err < 0 && err != -EAFNOSUPPORT) goto out_close; - /* Decrease the count, but don't shut down the service */ - nn->nfsd_serv->sv_nrthreads--; + if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + svc_get(nn->nfsd_serv); + + nfsd_put(net); return 0; out_close: xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); @@ -793,10 +794,7 @@ out_close: svc_xprt_put(xprt); } out_err: - if (!list_empty(&nn->nfsd_serv->sv_permsocks)) - nn->nfsd_serv->sv_nrthreads--; - else - nfsd_destroy(net); + nfsd_put(net); return err; } @@ -1485,9 +1483,8 @@ static __net_init int nfsd_init_net(struct net *net) nn->clientid_counter = nn->clientid_base + 1; nn->s2s_cp_cl_id = nn->clientid_counter++; - atomic_set(&nn->ntf_refcnt, 0); - init_waitqueue_head(&nn->ntf_wq); - seqlock_init(&nn->boot_lock); + get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); + seqlock_init(&nn->writeverf_lock); return 0; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 498e5a489826..3e5008b475ff 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -97,7 +97,7 @@ int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_shutdown_threads(struct net *net); -void nfsd_destroy(struct net *net); +void nfsd_put(struct net *net); bool i_am_nfsd(void); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index f3779fa72c89..145208bcb9bd 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -611,6 +611,70 @@ out_negative: return nfserr_serverfault; } +#ifdef CONFIG_NFSD_V3 + +/** + * fh_fill_pre_attrs - Fill in pre-op attributes + * @fhp: file handle to be updated + * + */ +void fh_fill_pre_attrs(struct svc_fh *fhp) +{ + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); + struct inode *inode; + struct kstat stat; + __be32 err; + + if (fhp->fh_no_wcc || fhp->fh_pre_saved) + return; + + inode = d_inode(fhp->fh_dentry); + err = fh_getattr(fhp, &stat); + if (err) { + /* Grab the times from inode anyway */ + stat.mtime = inode->i_mtime; + stat.ctime = 
inode->i_ctime; + stat.size = inode->i_size; + } + if (v4) + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; + fhp->fh_pre_size = stat.size; + fhp->fh_pre_saved = true; +} + +/** + * fh_fill_post_attrs - Fill in post-op attributes + * @fhp: file handle to be updated + * + */ +void fh_fill_post_attrs(struct svc_fh *fhp) +{ + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); + struct inode *inode = d_inode(fhp->fh_dentry); + __be32 err; + + if (fhp->fh_no_wcc) + return; + + if (fhp->fh_post_saved) + printk("nfsd: inode locked twice during operation.\n"); + + err = fh_getattr(fhp, &fhp->fh_post_attr); + if (err) { + fhp->fh_post_saved = false; + fhp->fh_post_attr.ctime = inode->i_ctime; + } else + fhp->fh_post_saved = true; + if (v4) + fhp->fh_post_change = + nfsd4_change_attribute(&fhp->fh_post_attr, inode); +} + +#endif /* CONFIG_NFSD_V3 */ + /* * Release a file handle. */ @@ -623,7 +687,7 @@ fh_put(struct svc_fh *fhp) fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); - fh_clear_wcc(fhp); + fh_clear_pre_post_attrs(fhp); } fh_drop_write(fhp); if (exp) { diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index d11e4b6870d6..434930d8a946 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -284,12 +284,13 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) #endif #ifdef CONFIG_NFSD_V3 -/* - * The wcc data stored in current_fh should be cleared - * between compound ops. + +/** + * fh_clear_pre_post_attrs - Reset pre/post attributes + * @fhp: file handle to be updated + * */ -static inline void -fh_clear_wcc(struct svc_fh *fhp) +static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) { fhp->fh_post_saved = false; fhp->fh_pre_saved = false; @@ -323,13 +324,24 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, return time_to_chattr(&stat->ctime); } -extern void fill_pre_wcc(struct svc_fh *fhp); -extern void fill_post_wcc(struct svc_fh *fhp); -#else -#define fh_clear_wcc(ignored) -#define fill_pre_wcc(ignored) -#define fill_post_wcc(notused) -#endif /* CONFIG_NFSD_V3 */ +extern void fh_fill_pre_attrs(struct svc_fh *fhp); +extern void fh_fill_post_attrs(struct svc_fh *fhp); + +#else /* !CONFIG_NFSD_V3 */ + +static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) +{ +} + +static inline void fh_fill_pre_attrs(struct svc_fh *fhp) +{ +} + +static inline void fh_fill_post_attrs(struct svc_fh *fhp) +{ +} + +#endif /* !CONFIG_NFSD_V3 */ /* @@ -355,7 +367,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) inode = d_inode(dentry); inode_lock_nested(inode, subclass); - fill_pre_wcc(fhp); + fh_fill_pre_attrs(fhp); fhp->fh_locked = true; } @@ -372,7 +384,7 @@ static inline void fh_unlock(struct svc_fh *fhp) { if (fhp->fh_locked) { - fill_post_wcc(fhp); + fh_fill_post_attrs(fhp); inode_unlock(d_inode(fhp->fh_dentry)); fhp->fh_locked = false; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index de282f3273c5..18b8eb43a19b 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -235,10 +235,6 @@ nfsd_proc_write(struct svc_rqst *rqstp) argp->len, argp->offset); nvecs = svc_fill_write_vector(rqstp, &argp->payload); - if (!nvecs) { - resp->status = nfserr_io; - goto out; - } resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, rqstp->rq_vec, nvecs, @@ -247,7 +243,6 @@ nfsd_proc_write(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) return rpc_drop_reply; -out: return rpc_success; } @@ -850,6 +845,7 
@@ nfserrno (int errno) { nfserr_io, -EIO }, { nfserr_nxio, -ENXIO }, { nfserr_fbig, -E2BIG }, + { nfserr_stale, -EBADF }, { nfserr_acces, -EACCES }, { nfserr_exist, -EEXIST }, { nfserr_xdev, -EXDEV }, @@ -878,6 +874,8 @@ nfserrno (int errno) { nfserr_toosmall, -ETOOSMALL }, { nfserr_serverfault, -ESERVERFAULT }, { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EREMOTEIO }, + { nfserr_stale, -EOPENSTALE }, { nfserr_io, -EUCLEAN }, { nfserr_perm, -ENOKEY }, { nfserr_no_grace, -ENOGRACE}, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 80431921e5d7..07193595b8e0 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/fs_struct.h> #include <linux/swap.h> +#include <linux/siphash.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svcsock.h> @@ -55,18 +56,17 @@ static __be32 nfsd_init_request(struct svc_rqst *, struct svc_process_info *); /* - * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members - * of the svc_serv struct. In particular, ->sv_nrthreads but also to some - * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members + * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks. * * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a - * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number - * of nfsd threads must exist and each must listed in ->sp_all_threads in each - * entry of ->sv_pools[]. + * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless + * nn->keep_active is set). That number of nfsd threads must + * exist and each must be listed in ->sp_all_threads in some entry of + * ->sv_pools[]. * - * Transitions of the thread count between zero and non-zero are of particular - * interest since the svc_serv needs to be created and initialized at that - * point, or freed. + * Each active thread holds a counted reference on nn->nfsd_serv, as does + * the nn->keep_active flag and various transient calls to svc_get(). * * Finally, the nfsd_mutex also protects some of the global variables that are * accessed when nfsd starts and that are settable via the write_* routines in @@ -345,33 +345,57 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn) return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } -void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn) +/** + * nfsd_copy_write_verifier - Atomically copy a write verifier + * @verf: buffer in which to receive the verifier cookie + * @nn: NFS net namespace + * + * This function provides a wait-free mechanism for copying the + * namespace's write verifier without tearing it. + */ +void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn) { int seq = 0; do { - read_seqbegin_or_lock(&nn->boot_lock, &seq); - /* - * This is opaque to client, so no need to byte-swap. Use - * __force to keep sparse happy. 
y2038 time_t overflow is - * irrelevant in this usage - */ - verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec; - } while (need_seqretry(&nn->boot_lock, seq)); - done_seqretry(&nn->boot_lock, seq); + read_seqbegin_or_lock(&nn->writeverf_lock, &seq); + memcpy(verf, nn->writeverf, sizeof(*verf)); + } while (need_seqretry(&nn->writeverf_lock, seq)); + done_seqretry(&nn->writeverf_lock, seq); } -static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn) +static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn) { - ktime_get_real_ts64(&nn->nfssvc_boot); + struct timespec64 now; + u64 verf; + + /* + * Because the time value is hashed, y2038 time_t overflow + * is irrelevant in this usage. + */ + ktime_get_raw_ts64(&now); + verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key); + memcpy(nn->writeverf, &verf, sizeof(nn->writeverf)); } -void nfsd_reset_boot_verifier(struct nfsd_net *nn) +/** + * nfsd_reset_write_verifier - Generate a new write verifier + * @nn: NFS net namespace + * + * This function updates the ->writeverf field of @nn. This field + * contains an opaque cookie that, according to Section 18.32.3 of + * RFC 8881, "the client can use to determine whether a server has + * changed instance state (e.g., server restart) between a call to + * WRITE and a subsequent call to either WRITE or COMMIT. This + * cookie MUST be unchanged during a single instance of the NFSv4.1 + * server and MUST be unique between instances of the NFSv4.1 + * server." + */ +void nfsd_reset_write_verifier(struct nfsd_net *nn) { - write_seqlock(&nn->boot_lock); - nfsd_reset_boot_verifier_locked(nn); - write_sequnlock(&nn->boot_lock); + write_seqlock(&nn->writeverf_lock); + nfsd_reset_write_verifier_locked(nn); + write_sequnlock(&nn->writeverf_lock); } static int nfsd_startup_net(struct net *net, const struct cred *cred) @@ -435,6 +459,7 @@ static void nfsd_shutdown_net(struct net *net) nfsd_shutdown_generic(); } +static DEFINE_SPINLOCK(nfsd_notifier_lock); static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -444,18 +469,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in sin; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nn->ntf_refcnt)) + if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; + spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); } - atomic_dec(&nn->ntf_refcnt); - wake_up(&nn->ntf_wq); + spin_unlock(&nfsd_notifier_lock); out: return NOTIFY_DONE; @@ -475,10 +499,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in6 sin6; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nn->ntf_refcnt)) + if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; + spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; @@ -487,8 +511,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } - atomic_dec(&nn->ntf_refcnt); - wake_up(&nn->ntf_wq); + spin_unlock(&nfsd_notifier_lock); + out: return 
NOTIFY_DONE; } @@ -505,7 +529,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - atomic_dec(&nn->ntf_refcnt); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -513,7 +536,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0); /* * write_ports can create the server without actually starting @@ -594,20 +616,9 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads, .svo_module = THIS_MODULE, }; -static void nfsd_complete_shutdown(struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - WARN_ON(!mutex_is_locked(&nfsd_mutex)); - - nn->nfsd_serv = NULL; - complete(&nn->nfsd_shutdown_complete); -} - void nfsd_shutdown_threads(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -622,11 +633,9 @@ void nfsd_shutdown_threads(struct net *net) svc_get(serv); /* Kill outstanding nfsd threads */ - serv->sv_ops->svo_setup(serv, NULL, 0); - nfsd_destroy(net); + svc_set_num_threads(serv, NULL, 0); + nfsd_put(net); mutex_unlock(&nfsd_mutex); - /* Wait for shutdown of nfsd_serv to complete */ - wait_for_completion(&nn->nfsd_shutdown_complete); } bool i_am_nfsd(void) @@ -638,6 +647,7 @@ int nfsd_create_serv(struct net *net) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nn->nfsd_serv) { @@ -647,19 +657,23 @@ int nfsd_create_serv(struct net *net) if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); - nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_thread_sv_ops); - if (nn->nfsd_serv == NULL) + serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + &nfsd_thread_sv_ops); + if (serv == NULL) return -ENOMEM; - init_completion(&nn->nfsd_shutdown_complete); - nn->nfsd_serv->sv_maxconn = nn->max_connections; - error = svc_bind(nn->nfsd_serv, net); + serv->sv_maxconn = nn->max_connections; + error = svc_bind(serv, net); if (error < 0) { - svc_destroy(nn->nfsd_serv); - nfsd_complete_shutdown(net); + /* NOT nfsd_put() as notifiers (see below) haven't + * been set up yet. + */ + svc_put(serv); return error; } + spin_lock(&nfsd_notifier_lock); + nn->nfsd_serv = serv; + spin_unlock(&nfsd_notifier_lock); set_max_drc(); /* check if the notifier is already set */ @@ -669,8 +683,7 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - atomic_inc(&nn->ntf_refcnt); - nfsd_reset_boot_verifier(nn); + nfsd_reset_write_verifier(nn); return 0; } @@ -697,16 +710,26 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) return 0; } -void nfsd_destroy(struct net *net) +/* This is the callback for kref_put() below. + * There is no code here as the first thing to be done is + * call svc_shutdown_net(), but we cannot get the 'net' from + * the kref. So do all the work when kref_put returns true. 
+ */ +static void nfsd_noop(struct kref *ref) +{ +} + +void nfsd_put(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - int destroy = (nn->nfsd_serv->sv_nrthreads == 1); - if (destroy) + if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) { svc_shutdown_net(nn->nfsd_serv, net); - svc_destroy(nn->nfsd_serv); - if (destroy) - nfsd_complete_shutdown(net); + svc_destroy(&nn->nfsd_serv->sv_refcnt); + spin_lock(&nfsd_notifier_lock); + nn->nfsd_serv = NULL; + spin_unlock(&nfsd_notifier_lock); + } } int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) @@ -733,7 +756,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) if (tot > NFSD_MAXSERVS) { /* total too large: scale down requested numbers */ for (i = 0; i < n && tot > 0; i++) { - int new = nthreads[i] * NFSD_MAXSERVS / tot; + int new = nthreads[i] * NFSD_MAXSERVS / tot; tot -= (nthreads[i] - new); nthreads[i] = new; } @@ -753,12 +776,13 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* apply the new numbers */ svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, - &nn->nfsd_serv->sv_pools[i], nthreads[i]); + err = svc_set_num_threads(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], + nthreads[i]); if (err) break; } - nfsd_destroy(net); + nfsd_put(net); return err; } @@ -795,21 +819,19 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) error = nfsd_startup_net(net, cred); if (error) - goto out_destroy; - error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, - NULL, nrservs); + goto out_put; + error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); if (error) goto out_shutdown; - /* We are holding a reference to nn->nfsd_serv which - * we don't want to count in the return value, - * so subtract 1 - */ - error = nn->nfsd_serv->sv_nrthreads - 1; + error = nn->nfsd_serv->sv_nrthreads; out_shutdown: if (error < 0 && !nfsd_up_before) nfsd_shutdown_net(net); -out_destroy: - nfsd_destroy(net); /* Release server */ +out_put: + /* Threads now hold service active */ + if (xchg(&nn->keep_active, 0)) + nfsd_put(net); + nfsd_put(net); out: mutex_unlock(&nfsd_mutex); return error; @@ -923,9 +945,6 @@ nfsd(void *vrqstp) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int err; - /* Lock module and set up kernel thread */ - mutex_lock(&nfsd_mutex); - /* At this point, the thread shares current->fs * with the init process. We need to create files with the * umask as defined by the client instead of init's umask. */ @@ -945,8 +964,7 @@ nfsd(void *vrqstp) allow_signal(SIGINT); allow_signal(SIGQUIT); - nfsdstats.th_cnt++; - mutex_unlock(&nfsd_mutex); + atomic_inc(&nfsdstats.th_cnt); set_freezable(); @@ -973,19 +991,35 @@ nfsd(void *vrqstp) /* Clear signals before calling svc_exit_thread() */ flush_signals(current); - mutex_lock(&nfsd_mutex); - nfsdstats.th_cnt --; + atomic_dec(&nfsdstats.th_cnt); out: - rqstp->rq_server = NULL; + /* Take an extra ref so that the svc_put in svc_exit_thread() + * doesn't call svc_destroy() + */ + svc_get(nn->nfsd_serv); /* Release the thread */ svc_exit_thread(rqstp); - nfsd_destroy(net); + /* We need to drop a ref, but may not drop the last reference + * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that + * could deadlock with nfsd_shutdown_threads() waiting for us. 
+ * So three options are: + * - drop a non-final reference, + * - get the mutex without waiting + * - sleep briefly andd try the above again + */ + while (!svc_put_not_last(nn->nfsd_serv)) { + if (mutex_trylock(&nfsd_mutex)) { + nfsd_put(net); + mutex_unlock(&nfsd_mutex); + break; + } + msleep(20); + } /* Release module */ - mutex_unlock(&nfsd_mutex); module_put_and_exit(0); return 0; } @@ -1096,7 +1130,6 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) mutex_unlock(&nfsd_mutex); return -ENODEV; } - /* bump up the psudo refcount while traversing */ svc_get(nn->nfsd_serv); ret = svc_pool_stats_open(nn->nfsd_serv, file); mutex_unlock(&nfsd_mutex); @@ -1109,8 +1142,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file) struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); - /* this function really, really should have been called svc_put() */ - nfsd_destroy(net); + nfsd_put(net); mutex_unlock(&nfsd_mutex); return ret; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e73bdbb1634a..95457cfd37fc 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -568,6 +568,10 @@ struct nfs4_ol_stateid { struct list_head st_locks; struct nfs4_stateowner *st_stateowner; struct nfs4_clnt_odstate *st_clnt_odstate; +/* + * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the + * comment above bmap_to_share_mode() for explanation: + */ unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid *st_openstp; @@ -629,6 +633,7 @@ struct nfsd4_blocked_lock { struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; + struct kref nbl_kref; }; struct nfsd4_compound_state; diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 1d3b881e7382..a8c5a02a84f0 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -45,7 +45,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE])); /* thread usage: */ - seq_printf(seq, "th %u 0", nfsdstats.th_cnt); + seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); /* deprecated thread usage histogram stats */ for (i = 0; i < 10; i++) diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index 51ecda852e23..9b43dc3d9991 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -29,11 +29,9 @@ enum { struct nfsd_stats { struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; - /* Protected by nfsd_mutex */ - unsigned int th_cnt; /* number of available threads */ + atomic_t th_cnt; /* number of available threads */ }; - extern struct nfsd_stats nfsdstats; extern struct svc_stat nfsd_svcstats; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index f1e0d3c51bc2..c4cf56327843 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -47,7 +47,7 @@ rqstp->rq_xprt->xpt_remotelen); \ } while (0); -TRACE_EVENT(nfsd_garbage_args_err, +DECLARE_EVENT_CLASS(nfsd_xdr_err_class, TP_PROTO( const struct svc_rqst *rqstp ), @@ -69,27 +69,13 @@ TRACE_EVENT(nfsd_garbage_args_err, ) ); -TRACE_EVENT(nfsd_cant_encode_err, - TP_PROTO( - const struct svc_rqst *rqstp - ), - TP_ARGS(rqstp), - TP_STRUCT__entry( - NFSD_TRACE_PROC_ARG_FIELDS - - __field(u32, vers) - __field(u32, proc) - ), - TP_fast_assign( - NFSD_TRACE_PROC_ARG_ASSIGNMENTS +#define DEFINE_NFSD_XDR_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \ + TP_PROTO(const struct svc_rqst *rqstp), \ + TP_ARGS(rqstp)) - __entry->vers = rqstp->rq_vers; - __entry->proc = rqstp->rq_proc; - ), - TP_printk("xid=0x%08x vers=%u proc=%u", - __entry->xid, __entry->vers, 
__entry->proc - ) -); +DEFINE_NFSD_XDR_ERR_EVENT(garbage_args); +DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); #define show_nfsd_may_flags(x) \ __print_flags(x, "|", \ @@ -413,6 +399,56 @@ TRACE_EVENT(nfsd_dirent, ) ) +DECLARE_EVENT_CLASS(nfsd_copy_err_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *src_fhp, + loff_t src_offset, + struct svc_fh *dst_fhp, + loff_t dst_offset, + u64 count, + int status), + TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, src_fh_hash) + __field(loff_t, src_offset) + __field(u32, dst_fh_hash) + __field(loff_t, dst_offset) + __field(u64, count) + __field(int, status) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle); + __entry->src_offset = src_offset; + __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle); + __entry->dst_offset = dst_offset; + __entry->count = count; + __entry->status = status; + ), + TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld " + "dst_fh_hash=0x%08x dst_offset=%lld " + "count=%llu status=%d", + __entry->xid, __entry->src_fh_hash, __entry->src_offset, + __entry->dst_fh_hash, __entry->dst_offset, + (unsigned long long)__entry->count, + __entry->status) +) + +#define DEFINE_NFSD_COPY_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *src_fhp, \ + loff_t src_offset, \ + struct svc_fh *dst_fhp, \ + loff_t dst_offset, \ + u64 count, \ + int status), \ + TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \ + count, status)) + +DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); + #include "state.h" #include "filecache.h" #include "vfs.h" @@ -538,6 +574,34 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \ DEFINE_NET_EVENT(grace_start); DEFINE_NET_EVENT(grace_complete); +TRACE_EVENT(nfsd_writeverf_reset, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_rqst *rqstp, + int error + ), + TP_ARGS(nn, rqstp, error), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(u32, xid) + __field(int, error) + __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->error = error; + + /* avoid seqlock inside TP_fast_assign */ + memcpy(__entry->verifier, nn->writeverf, + NFS4_VERIFIER_SIZE); + ), + TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s", + __entry->boot_time, __entry->xid, __entry->error, + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + ) +); + TRACE_EVENT(nfsd_clid_cred_mismatch, TP_PROTO( const struct nfs4_client *clp, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c99857689e2c..99c2b9dfbb10 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -40,6 +40,7 @@ #include "../internal.h" #include "acl.h" #include "idmap.h" +#include "xdr4.h" #endif /* CONFIG_NFSD_V4 */ #include "nfsd.h" @@ -517,15 +518,23 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif -__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, - struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) +static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp) +{ + return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate; +} + +__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, + struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, + u64 count, bool sync) { struct file *src = 
nf_src->nf_file; struct file *dst = nf_dst->nf_file; + errseq_t since; loff_t cloned; __be32 ret = 0; - down_write(&nf_dst->nf_rwsem); + since = READ_ONCE(dst->f_wb_err); cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); if (cloned < 0) { ret = nfserrno(cloned); @@ -540,15 +549,25 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); if (!status) + status = filemap_check_wb_err(dst->f_mapping, since); + if (!status) status = commit_inode_metadata(file_inode(src)); if (status < 0) { - nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net, - nfsd_net_id)); + struct nfsd_net *nn = net_generic(nf_dst->nf_net, + nfsd_net_id); + + trace_nfsd_clone_file_range_err(rqstp, + &nfsd4_get_cstate(rqstp)->save_fh, + src_pos, + &nfsd4_get_cstate(rqstp)->current_fh, + dst_pos, + count, status); + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, status); ret = nfserrno(status); } } out_err: - up_write(&nf_dst->nf_rwsem); return ret; } @@ -777,6 +796,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { __be32 err; + bool retried = false; validate_process_creds(); /* @@ -792,9 +812,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, */ if (type == S_IFREG) may_flags |= NFSD_MAY_OWNER_OVERRIDE; +retry: err = fh_verify(rqstp, fhp, type, may_flags); - if (!err) + if (!err) { err = __nfsd_open(rqstp, fhp, type, may_flags, filp); + if (err == nfserr_stale && !retried) { + retried = true; + fh_put(fhp); + goto retry; + } + } validate_process_creds(); return err; } @@ -944,10 +971,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, unsigned long *cnt, int stable, __be32 *verf) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file *file = nf->nf_file; struct super_block *sb = file_inode(file)->i_sb; struct svc_export *exp; struct iov_iter iter; + errseq_t since; __be32 nfserr; int host_err; int use_wgather; @@ -985,36 +1014,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, flags |= RWF_SYNC; iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); - if (flags & RWF_SYNC) { - down_write(&nf->nf_rwsem); - host_err = vfs_iter_write(file, &iter, &pos, flags); - if (host_err < 0) - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); - up_write(&nf->nf_rwsem); - } else { - down_read(&nf->nf_rwsem); - if (verf) - nfsd_copy_boot_verifier(verf, - net_generic(SVC_NET(rqstp), - nfsd_net_id)); - host_err = vfs_iter_write(file, &iter, &pos, flags); - up_read(&nf->nf_rwsem); - } + since = READ_ONCE(file->f_wb_err); + if (verf) + nfsd_copy_write_verifier(verf, nn); + host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) { - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, host_err); goto out_nfserr; } *cnt = host_err; nfsd_stats_io_write_add(exp, *cnt); fsnotify_modify(file); + host_err = filemap_check_wb_err(file->f_mapping, since); + if (host_err < 0) + goto out_nfserr; if (stable && use_wgather) { host_err = wait_for_concurrent_writes(file); - if (host_err < 0) - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); + if (host_err < 0) { + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, host_err); + } } out_nfserr: @@ -1089,19 +1110,6 @@ out: } #ifdef CONFIG_NFSD_V3 -static int 
-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset, - loff_t end) -{ - struct address_space *mapping = nf->nf_file->f_mapping; - int ret = filemap_fdatawrite_range(mapping, offset, end); - - if (ret) - return ret; - filemap_fdatawait_range_keep_errors(mapping, offset, end); - return 0; -} - /* * Commit all pending writes to stable storage. * @@ -1115,6 +1123,7 @@ __be32 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long count, __be32 *verf) { + struct nfsd_net *nn; struct nfsd_file *nf; loff_t end = LLONG_MAX; __be32 err = nfserr_inval; @@ -1131,29 +1140,28 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); if (err) goto out; + nn = net_generic(nf->nf_net, nfsd_net_id); if (EX_ISSYNC(fhp->fh_export)) { - int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end); + errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); + int err2; - down_write(&nf->nf_rwsem); - if (!err2) - err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); + err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: - nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, - nfsd_net_id)); + nfsd_copy_write_verifier(verf, nn); + err2 = filemap_check_wb_err(nf->nf_file->f_mapping, + since); break; case -EINVAL: err = nfserr_notsupp; break; default: - err = nfserrno(err2); - nfsd_reset_boot_verifier(net_generic(nf->nf_net, - nfsd_net_id)); + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, err2); } - up_write(&nf->nf_rwsem); + err = nfserrno(err2); } else - nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, - nfsd_net_id)); + nfsd_copy_write_verifier(verf, nn); nfsd_file_put(nf); out: @@ -1747,8 +1755,8 @@ retry: * so do it by hand */ trap = lock_rename(tdentry, fdentry); ffhp->fh_locked = tfhp->fh_locked = true; - fill_pre_wcc(ffhp); - fill_pre_wcc(tfhp); + fh_fill_pre_attrs(ffhp); + fh_fill_pre_attrs(tfhp); odentry = lookup_one_len(fname, fdentry, flen); host_err = PTR_ERR(odentry); @@ -1808,8 +1816,8 @@ retry: * were the same, so again we do it by hand. */ if (!close_cached) { - fill_post_wcc(ffhp); - fill_post_wcc(tfhp); + fh_fill_post_attrs(ffhp); + fh_fill_post_attrs(tfhp); } unlock_rename(tdentry, fdentry); ffhp->fh_locked = tfhp->fh_locked = false; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index b21b76e6b9a8..9f56dcb22ff7 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -57,7 +57,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); -__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, +__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, + struct nfsd_file *nf_src, u64 src_pos, struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ |
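Note on the write-error handling adopted above: nfsd_vfs_write(), nfsd_commit() and nfsd4_clone_file_range() no longer serialize on nf_rwsem; instead they sample the file's errseq_t cursor before issuing I/O and re-check it afterwards with filemap_check_wb_err(), resetting the server's write verifier (nfsd_reset_write_verifier(), traced by nfsd_writeverf_reset) when an error is seen. The fragment below is a minimal illustrative sketch of that errseq_t pattern only, not code from this series; the helper name example_write_and_check() is invented for the example.

	#include <linux/fs.h>
	#include <linux/pagemap.h>
	#include <linux/uio.h>

	static ssize_t example_write_and_check(struct file *file,
					       struct iov_iter *iter,
					       loff_t *pos, rwf_t flags)
	{
		/* Snapshot the writeback error cursor before starting the write. */
		errseq_t since = READ_ONCE(file->f_wb_err);
		ssize_t written = vfs_iter_write(file, iter, pos, flags);

		if (written < 0)
			return written;

		/*
		 * Report any writeback error recorded since the snapshot, even
		 * one raised by a concurrent writer; a caller in nfsd's position
		 * would reset its write verifier when this returns non-zero.
		 */
		return filemap_check_wb_err(file->f_mapping, since) ?: written;
	}

The point of errseq_t is that each opener observes a given writeback error at most once, so error visibility no longer requires the exclusive nf_rwsem the old code took around every stable write.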