Diffstat (limited to 'fs/nfsd/nfs4state.c')
-rw-r--r--	fs/nfsd/nfs4state.c	527
1 file changed, 375 insertions(+), 152 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 741b9449f727..153eeea2c7c9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1909,17 +1909,86 @@ gen_sessionid(struct nfsd4_session *ses)
*/
#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
+static struct shrinker *nfsd_slot_shrinker;
+static DEFINE_SPINLOCK(nfsd_session_list_lock);
+static LIST_HEAD(nfsd_session_list);
+/* The sum of "target_slots-1" on every session. The shrinker can push this
+ * down, though it can take a little while for the memory to actually
+ * be freed. The "-1" is because we can never free slot 0 while the
+ * session is active.
+ */
+static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
+
static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
{
int i;
- for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
- free_svc_cred(&ses->se_slots[i]->sl_cred);
- kfree(ses->se_slots[i]);
+ if (from >= ses->se_fchannel.maxreqs)
+ return;
+
+ for (i = from; i < ses->se_fchannel.maxreqs; i++) {
+ struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+
+ /*
+ * Save the seqid in case we reactivate this slot.
+ * This will never require a memory allocation so the GFP
+ * flag is irrelevant
+ */
+ xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
+ free_svc_cred(&slot->sl_cred);
+ kfree(slot);
+ }
+ ses->se_fchannel.maxreqs = from;
+ if (ses->se_target_maxslots > from) {
+ int new_target = from ?: 1;
+ atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
+ ses->se_target_maxslots = new_target;
}
}
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses: The session to affect
+ * @dec: how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock. As shrinkers are
+ * best-effort, skipping a session if client_lock is already held has no
+ * great cost.
+ *
+ * Return value:
+ * The number of slots that the target was reduced by.
+ */
+static int
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+ struct nfsd_net *nn = net_generic(ses->se_client->net,
+ nfsd_net_id);
+ int ret = 0;
+
+ if (ses->se_target_maxslots <= 1)
+ return ret;
+ if (!spin_trylock(&nn->client_lock))
+ return ret;
+ ret = min(dec, ses->se_target_maxslots-1);
+ ses->se_target_maxslots -= ret;
+ atomic_sub(ret, &nfsd_total_target_slots);
+ ses->se_slot_gen += 1;
+ if (ses->se_slot_gen == 0) {
+ int i;
+ ses->se_slot_gen = 1;
+ for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+ struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+ slot->sl_generation = 0;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+ return ret;
+}
+
/*
* We don't actually need to cache the rpc and session headers, so we
* can allocate a little less for each slot:
@@ -1935,89 +2004,46 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
return size + sizeof(struct nfsd4_slot);
}
-/*
- * XXX: If we run out of reserved DRC memory we could (up to a point)
- * re-negotiate active sessions and reduce their slot usage to make
- * room for new connections. For now we just fail the create session.
- */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
-{
- u32 slotsize = slot_bytes(ca);
- u32 num = ca->maxreqs;
- unsigned long avail, total_avail;
- unsigned int scale_factor;
-
- spin_lock(&nfsd_drc_lock);
- if (nfsd_drc_max_mem > nfsd_drc_mem_used)
- total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
- else
- /* We have handed out more space than we chose in
- * set_max_drc() to allow. That isn't really a
- * problem as long as that doesn't make us think we
- * have lots more due to integer overflow.
- */
- total_avail = 0;
- avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
- /*
- * Never use more than a fraction of the remaining memory,
- * unless it's the only way to give this client a slot.
- * The chosen fraction is either 1/8 or 1/number of threads,
- * whichever is smaller. This ensures there are adequate
- * slots to support multiple clients per thread.
- * Give the client one slot even if that would require
- * over-allocation--it is better than failure.
- */
- scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
-
- avail = clamp_t(unsigned long, avail, slotsize,
- total_avail/scale_factor);
- num = min_t(int, num, avail / slotsize);
- num = max_t(int, num, 1);
- nfsd_drc_mem_used += num * slotsize;
- spin_unlock(&nfsd_drc_lock);
-
- return num;
-}
-
-static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
-{
- int slotsize = slot_bytes(ca);
-
- spin_lock(&nfsd_drc_lock);
- nfsd_drc_mem_used -= slotsize * ca->maxreqs;
- spin_unlock(&nfsd_drc_lock);
-}
-
static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
struct nfsd4_channel_attrs *battrs)
{
int numslots = fattrs->maxreqs;
int slotsize = slot_bytes(fattrs);
struct nfsd4_session *new;
+ struct nfsd4_slot *slot;
int i;
- BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
- > PAGE_SIZE);
-
- new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return NULL;
+ xa_init(&new->se_slots);
/* allocate each struct nfsd4_slot and data cache in one piece */
- for (i = 0; i < numslots; i++) {
- new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
- if (!new->se_slots[i])
- goto out_free;
- }
+ slot = kzalloc(slotsize, GFP_KERNEL);
+ if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
+ goto out_free;
+ for (i = 1; i < numslots; i++) {
+ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+ slot = kzalloc(slotsize, gfp);
+ if (!slot)
+ break;
+ if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) {
+ kfree(slot);
+ break;
+ }
+ }
+ fattrs->maxreqs = i;
memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+ new->se_target_maxslots = i;
+ atomic_add(i - 1, &nfsd_total_target_slots);
new->se_cb_slot_avail = ~0U;
new->se_cb_highest_slot = min(battrs->maxreqs - 1,
NFSD_BC_SLOT_TABLE_SIZE - 1);
spin_lock_init(&new->se_lock);
return new;
out_free:
- while (i--)
- kfree(new->se_slots[i]);
+ kfree(slot);
+ xa_destroy(&new->se_slots);
kfree(new);
return NULL;
}
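Aside (not part of the patch): free_session_slots() above retires a slot but leaves its last seqid behind in the xarray as a tagged integer ("value entry"), so the re-allocation paths in alloc_session() and nfsd4_sequence() can resume the sequence rather than restart it. A minimal sketch of that xarray idiom, using illustrative demo_* names:

#include <linux/xarray.h>
#include <linux/slab.h>

struct demo_slot {
	u32 seqid;
};

/* Retire a slot: keep only its seqid as a tagged integer entry.
 * Replacing an existing entry with a value entry never allocates,
 * so a 0 gfp mask is fine here. */
static void demo_retire(struct xarray *xa, unsigned long idx)
{
	struct demo_slot *slot = xa_load(xa, idx);

	xa_store(xa, idx, xa_mk_value(slot->seqid), 0);
	kfree(slot);
}

/* Re-create a slot: if a value entry was left behind, resume its seqid. */
static struct demo_slot *demo_revive(struct xarray *xa, unsigned long idx,
				     gfp_t gfp)
{
	void *old = xa_load(xa, idx);
	struct demo_slot *slot = kzalloc(sizeof(*slot), gfp);

	if (!slot)
		return NULL;
	if (xa_is_value(old))
		slot->seqid = xa_to_value(old);
	if (xa_is_err(xa_store(xa, idx, slot, gfp))) {
		kfree(slot);
		return NULL;
	}
	return slot;
}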
@@ -2123,17 +2149,47 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
static void __free_session(struct nfsd4_session *ses)
{
- free_session_slots(ses);
+ free_session_slots(ses, 0);
+ xa_destroy(&ses->se_slots);
kfree(ses);
}
static void free_session(struct nfsd4_session *ses)
{
nfsd4_del_conns(ses);
- nfsd4_put_drc_mem(&ses->se_fchannel);
__free_session(ses);
}
+static unsigned long
+nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
+{
+ unsigned long cnt = atomic_read(&nfsd_total_target_slots);
+
+ return cnt ? cnt : SHRINK_EMPTY;
+}
+
+static unsigned long
+nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ struct nfsd4_session *ses;
+ unsigned long scanned = 0;
+ unsigned long freed = 0;
+
+ spin_lock(&nfsd_session_list_lock);
+ list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
+ freed += reduce_session_slots(ses, 1);
+ scanned += 1;
+ if (scanned >= sc->nr_to_scan) {
+ /* Move starting point for next scan */
+ list_move(&nfsd_session_list, &ses->se_all_sessions);
+ break;
+ }
+ }
+ spin_unlock(&nfsd_session_list_lock);
+ sc->nr_scanned = scanned;
+ return freed;
+}
+
static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
{
int idx;
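Aside (not part of the patch): the list_move() in nfsd_slot_scan() above is the usual "rotate the list head" idiom. Re-inserting the head immediately after the last entry visited makes the next walk start with that entry's successor, so repeated partial scans eventually touch every session. A minimal sketch, again with illustrative demo_* names:

#include <linux/list.h>

struct demo_session {
	struct list_head node;
};

static LIST_HEAD(demo_sessions);

/* Visit at most @budget entries, then park the head after the last
 * one visited so the next call resumes with its successor. */
static void demo_scan(unsigned long budget)
{
	struct demo_session *s;

	if (!budget)
		return;
	list_for_each_entry(s, &demo_sessions, node) {
		/* per-session reclaim work would go here */
		if (--budget == 0) {
			list_move(&demo_sessions, &s->node);
			break;
		}
	}
}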
@@ -2158,6 +2214,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&clp->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+ list_add_tail(&new->se_all_sessions, &nfsd_session_list);
+ spin_unlock(&nfsd_session_list_lock);
+
{
struct sockaddr *sa = svc_addr(rqstp);
/*
@@ -2227,6 +2287,9 @@ unhash_session(struct nfsd4_session *ses)
spin_lock(&ses->se_client->cl_lock);
list_del(&ses->se_perclnt);
spin_unlock(&ses->se_client->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+ list_del(&ses->se_all_sessions);
+ spin_unlock(&nfsd_session_list_lock);
}
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -2362,8 +2425,12 @@ unhash_client_locked(struct nfs4_client *clp)
}
list_del_init(&clp->cl_lru);
spin_lock(&clp->cl_lock);
- list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ spin_lock(&nfsd_session_list_lock);
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) {
list_del_init(&ses->se_hash);
+ list_del_init(&ses->se_all_sessions);
+ }
+ spin_unlock(&nfsd_session_list_lock);
spin_unlock(&clp->cl_lock);
}
@@ -2685,6 +2752,7 @@ static const char *cb_state2str(int state)
static int client_info_show(struct seq_file *m, void *v)
{
struct inode *inode = file_inode(m->file);
+ struct nfsd4_session *ses;
struct nfs4_client *clp;
u64 clid;
@@ -2721,6 +2789,16 @@ static int client_info_show(struct seq_file *m, void *v)
seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
seq_printf(m, "admin-revoked states: %d\n",
atomic_read(&clp->cl_admin_revoked));
+ spin_lock(&clp->cl_lock);
+ seq_printf(m, "session slots:");
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+ seq_printf(m, "\nsession target slots:");
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ seq_printf(m, " %u", ses->se_target_maxslots);
+ spin_unlock(&clp->cl_lock);
+ seq_puts(m, "\n");
+
drop_client(clp);
return 0;
@@ -2873,6 +2951,21 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
return 0;
}
+static char *nfs4_show_deleg_type(u32 dl_type)
+{
+ switch (dl_type) {
+ case OPEN_DELEGATE_READ:
+ return "r";
+ case OPEN_DELEGATE_WRITE:
+ return "w";
+ case OPEN_DELEGATE_READ_ATTRS_DELEG:
+ return "ra";
+ case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ return "wa";
+ }
+ return "?";
+}
+
static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_delegation *ds;
@@ -2886,8 +2979,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
nfs4_show_stateid(s, &st->sc_stateid);
seq_puts(s, ": { type: deleg, ");
- seq_printf(s, "access: %s",
- ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+ seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type));
/* XXX: lease time, whether it's being recalled. */
@@ -3708,10 +3800,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
kfree(exid->server_impl_name);
}
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
{
/* The slot is in use, and no response has been sent. */
- if (slot_inuse) {
+ if (flags & NFSD4_SLOT_INUSE) {
if (seqid == slot_seqid)
return nfserr_jukebox;
else
@@ -3720,6 +3812,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
/* Note unsigned 32-bit arithmetic handles wraparound: */
if (likely(seqid == slot_seqid + 1))
return nfs_ok;
+ if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+ return nfs_ok;
if (seqid == slot_seqid)
return nfserr_replay_cache;
return nfserr_seq_misordered;
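Aside (not part of the patch), a worked example of the rules above: with slot->sl_seqid == 5 and the slot idle, seqid 6 is a new request (nfs_ok), seqid 5 is a retransmission (nfserr_replay_cache), and anything else is nfserr_seq_misordered. While NFSD4_SLOT_INUSE is set, a matching seqid draws nfserr_jukebox (retry later) and any other seqid is misordered. The new NFSD4_SLOT_REUSED case additionally accepts seqid 1, since a client may reasonably treat a slot that was freed by the shrinker and later re-offered as brand new.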
@@ -3778,17 +3872,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
- /*
- * Note decreasing slot size below client's request may make it
- * difficult for client to function correctly, whereas
- * decreasing the number of slots will (just?) affect
- * performance. When short on memory we therefore prefer to
- * decrease number of slots instead of their size. Clients that
- * request larger slots than they need will get poor results:
- * Note that we always allow at least one slot, because our
- * accounting is soft and provides no guarantees either way.
- */
- ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
return nfs_ok;
}
@@ -3866,11 +3949,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
return status;
status = check_backchannel_attrs(&cr_ses->back_channel);
if (status)
- goto out_release_drc_mem;
+ goto out_err;
status = nfserr_jukebox;
new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
if (!new)
- goto out_release_drc_mem;
+ goto out_err;
conn = alloc_conn_from_crses(rqstp, cr_ses);
if (!conn)
goto out_free_session;
@@ -3979,8 +4062,7 @@ out_free_conn:
free_conn(conn);
out_free_session:
__free_session(new);
-out_release_drc_mem:
- nfsd4_put_drc_mem(&cr_ses->fore_channel);
+out_err:
return status;
}
@@ -4278,17 +4360,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (seq->slotid >= session->se_fchannel.maxreqs)
goto out_put_session;
- slot = session->se_slots[seq->slotid];
+ slot = xa_load(&session->se_slots, seq->slotid);
dprintk("%s: slotid %d\n", __func__, seq->slotid);
- /* We do not negotiate the number of slots yet, so set the
- * maxslots to the session maxreqs which is used to encode
- * sr_highest_slotid and the sr_target_slot id to maxslots */
- seq->maxslots = session->se_fchannel.maxreqs;
-
trace_nfsd_slot_seqid_sequence(clp, seq, slot);
- status = check_slot_seqid(seq->seqid, slot->sl_seqid,
- slot->sl_flags & NFSD4_SLOT_INUSE);
+ status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
if (status == nfserr_replay_cache) {
status = nfserr_seq_misordered;
if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4313,6 +4389,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out_put_session;
+ if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+ slot->sl_generation == session->se_slot_gen &&
+ seq->maxslots <= session->se_target_maxslots)
+ /* Client acknowledged our reduced maxreqs */
+ free_session_slots(session, session->se_target_maxslots);
+
buflen = (seq->cachethis) ?
session->se_fchannel.maxresp_cached :
session->se_fchannel.maxresp_sz;
@@ -4323,9 +4405,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
svc_reserve(rqstp, buflen);
status = nfs_ok;
- /* Success! bump slot seqid */
+ /* Success! accept new slot seqid */
slot->sl_seqid = seq->seqid;
+ slot->sl_flags &= ~NFSD4_SLOT_REUSED;
slot->sl_flags |= NFSD4_SLOT_INUSE;
+ slot->sl_generation = session->se_slot_gen;
if (seq->cachethis)
slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
else
@@ -4335,7 +4419,51 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->session = session;
cstate->clp = clp;
+ /*
+ * If the client ever uses the highest available slot,
+ * gently try to allocate another 20%. This allows
+ * fairly quick growth without grossly over-shooting what
+ * the client might use.
+ */
+ if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+ session->se_target_maxslots >= session->se_fchannel.maxreqs &&
+ session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
+ int s = session->se_fchannel.maxreqs;
+ int cnt = DIV_ROUND_UP(s, 5);
+ void *prev_slot;
+
+ do {
+ /*
+ * GFP_NOWAIT both allows allocation under a
+ * spinlock, and only succeeds if there is
+ * plenty of memory.
+ */
+ slot = kzalloc(slot_bytes(&session->se_fchannel),
+ GFP_NOWAIT);
+ prev_slot = xa_load(&session->se_slots, s);
+ if (xa_is_value(prev_slot) && slot) {
+ slot->sl_seqid = xa_to_value(prev_slot);
+ slot->sl_flags |= NFSD4_SLOT_REUSED;
+ }
+ if (slot &&
+ !xa_is_err(xa_store(&session->se_slots, s, slot,
+ GFP_NOWAIT))) {
+ s += 1;
+ session->se_fchannel.maxreqs = s;
+ atomic_add(s - session->se_target_maxslots,
+ &nfsd_total_target_slots);
+ session->se_target_maxslots = s;
+ } else {
+ kfree(slot);
+ slot = NULL;
+ }
+ } while (slot && --cnt > 0);
+ }
+
out:
+ seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+ seq->target_maxslots = session->se_target_maxslots;
+
switch (clp->cl_cb_state) {
case NFSD4_CB_DOWN:
seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
@@ -4739,7 +4867,7 @@ static void init_nfs4_replay(struct nfs4_replay *rp)
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
- atomic_set(&rp->rp_locked, RP_UNLOCKED);
+ rp->rp_locked = RP_UNLOCKED;
}
static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
@@ -4747,9 +4875,9 @@ static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
{
if (!nfsd4_has_session(cstate)) {
wait_var_event(&so->so_replay.rp_locked,
- atomic_cmpxchg(&so->so_replay.rp_locked,
- RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
- if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED)
+ cmpxchg(&so->so_replay.rp_locked,
+ RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
+ if (so->so_replay.rp_locked == RP_UNHASHED)
return -EAGAIN;
cstate->replay_owner = nfs4_get_stateowner(so);
}
@@ -4762,9 +4890,7 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
if (so != NULL) {
cstate->replay_owner = NULL;
- atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED);
- smp_mb__after_atomic();
- wake_up_var(&so->so_replay.rp_locked);
+ store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED);
nfs4_put_stateowner(so);
}
}
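Aside (not part of the patch): the store_release_wake_up() calls above replace the older atomic_set() / smp_mb__after_atomic() / wake_up_var() sequence; the helper does a release store of the new value and then wakes waiters sleeping in wait_var_event() on the same address. A minimal sketch of the pairing, assuming illustrative demo_* names and states:

#include <linux/wait_bit.h>
#include <linux/atomic.h>

enum { DEMO_UNLOCKED, DEMO_LOCKED };
static int demo_state = DEMO_UNLOCKED;

/* Acquire side: sleep until we win the UNLOCKED -> LOCKED transition.
 * The cmpxchg() both tests the state and provides the ordering. */
static void demo_acquire(void)
{
	wait_var_event(&demo_state,
		       cmpxchg(&demo_state, DEMO_UNLOCKED, DEMO_LOCKED) != DEMO_LOCKED);
}

/* Release side: publish the new value and wake any waiter. */
static void demo_release(void)
{
	store_release_wake_up(&demo_state, DEMO_UNLOCKED);
}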
@@ -5069,9 +5195,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* Some threads with a reference might be waiting for rp_locked,
* so tell them to stop waiting.
*/
- atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
- smp_mb__after_atomic();
- wake_up_var(&oo->oo_owner.so_replay.rp_locked);
+ store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
@@ -5472,7 +5596,7 @@ retry:
static inline __be32
nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
{
- if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+ if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type))
return nfserr_openmode;
else
return nfs_ok;
@@ -5704,8 +5828,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
-static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
- int flag)
+static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp)
{
struct file_lease *fl;
@@ -5714,7 +5837,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
return NULL;
fl->fl_lmops = &nfsd_lease_mng_ops;
fl->c.flc_flags = FL_DELEG;
- fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+ fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK;
fl->c.flc_owner = (fl_owner_t)dp;
fl->c.flc_pid = current->tgid;
fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
@@ -5829,13 +5952,14 @@ static struct nfs4_delegation *
nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent)
{
- int status = 0;
+ bool deleg_ts = open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
struct nfs4_client *clp = stp->st_stid.sc_client;
struct nfs4_file *fp = stp->st_stid.sc_file;
struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
struct nfsd_file *nf = NULL;
struct file_lease *fl;
+ int status = 0;
u32 dl_type;
/*
@@ -5860,7 +5984,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
*/
if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
nf = find_rw_file(fp);
- dl_type = NFS4_OPEN_DELEGATE_WRITE;
+ dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE;
}
/*
@@ -5869,7 +5993,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
*/
if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
nf = find_readable_file(fp);
- dl_type = NFS4_OPEN_DELEGATE_READ;
+ dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
}
if (!nf)
@@ -5901,7 +6025,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (!dp)
goto out_delegees;
- fl = nfs4_alloc_init_lease(dp, dl_type);
+ fl = nfs4_alloc_init_lease(dp);
if (!fl)
goto out_clnt_odstate;
@@ -5958,20 +6082,20 @@ out_delegees:
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
{
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
if (status == -EAGAIN)
open->op_why_no_deleg = WND4_CONTENTION;
else {
open->op_why_no_deleg = WND4_RESOURCE;
switch (open->op_deleg_want) {
- case NFS4_SHARE_WANT_READ_DELEG:
- case NFS4_SHARE_WANT_WRITE_DELEG:
- case NFS4_SHARE_WANT_ANY_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
break;
- case NFS4_SHARE_WANT_CANCEL:
+ case OPEN4_SHARE_ACCESS_WANT_CANCEL:
open->op_why_no_deleg = WND4_CANCELLED;
break;
- case NFS4_SHARE_WANT_NO_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
WARN_ON_ONCE(1);
}
}
@@ -6027,13 +6151,14 @@ static void
nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *currentfh)
{
- struct nfs4_delegation *dp;
+ bool deleg_ts = open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
struct nfs4_openowner *oo = openowner(stp->st_stateowner);
struct nfs4_client *clp = stp->st_stid.sc_client;
struct svc_fh *parent = NULL;
- int cb_up;
- int status = 0;
+ struct nfs4_delegation *dp;
struct kstat stat;
+ int status = 0;
+ int cb_up;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
@@ -6074,20 +6199,22 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
destroy_delegation(dp);
goto out_no_deleg;
}
- open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
+ open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG :
+ OPEN_DELEGATE_WRITE;
dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
} else {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG :
+ OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
}
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
+ open->op_delegate_type = OPEN_DELEGATE_NONE;
if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
- open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
+ open->op_delegate_type != OPEN_DELEGATE_NONE) {
dprintk("NFSD: WARNING: refusing delegation reclaim\n");
open->op_recall = true;
}
@@ -6101,21 +6228,32 @@ out_no_deleg:
static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
struct nfs4_delegation *dp)
{
- if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
- dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
- open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
- } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
- dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
- open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ if (deleg_is_write(dp->dl_type)) {
+ if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
+ } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ }
}
/* Otherwise the client must be confused wanting a delegation
* it already has, therefore we don't return
- * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
+ * OPEN_DELEGATE_NONE_EXT and reason.
*/
}
+/* Are we returning only a delegation stateid? */
+static bool open_xor_delegation(struct nfsd4_open *open)
+{
+ if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
+ return false;
+ /* Did we actually get a delegation? */
+ if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type))
+ return false;
+ return true;
+}
+
/**
* nfsd4_process_open2 - finish open processing
* @rqstp: the RPC transaction being executed
@@ -6201,8 +6339,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
mutex_unlock(&stp->st_mutex);
if (nfsd4_has_session(&resp->cstate)) {
- if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
open->op_why_no_deleg = WND4_NOT_WANTED;
goto nodeleg;
}
@@ -6213,12 +6351,23 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* OPEN succeeds even if we fail.
*/
nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
+
+ /*
+ * If there is an existing open stateid, it must be updated and
+ * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new
+ * open stateid would have to be created.
+ */
+ if (new_stp && open_xor_delegation(open)) {
+ memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid));
+ open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID;
+ release_open_stateid(stp);
+ }
nodeleg:
status = nfs_ok;
trace_nfsd_open(&stp->st_stid.sc_stateid);
out:
/* 4.1 client trying to upgrade/downgrade delegation? */
- if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
+ if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp &&
open->op_deleg_want)
nfsd4_deleg_xgrade_none_ext(open, dp);
@@ -6229,7 +6378,7 @@ out:
/*
* To finish the open response, we just need to set the rflags.
*/
- open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
+ open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
if (nfsd4_has_session(&resp->cstate))
open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
@@ -7966,7 +8115,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
- struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -7986,7 +8134,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
if (status != nfs_ok)
return status;
- sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -8721,7 +8868,6 @@ skip_grace:
}
/* initialization to perform when the nfsd service is started: */
-
int
nfs4_state_start(void)
{
@@ -8731,6 +8877,15 @@ nfs4_state_start(void)
if (ret)
return ret;
+ nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
+ if (!nfsd_slot_shrinker) {
+ rhltable_destroy(&nfs4_file_rhltable);
+ return -ENOMEM;
+ }
+ nfsd_slot_shrinker->count_objects = nfsd_slot_count;
+ nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
+ shrinker_register(nfsd_slot_shrinker);
+
set_max_delegations();
return 0;
}
@@ -8772,6 +8927,7 @@ void
nfs4_state_shutdown(void)
{
rhltable_destroy(&nfs4_file_rhltable);
+ shrinker_free(nfsd_slot_shrinker);
}
static void
@@ -8889,6 +9045,78 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
}
/**
+ * set_cb_time - vet and set the timespec for a cb_getattr update
+ * @cb: timestamp from the CB_GETATTR response
+ * @orig: original timestamp in the inode
+ * @now: current time
+ *
+ * Given a timestamp in a CB_GETATTR response, check it against the
+ * current timestamp in the inode and the current time. Returns true
+ * if the inode's timestamp needs to be updated, and false otherwise.
+ * @cb may also be changed if the timestamp needs to be clamped.
+ */
+static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig,
+ const struct timespec64 *now)
+{
+
+ /*
+ * "When the time presented is before the original time, then the
+ * update is ignored." Also no need to update if there is no change.
+ */
+ if (timespec64_compare(cb, orig) <= 0)
+ return false;
+
+ /*
+ * "When the time presented is in the future, the server can either
+ * clamp the new time to the current time, or it may
+ * return NFS4ERR_DELAY to the client, allowing it to retry."
+ */
+ if (timespec64_compare(cb, now) > 0) {
+ /* clamp it */
+ *cb = *now;
+ }
+
+ return true;
+}
+
+static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp)
+{
+ struct inode *inode = d_inode(dentry);
+ struct timespec64 now = current_time(inode);
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+ struct iattr attrs = { };
+ int ret;
+
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ struct timespec64 atime = inode_get_atime(inode);
+ struct timespec64 mtime = inode_get_mtime(inode);
+
+ attrs.ia_atime = ncf->ncf_cb_atime;
+ attrs.ia_mtime = ncf->ncf_cb_mtime;
+
+ if (set_cb_time(&attrs.ia_atime, &atime, &now))
+ attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+
+ if (set_cb_time(&attrs.ia_mtime, &mtime, &now)) {
+ attrs.ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET;
+ attrs.ia_ctime = attrs.ia_mtime;
+ }
+ } else {
+ attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME;
+ attrs.ia_mtime = attrs.ia_ctime = now;
+ }
+
+ if (!attrs.ia_valid)
+ return 0;
+
+ attrs.ia_valid |= ATTR_DELEG;
+ inode_lock(inode);
+ ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
+ inode_unlock(inode);
+ return ret;
+}
+
+/**
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
* @dentry: dentry of inode to be checked for a conflict
@@ -8914,7 +9142,6 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
struct file_lock_context *ctx;
struct nfs4_delegation *dp = NULL;
struct file_lease *fl;
- struct iattr attrs;
struct nfs4_cb_fattr *ncf;
struct inode *inode = d_inode(dentry);
@@ -8976,11 +9203,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
* not update the file's metadata with the client's
* modified size
*/
- attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
- attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG;
- inode_lock(inode);
- err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
- inode_unlock(inode);
+ err = cb_getattr_update_times(dentry, dp);
if (err) {
status = nfserrno(err);
goto out_status;